Diffstat (limited to 'compiler/codeGen')
33 files changed, 10447 insertions, 0 deletions
diff --git a/compiler/codeGen/Bitmap.hs b/compiler/codeGen/Bitmap.hs new file mode 100644 index 0000000000..c0b490978c --- /dev/null +++ b/compiler/codeGen/Bitmap.hs @@ -0,0 +1,79 @@ +-- +-- (c) The University of Glasgow 2003 +-- + +-- Functions for constructing bitmaps, which are used in various +-- places in generated code (stack frame liveness masks, function +-- argument liveness masks, SRT bitmaps). + +module Bitmap ( + Bitmap, mkBitmap, + intsToBitmap, intsToReverseBitmap, + mAX_SMALL_BITMAP_SIZE + ) where + +#include "HsVersions.h" +#include "../includes/MachDeps.h" + +import SMRep +import Constants +import DATA_BITS + +{-| +A bitmap represented by a sequence of 'StgWord's on the /target/ +architecture. These are used for bitmaps in info tables and other +generated code which need to be emitted as sequences of StgWords. +-} +type Bitmap = [StgWord] + +-- | Make a bitmap from a sequence of bits +mkBitmap :: [Bool] -> Bitmap +mkBitmap [] = [] +mkBitmap stuff = chunkToBitmap chunk : mkBitmap rest + where (chunk, rest) = splitAt wORD_SIZE_IN_BITS stuff + +chunkToBitmap :: [Bool] -> StgWord +chunkToBitmap chunk = + foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ] + +-- | Make a bitmap where the slots specified are the /ones/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0xb@. + +-- The list of @Int@s /must/ be already sorted. +intsToBitmap :: Int -> [Int] -> Bitmap +intsToBitmap size slots{- must be sorted -} + | size <= 0 = [] + | otherwise = + (foldr (.|.) 0 (map (1 `shiftL`) these)) : + intsToBitmap (size - wORD_SIZE_IN_BITS) + (map (\x -> x - wORD_SIZE_IN_BITS) rest) + where (these,rest) = span (<wORD_SIZE_IN_BITS) slots + +-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero, +-- just to make the bitmap easier to read). +-- +-- The list of @Int@s /must/ be already sorted. +intsToReverseBitmap :: Int -> [Int] -> Bitmap +intsToReverseBitmap size slots{- must be sorted -} + | size <= 0 = [] + | otherwise = + (foldr xor init (map (1 `shiftL`) these)) : + intsToReverseBitmap (size - wORD_SIZE_IN_BITS) + (map (\x -> x - wORD_SIZE_IN_BITS) rest) + where (these,rest) = span (<wORD_SIZE_IN_BITS) slots + init + | size >= wORD_SIZE_IN_BITS = complement 0 + | otherwise = (1 `shiftL` size) - 1 + +{- | +Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h. +Some kinds of bitmap pack a size\/bitmap into a single word if +possible, or fall back to an external pointer when the bitmap is too +large. This value represents the largest size of bitmap that can be +packed into a single word.
+-} +mAX_SMALL_BITMAP_SIZE :: Int +mAX_SMALL_BITMAP_SIZE | wORD_SIZE == 4 = 27 + | otherwise = 58 + diff --git a/compiler/codeGen/CgBindery.hi-boot-5 b/compiler/codeGen/CgBindery.hi-boot-5 new file mode 100644 index 0000000000..f375fcc6e1 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-5 @@ -0,0 +1,7 @@ +__interface CgBindery 1 0 where +__export CgBindery CgBindings CgIdInfo VolatileLoc StableLoc nukeVolatileBinds; +1 type CgBindings = VarEnv.IdEnv CgIdInfo; +1 data CgIdInfo; +1 data VolatileLoc; +1 data StableLoc; +1 nukeVolatileBinds :: CgBindings -> CgBindings ; diff --git a/compiler/codeGen/CgBindery.hi-boot-6 b/compiler/codeGen/CgBindery.hi-boot-6 new file mode 100644 index 0000000000..7d1f300a86 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-6 @@ -0,0 +1,8 @@ +module CgBindery where + +type CgBindings = VarEnv.IdEnv CgIdInfo +data CgIdInfo +data VolatileLoc +data StableLoc + +nukeVolatileBinds :: CgBindings -> CgBindings diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs new file mode 100644 index 0000000000..f78edda655 --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs @@ -0,0 +1,494 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CgBindery]{Utility functions related to doing @CgBindings@} + +\begin{code} +module CgBindery ( + CgBindings, CgIdInfo, + StableLoc, VolatileLoc, + + cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + + stableIdInfo, heapIdInfo, + letNoEscapeIdInfo, idInfoToAmode, + + addBindC, addBindsC, + + nukeVolatileBinds, + nukeDeadBindings, + getLiveStackSlots, + + bindArgsToStack, rebindToStack, + bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToTemp, + getArgAmode, getArgAmodes, + getCgIdInfo, + getCAddrModeIfVolatile, getVolatileRegs, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgHeapery ( getHpRelOffset ) +import CgStackery ( freeStackSlots, getSpRelOffset ) +import CgUtils ( cgLit, cmmOffsetW ) +import CLabel ( mkClosureLabel, pprCLabel ) +import ClosureInfo ( mkLFImported, mkLFArgument, LambdaFormInfo ) + +import Cmm +import PprCmm ( {- instance Outputable -} ) +import SMRep ( CgRep(..), WordOff, isFollowableArg, + isVoidArg, cgRepSizeW, argMachRep, + idCgRep, typeCgRep ) +import Id ( Id, idName ) +import VarEnv +import VarSet ( varSetElems ) +import Literal ( literalType ) +import Maybes ( catMaybes ) +import Name ( isExternalName ) +import StgSyn ( StgArg, StgLiveVars, GenStgArg(..), isStgTypeArg ) +import Unique ( Uniquable(..) ) +import UniqSet ( elementOfUniqSet ) +import Outputable +\end{code} + + +%************************************************************************ +%* * +\subsection[Bindery-datatypes]{Data types} +%* * +%************************************************************************ + +@(CgBinding a b)@ is a type of finite maps from a to b. + +The assumption used to be that @lookupCgBind@ must get exactly one +match. This is {\em completely wrong} in the case of compiling +letrecs (where knot-tying is used). An initial binding is fed in (and +never evaluated); eventually, a correct binding is put into the +environment. So there can be two bindings for a given name. 
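To make the knot-tying point concrete, here is a minimal standalone sketch (hypothetical names, and plain Data.Map in place of the IdEnv used here) of how laziness lets an initial, never-forced binding be fed in and later replaced by the correct one:

\begin{verbatim}
import qualified Data.Map as M

-- Toy model: compiling letrec { x = 1 + y } against an environment.
-- The *final* environment is fed back into the step that builds it;
-- the placeholder binding for "x" is only ever looked up lazily, so
-- the knot unties as long as nothing forces "x" while defining "x".
compileLetrec :: M.Map String Int -> M.Map String Int
compileLetrec env = final
  where
    final = M.insert "x" rhs env
    rhs   = 1 + M.findWithDefault 0 "y" final  -- may consult 'final'

main :: IO ()
main = print (compileLetrec (M.fromList [("y", 41)]))
-- prints fromList [("x",42),("y",41)]
\end{verbatim}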
+ +\begin{code} +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_rep :: CgRep + , cg_vol :: VolatileLoc + , cg_stb :: StableLoc + , cg_lf :: LambdaFormInfo } + +mkCgIdInfo id vol stb lf + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id } + +voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc + , cg_stb = VoidLoc, cg_lf = mkLFArgument id + , cg_rep = VoidArg } + -- Used just for VoidRep things + +data VolatileLoc -- These locations die across a call + = NoVolatileLoc + | RegLoc CmmReg -- In one of the registers (global or local) + | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) + | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node + -- ie *(Node+offset) +\end{code} + +@StableLoc@ encodes where an Id can be found, used by +the @CgBindings@ environment in @CgBindery@. + +\begin{code} +data StableLoc + = NoStableLoc + + | VirStkLoc VirtualSpOffset -- The thing is held in this + -- stack slot + + | VirStkLNE VirtualSpOffset -- A let-no-escape thing; the + -- value is this stack pointer + -- (as opposed to the contents of the slot) + + | StableLoc CmmExpr + | VoidLoc -- Used only for VoidRep variables. They never need to + -- be saved, so it makes sense to treat them as + -- having a stable location +\end{code} + +\begin{code} +instance Outputable CgIdInfo where + ppr (CgIdInfo id rep vol stb lf) + = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] + +instance Outputable VolatileLoc where + ppr NoVolatileLoc = empty + ppr (RegLoc r) = ptext SLIT("reg") <+> ppr r + ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v + ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v + +instance Outputable StableLoc where + ppr NoStableLoc = empty + ppr VoidLoc = ptext SLIT("void") + ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v + ppr (VirStkLNE v) = ptext SLIT("lne") <+> ppr v + ppr (StableLoc a) = ptext SLIT("amode") <+> ppr a +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-idInfo]{Manipulating IdInfo} +%* * +%************************************************************************ + +\begin{code} +stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info +heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info +letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info +stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info + +idInfoToAmode :: CgIdInfo -> FCode CmmExpr +idInfoToAmode info + = case cg_vol info of { + RegLoc reg -> returnFC (CmmReg reg) ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; + VirHpLoc hp_off -> getHpRelOffset hp_off ; + NoVolatileLoc -> + + case cg_stb info of + StableLoc amode -> returnFC amode + VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off + ; return (CmmLoad sp_rel mach_rep) } + + VirStkLNE sp_off -> getSpRelOffset sp_off + + VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) + -- We return a 'bottom' amode, rather than panicking now + -- In this way getArgAmode returns a pair of (VoidArg, bottom) + -- and that's exactly what we
want + + NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + } + where + mach_rep = argMachRep (cg_rep info) + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +cgIdInfoArgRep :: CgIdInfo -> CgRep +cgIdInfoArgRep = cg_rep + +maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape other = Nothing +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-bindery]{Monad things for fiddling with @CgBindings@} +%* * +%************************************************************************ + +There are three basic routines, for adding (@addBindC@), modifying +(@modifyBindC@) and looking up (@getCgIdInfo@) bindings. + +An @Id@ is bound to a @(VolatileLoc, StableLoc)@ pair. +The name should not already be bound. (nice ASSERT, eh?) + +\begin{code} +addBindC :: Id -> CgIdInfo -> Code +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> Code +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +modifyBindC :: Id -> (CgIdInfo -> CgIdInfo) -> Code +modifyBindC name mangle_fn = do + binds <- getBinds + setBinds $ modifyVarEnv mangle_fn binds name + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + hmods <- getHomeModules + let ext_lbl = CmmLit (CmmLabel (mkClosureLabel hmods name)) + return (stableIdInfo id ext_lbl (mkLFImported id)) + else + if isVoidArg (idCgRep id) then + -- Void things are never in the environment + return (voidIdInfo id) + else + -- Bug + cgLookupPanic id + }}}} + + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "cgPanic" + (vcat [ppr id, + ptext SLIT("static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext SLIT("local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext SLIT("SRT label") <+> pprCLabel srt + ]) +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-nuke-volatile]{Nuking volatile bindings} +%* * +%************************************************************************ + +We sometimes want to nuke all the volatile bindings; we must be sure +we don't leave any (NoVolatile, NoStable) binds around...
+ +\begin{code} +nukeVolatileBinds :: CgBindings -> CgBindings +nukeVolatileBinds binds + = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds)) + where + keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc + keep_if_stable info acc + = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc +\end{code} + + +%************************************************************************ +%* * +\subsection[lookup-interface]{Interface functions to looking up bindings} +%* * +%************************************************************************ + +\begin{code} +getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) +getCAddrModeIfVolatile id + = do { info <- getCgIdInfo id + ; case cg_stb info of + NoStableLoc -> do -- Aha! So it is volatile! + amode <- idInfoToAmode info + return $ Just amode + a_stable_loc -> return Nothing } +\end{code} + +@getVolatileRegs@ gets a set of live variables, and returns a list of +all registers on which these variables depend. These are the regs +which must be saved and restored across any C calls. If a variable is +both in a volatile location (depending on a register) {\em and} a +stable one (notably, on the stack), we modify the current bindings to +forget the volatile one. + +\begin{code} +getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] + +getVolatileRegs vars = do + do { stuff <- mapFCs snaffle_it (varSetElems vars) + ; returnFC $ catMaybes stuff } + where + snaffle_it var = do + { info <- getCgIdInfo var + ; let + -- commoned-up code... + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb info of + NoStableLoc -> returnFC (Just reg) -- got one! + is_a_stable_loc -> do + { -- has both volatile & stable locations; + -- force it to rely on the stable location + modifyBindC var nuke_vol_bind + ; return Nothing } + + ; case cg_vol info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + other_loc -> returnFC Nothing -- Local registers + } + + nuke_vol_bind info = info { cg_vol = NoVolatileLoc } +\end{code} + +\begin{code} +getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) +getArgAmode (StgVarArg var) + = do { info <- getCgIdInfo var + ; amode <- idInfoToAmode info + ; return (cgIdInfoArgRep info, amode ) } + +getArgAmode (StgLitArg lit) + = do { cmm_lit <- cgLit lit + ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } + +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] +getArgAmodes [] = returnFC [] +getArgAmodes (atom:atoms) + | isStgTypeArg atom = getArgAmodes atoms + | otherwise = do { amode <- getArgAmode atom + ; amodes <- getArgAmodes atoms + ; return ( amode : amodes ) } +\end{code} + +%************************************************************************ +%* * +\subsection[binding-and-rebinding-interface]{Interface functions for binding and re-binding names} +%* * +%************************************************************************ + +\begin{code} +bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code +bindArgsToStack args + = mapCs bind args + where + bind(id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) + +bindArgsToRegs :: [(Id, GlobalReg)] -> Code +bindArgsToRegs args + = mapCs bind args + where + bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) + +bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code +bindNewToNode id offset lf_info + = addBindC id (nodeIdInfo id 
offset lf_info) + +-- Create a new temporary whose unique is that in the id, +-- bind the id to it, and return the addressing mode for the +-- temporary. +bindNewToTemp :: Id -> FCode CmmReg +bindNewToTemp id + = do addBindC id (regIdInfo id temp_reg lf_info) + return temp_reg + where + uniq = getUnique id + temp_reg = CmmLocal (LocalReg uniq (argMachRep (idCgRep id))) + lf_info = mkLFArgument id -- Always used for things we + -- know nothing about + +bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code +bindNewToReg name reg lf_info + = addBindC name info + where + info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info +\end{code} + +\begin{code} +rebindToStack :: Id -> VirtualSpOffset -> Code +rebindToStack name offset + = modifyBindC name replace_stable_fn + where + replace_stable_fn info = info { cg_stb = VirStkLoc offset } +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-deadslots]{Finding dead stack slots} +%* * +%************************************************************************ + +nukeDeadBindings does the following: + + - Removes all bindings from the environment other than those + for variables in the argument to nukeDeadBindings. + - Collects any stack slots so freed, and returns them to the stack free + list. + - Moves the virtual stack pointer to point to the topmost used + stack locations. + +You can have multi-word slots on the stack (where a Double# used to +be, for instance); if dead, such a slot will be reported as *several* +offsets (one per word). + +Probably *naughty* to look inside monad... + +\begin{code} +nukeDeadBindings :: StgLiveVars -- All the *live* variables + -> Code +nukeDeadBindings live_vars = do + binds <- getBinds + let (dead_stk_slots, bs') = + dead_slots live_vars + [] [] + [ (cg_id b, b) | b <- varEnvElts binds ] + setBinds $ mkVarEnv bs' + freeStackSlots dead_stk_slots +\end{code} + +Several boring auxiliary functions to do the dirty work. + +\begin{code} +dead_slots :: StgLiveVars + -> [(Id,CgIdInfo)] + -> [VirtualSpOffset] + -> [(Id,CgIdInfo)] + -> ([VirtualSpOffset], [(Id,CgIdInfo)]) + +-- dead_slots carries accumulating parameters for +-- filtered bindings, dead slots +dead_slots live_vars fbs ds [] + = (ds, reverse fbs) -- Finished; rm the dups, if any + +dead_slots live_vars fbs ds ((v,i):bs) + | v `elementOfUniqSet` live_vars + = dead_slots live_vars ((v,i):fbs) ds bs + -- Live, so don't record it in dead slots + -- Instead keep it in the filtered bindings + + | otherwise + = case cg_stb i of + VirStkLoc offset + | size > 0 + -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs + + _ -> dead_slots live_vars fbs ds bs + where + size :: WordOff + size = cgRepSizeW (cg_rep i) +\end{code} + +\begin{code} +getLiveStackSlots :: FCode [VirtualSpOffset] +-- Return the offsets of slots in stack containing live pointers +getLiveStackSlots + = do { binds <- getBinds + ; return [off | CgIdInfo { cg_stb = VirStkLoc off, + cg_rep = rep } <- varEnvElts binds, + isFollowableArg rep] } +\end{code} diff --git a/compiler/codeGen/CgBindery.lhs-boot b/compiler/codeGen/CgBindery.lhs-boot new file mode 100644 index 0000000000..e504a6a9ba --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs-boot @@ -0,0 +1,11 @@ +\begin{code} +module CgBindery where +import VarEnv( IdEnv ) + +data CgIdInfo +data VolatileLoc +data StableLoc +type CgBindings = IdEnv CgIdInfo + +nukeVolatileBinds :: CgBindings -> CgBindings +\end{code}
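The CgCallConv.hs file that follows packs the one-word bitmaps built by Bitmap.hs (above) into liveness masks. As a standalone illustration of that encoding (a sketch only, assuming a 32-bit word, for which the 27-bit small-bitmap limit above suggests a @BITMAP_BITS_SHIFT@ of 5; the real value lives in InfoTables.h):

\begin{verbatim}
import Data.Bits (shiftL, (.|.))
import Data.Word (Word32)

-- One word of intsToBitmap: set the bit for each (sorted) slot.
slotsToBits :: [Int] -> Word32
slotsToBits = foldr (\n acc -> acc .|. (1 `shiftL` n)) 0

-- Small-bitmap packing as in CgCallConv.smallLiveness below:
-- the size in the low bits, the bitmap shifted past it.
packSmallLiveness :: Int -> Word32 -> Word32
packSmallLiveness size bits = fromIntegral size .|. (bits `shiftL` 5)

main :: IO ()
main = print (packSmallLiveness 4 (slotsToBits [0,1,3]))
-- slotsToBits [0,1,3] = 0xb; packed word = 4 .|. (0xb << 5) = 356
\end{verbatim}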
\ No newline at end of file diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs new file mode 100644 index 0000000000..f463255807 --- /dev/null +++ b/compiler/codeGen/CgCallConv.hs @@ -0,0 +1,512 @@ +----------------------------------------------------------------------------- +-- +-- CgCallConv +-- +-- The datatypes and functions here encapsulate the +-- calling and return conventions used by the code generator. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + + +module CgCallConv ( + -- Argument descriptors + mkArgDescr, argDescrType, + + -- Liveness + isBigLiveness, buildContLiveness, mkRegLiveness, + smallLiveness, mkLivenessCLit, + + -- Register assignment + assignCallRegs, assignReturnRegs, assignPrimOpCallRegs, + + -- Calls + constructSlowCall, slowArgs, slowCallPattern, + + -- Returns + CtrlReturnConvention(..), + ctrlReturnConvAlg, + dataReturnConvPrim, + getSequelAmode + ) where + +#include "HsVersions.h" + +import CgUtils ( emitRODataLits, mkWordCLit ) +import CgMonad + +import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS, + mAX_Vanilla_REG, mAX_Float_REG, + mAX_Double_REG, mAX_Long_REG, + mAX_Real_Vanilla_REG, mAX_Real_Float_REG, + mAX_Real_Double_REG, mAX_Real_Long_REG, + bITMAP_BITS_SHIFT + ) + +import ClosureInfo ( ArgDescr(..), Liveness(..) ) +import CgStackery ( getSpRelOffset ) +import SMRep +import MachOp ( wordRep ) +import Cmm ( CmmExpr(..), GlobalReg(..), CmmLit(..), CmmReg(..), node ) +import CmmUtils ( mkLblExpr ) +import CLabel +import Maybes ( mapCatMaybes ) +import Id ( Id ) +import Name ( Name ) +import TyCon ( TyCon, tyConFamilySize ) +import Bitmap ( Bitmap, mAX_SMALL_BITMAP_SIZE, + mkBitmap, intsToReverseBitmap ) +import Util ( isn'tIn, sortLe ) +import StaticFlags ( opt_Unregisterised ) +import FastString ( LitString ) +import Outputable +import DATA_BITS + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. 
+#include "../includes/StgFun.h" + +------------------------- +argDescrType :: ArgDescr -> Int +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter nonVoidArg (map idCgRep args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (PtrArg : args) = False : argBits args +argBits (arg : args) = take (cgRepSizeW arg) (repeat True) ++ argBits args + +stdPattern :: [CgRep] -> Maybe Int +stdPattern [] = Just ARG_NONE -- just void args, probably + +stdPattern [PtrArg] = Just ARG_P +stdPattern [FloatArg] = Just ARG_F +stdPattern [DoubleArg] = Just ARG_D +stdPattern [LongArg] = Just ARG_L +stdPattern [NonPtrArg] = Just ARG_N + +stdPattern [NonPtrArg,NonPtrArg] = Just ARG_NN +stdPattern [NonPtrArg,PtrArg] = Just ARG_NP +stdPattern [PtrArg,NonPtrArg] = Just ARG_PN +stdPattern [PtrArg,PtrArg] = Just ARG_PP + +stdPattern [NonPtrArg,NonPtrArg,NonPtrArg] = Just ARG_NNN +stdPattern [NonPtrArg,NonPtrArg,PtrArg] = Just ARG_NNP +stdPattern [NonPtrArg,PtrArg,NonPtrArg] = Just ARG_NPN +stdPattern [NonPtrArg,PtrArg,PtrArg] = Just ARG_NPP +stdPattern [PtrArg,NonPtrArg,NonPtrArg] = Just ARG_PNN +stdPattern [PtrArg,NonPtrArg,PtrArg] = Just ARG_PNP +stdPattern [PtrArg,PtrArg,NonPtrArg] = Just ARG_PPN +stdPattern [PtrArg,PtrArg,PtrArg] = Just ARG_PPP + +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPPP +stdPattern other = Nothing + + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel name + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + +------------------- +mkLivenessCLit :: Liveness -> CmmLit +mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). 
+------------------------------------------------------------------------- + +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + = (1 `shiftL` (i - 1)) .|. reg_bits regs + reg_bits (_ : regs) + = reg_bits regs + +------------------------------------------------------------------------- +-- +-- Pushing the arguments for a slow call +-- +------------------------------------------------------------------------- + +-- For a slow call, we must take a bunch of arguments and intersperse +-- some stg_ap_<pattern>_ret_info return addresses. +constructSlowCall + :: [(CgRep,CmmExpr)] + -> (CLabel, -- RTS entry point for call + [(CgRep,CmmExpr)], -- args to pass to the entry point + [(CgRep,CmmExpr)]) -- stuff to save on the stack + + -- don't forget the zero case +constructSlowCall [] + = (mkRtsApFastLabel SLIT("stg_ap_0"), [], []) + +constructSlowCall amodes + = (stg_ap_pat, these, rest) + where + stg_ap_pat = mkRtsApFastLabel arg_pat + (arg_pat, these, rest) = matchSlowPattern amodes + +enterRtsRetLabel arg_pat + | tablesNextToCode = mkRtsRetInfoLabel arg_pat + | otherwise = mkRtsRetLabel arg_pat + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs [] = [] +slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest + where (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkRtsRetInfoLabel arg_pat + +matchSlowPattern :: [(CgRep,CmmExpr)] + -> (LitString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) +matchSlowPattern amodes = (arg_pat, these, rest) + where (arg_pat, n) = slowCallPattern (map fst amodes) + (these, rest) = splitAt n amodes + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppppp"), 6) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppppp"), 5) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppp"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_pppv"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppp"), 3) +slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_ppv"), 3) +slowCallPattern (PtrArg: PtrArg: _) = (SLIT("stg_ap_pp"), 2) +slowCallPattern (PtrArg: VoidArg: _) = (SLIT("stg_ap_pv"), 2) +slowCallPattern (PtrArg: _) = (SLIT("stg_ap_p"), 1) +slowCallPattern (VoidArg: _) = (SLIT("stg_ap_v"), 1) +slowCallPattern (NonPtrArg: _) = (SLIT("stg_ap_n"), 1) +slowCallPattern (FloatArg: _) = (SLIT("stg_ap_f"), 1) +slowCallPattern (DoubleArg: _) = (SLIT("stg_ap_d"), 1) +slowCallPattern (LongArg: _) = (SLIT("stg_ap_l"), 1) +slowCallPattern _ = panic "CgStackery.slowCallPattern" + +------------------------------------------------------------------------- +-- +-- Return conventions +-- +------------------------------------------------------------------------- + +-- A @CtrlReturnConvention@ says how {\em control} is returned. 
+ +data CtrlReturnConvention + = VectoredReturn Int -- size of the vector table (family size) + | UnvectoredReturn Int -- family size + +ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention +ctrlReturnConvAlg tycon + = case (tyConFamilySize tycon) of + size -> -- we're supposed to know... + if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then + VectoredReturn size + else + UnvectoredReturn size + -- NB: unvectored returns include size 0 (no constructors), so that + -- the following perverse code compiles (it crashed GHC in 5.02) + -- data T1 + -- data T2 = T2 !T1 Int + -- The only value of type T1 is bottom, which never returns anyway. + +dataReturnConvPrim :: CgRep -> CmmReg +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) +dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) +dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) +dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" + + +-- getSequelAmode returns an amode which refers to an info table. The info +-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful +-- not to handle real code pointers, just in case we're compiling for +-- an unregisterised/untailcallish architecture, where info pointers and +-- code pointers aren't the same. +-- DIRE WARNING. +-- The OnStack case of sequelToAmode delivers an Amode which is only +-- valid just before the final control transfer, because it assumes +-- that Sp is pointing to the top word of the return address. This +-- seems unclean but there you go. + +getSequelAmode :: FCode CmmExpr +getSequelAmode + = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo + ; case sequel of + OnStack -> do { sp_rel <- getSpRelOffset virt_sp + ; returnFC (CmmLoad sp_rel wordRep) } + + UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) + CaseAlts lbl _ _ True -> returnFC (CmmLit (CmmLabel mkSeqInfoLabel)) + CaseAlts lbl _ _ False -> returnFC (CmmLit (CmmLabel lbl)) + } + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the current stack +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (bound in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- We build up a bitmap of non-pointer slots by searching the environment +-- for all the pointer variables, and subtracting these from a bitmap +-- with initially all bits set (up to the size of the stack frame). + +buildContLiveness :: Name -- Basis for label (only) + -> [VirtualSpOffset] -- Live stack slots + -> FCode Liveness +buildContLiveness name live_slots + = do { stk_usg <- getStkUsage + ; let StackUsage { realSp = real_sp, + frameSp = frame_sp } = stk_usg + + start_sp :: VirtualSpOffset + start_sp = real_sp - retAddrSizeW + -- In a continuation, we want a liveness mask that + -- starts from just after the return address, which is + -- on the stack at real_sp. + + frame_size :: WordOff + frame_size = start_sp - frame_sp + -- real_sp points to the frame-header for the current + -- stack frame, and the end of this frame is frame_sp. + -- The size is therefore real_sp - frame_sp - retAddrSizeW + -- (subtract one for the frame-header = return address).
+ + rel_slots :: [WordOff] + rel_slots = sortLe (<=) + [ start_sp - ofs -- Get slots relative to top of frame + | ofs <- live_slots ] + + bitmap = intsToReverseBitmap frame_size rel_slots + + ; WARN( not (all (>=0) rel_slots), + ppr name $$ ppr live_slots $$ ppr frame_size $$ ppr start_sp $$ ppr rel_slots ) + mkLiveness name frame_size bitmap } + + +------------------------------------------------------------------------- +-- +-- Register assignment +-- +------------------------------------------------------------------------- + +-- How to assign registers for +-- +-- 1) Calling a fast entry point. +-- 2) Returning an unboxed tuple. +-- 3) Invoking an out-of-line PrimOp. +-- +-- Registers are assigned in order. +-- +-- If we run out, we don't attempt to assign any further registers (even +-- though we might have run out of only one kind of register); we just +-- return immediately with the left-overs specified. +-- +-- The alternative version @assignAllRegs@ uses the complete set of +-- registers, including those that aren't mapped to real machine +-- registers. This is used for calling special RTS functions and PrimOps +-- which expect their arguments to always be in the same registers. + +assignCallRegs, assignPrimOpCallRegs, assignReturnRegs + :: [(CgRep,a)] -- Arg or result values to assign + -> ([(a, GlobalReg)], -- Register assignment in same order + -- for *initial segment of* input list + -- (but reversed; doesn't matter) + -- VoidRep args do not appear here + [(CgRep,a)]) -- Leftover arg or result values + +assignCallRegs args + = assign_regs args (mkRegTbl [node]) + -- The entry convention for a function closure + -- never uses Node for argument passing; instead + -- Node points to the function closure itself + +assignPrimOpCallRegs args + = assign_regs args (mkRegTbl_allRegs []) + -- For primops, *all* arguments must be passed in registers + +assignReturnRegs args + = assign_regs args (mkRegTbl []) + -- For returning unboxed tuples etc, + -- we use all regs + +assign_regs :: [(CgRep,a)] -- Arg or result values to assign + -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs + -> ([(a, GlobalReg)], [(CgRep, a)]) +assign_regs args supply + = go args [] supply + where + go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) + go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and + = go args acc supply -- there's nothing to bind them to + go ((rep,arg) : args) acc supply + = case assign_reg rep supply of + Just (reg, supply') -> go args ((arg,reg):acc) supply' + Nothing -> (acc, (rep,arg):args) -- No more regs + +assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) +assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) +assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) + -- PtrArg and NonPtrArg both go in a vanilla register +assign_reg other not_enough_regs = Nothing + + +------------------------------------------------------------------------- +-- +-- Register supplies +-- +------------------------------------------------------------------------- + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies.
+-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. + +useVanillaRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Vanilla_REG +useFloatRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Float_REG +useDoubleRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Double_REG +useLongRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Long_REG + +vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int] +vanillaRegNos = regList useVanillaRegs +floatRegNos = regList useFloatRegs +doubleRegNos = regList useDoubleRegs +longRegNos = regList useLongRegs + +allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int] +allVanillaRegNos = regList mAX_Vanilla_REG +allFloatRegNos = regList mAX_Float_REG +allDoubleRegNos = regList mAX_Double_REG +allLongRegNos = regList mAX_Long_REG + +regList 0 = [] +regList n = [1 .. n] + +type AvailRegs = ( [Int] -- available vanilla regs. + , [Int] -- floats + , [Int] -- doubles + , [Int] -- longs (int64 and word64) + ) + +mkRegTbl :: [GlobalReg] -> AvailRegs +mkRegTbl regs_in_use + = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos + +mkRegTbl_allRegs :: [GlobalReg] -> AvailRegs +mkRegTbl_allRegs regs_in_use + = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos + +mkRegTbl' regs_in_use vanillas floats doubles longs + = (ok_vanilla, ok_float, ok_double, ok_long) + where + ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_float = mapCatMaybes (select FloatReg) floats + ok_double = mapCatMaybes (select DoubleReg) doubles + ok_long = mapCatMaybes (select LongReg) longs + -- rep isn't looked at, hence we can use any old rep. + + select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int + -- once we've unboxed the Int, we make a GlobalReg + -- and see if it is already in use; if not, return its number. + + select mk_reg_fun cand + = let + reg = mk_reg_fun cand + in + if reg `not_elem` regs_in_use + then Just cand + else Nothing + where + not_elem = isn'tIn "mkRegTbl" + + diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs new file mode 100644 index 0000000000..e7c08940c5 --- /dev/null +++ b/compiler/codeGen/CgCase.lhs @@ -0,0 +1,634 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgCase.lhs,v 1.75 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgCase]{Converting @StgCase@ expressions} +%* * +%******************************************************** + +\begin{code} +module CgCase ( cgCase, saveVolatileVarsAndRegs, + restoreCurrentCostCentre + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import StgSyn +import CgBindery ( getArgAmodes, + bindNewToReg, bindNewToTemp, + getCgIdInfo, getArgAmode, + rebindToStack, getCAddrModeIfVolatile, + nukeDeadBindings, idInfoToAmode + ) +import CgCon ( bindConArgs, bindUnboxedTupleComponents ) +import CgHeapery ( altHeapCheck, unbxTupleHeapCheck ) +import CgCallConv ( dataReturnConvPrim, ctrlReturnConvAlg, + CtrlReturnConvention(..)
+ ) +import CgStackery ( allocPrimStack, allocStackTop, getSpRelOffset, + deAllocStackTop, freeStackSlots + ) +import CgTailCall ( performTailCall ) +import CgPrimOp ( cgPrimOp ) +import CgForeignCall ( cgForeignCall ) +import CgUtils ( newTemp, cgLit, emitLitSwitch, emitSwitch, + tagToClosure ) +import CgProf ( curCCS, curCCSAddr ) +import CgInfoTbls ( emitDirectReturnTarget, emitAlgReturnTarget, + dataConTagZ ) +import SMRep ( CgRep(..), retAddrSizeW, nonVoidArg, isVoidArg, + idCgRep, tyConCgRep, typeHint ) +import CmmUtils ( CmmStmts, noStmts, oneStmt, plusStmts ) +import Cmm +import MachOp ( wordRep ) +import ClosureInfo ( mkLFArgument ) +import StaticFlags ( opt_SccProfilingOn ) +import Id ( Id, idName, isDeadBinder, idType ) +import ForeignCall ( ForeignCall(..), CCallSpec(..), playSafe ) +import VarSet ( varSetElems ) +import CoreSyn ( AltCon(..) ) +import PrimOp ( PrimOp(..), primOpOutOfLine ) +import TyCon ( isEnumerationTyCon, tyConFamilySize ) +import Util ( isSingleton ) +import Outputable +\end{code} + +\begin{code} +data GCFlag + = GCMayHappen -- The scrutinee may involve GC, so everything must be + -- tidy before the code for the scrutinee. + + | NoGC -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. Hence the case can + -- be done inline, without tidying up first. +\end{code} + +It is quite interesting to decide whether to put a heap-check +at the start of each alternative. Of course we certainly have +to do so if the case forces an evaluation, or if there is a primitive +op which can trigger GC. + +A more interesting situation is this: + + \begin{verbatim} + !A!; + ...A... + case x# of + 0# -> !B!; ...B... + default -> !C!; ...C... + \end{verbatim} + +where \tr{!x!} indicates a possible heap-check point. The heap checks +in the alternatives {\em can} be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting \tr{!B!}, \tr{!C!}: + + - {\em May} save a heap overflow test, + if ...A... allocates anything. The other advantage + of this is that we can use relative addressing + from a single Hp to get at all the closures so allocated. + + - No need to save volatile vars etc across the case + +Against: + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + This never hurts us if there is only one alternative. + +\begin{code} +cgCase :: StgExpr + -> StgLiveVars + -> StgLiveVars + -> Id + -> SRT + -> AltType + -> [StgAlt] + -> Code +\end{code} + +Special case #1: case of literal. + +\begin{code} +cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { tmp_reg <- bindNewToTemp bndr + ; cm_lit <- cgLit lit + ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit)) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #2: scrutinising a primitive-typed variable. No +evaluation required. We don't save volatile variables, nor do we do a +heap-check in the alternatives. Instead, the heap usage of the +alternatives is worst-cased and passed upstream. This can result in +allocating more heap than strictly necessary, but it will sometimes +eliminate a heap check altogether. + +\begin{code} +cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { -- Careful! 
we can't just bind the default binder to the same thing + -- as the scrutinee, since it might be a stack location, and having + -- two bindings pointing at the same stack locn doesn't work (it + -- confuses nukeDeadBindings). Hence, use a new temp. + v_info <- getCgIdInfo v + ; amode <- idInfoToAmode v_info + ; tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg amode) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #3: inline PrimOps and foreign calls. + +\begin{code} +cgCase (StgOpApp op@(StgPrimOp primop) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | not (primOpOutOfLine primop) + = cgInlinePrimOp primop args bndr alt_type live_in_alts alts +\end{code} + +TODO: Case-of-case of primop can probably be done inline too (but +maybe better to translate it out beforehand). See +ghc/lib/misc/PackedString.lhs for examples where this crops up (with +4.02). + +Special case #4: inline foreign calls: an unsafe foreign call can be done +right here, just like an inline primop. + +\begin{code} +cgCase (StgOpApp op@(StgFCallOp fcall _) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | unsafe_foreign_call + = ASSERT( isSingleton alts ) + do -- *must* be an unboxed tuple alt. + -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. + { res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; let res_hints = map (typeHint.idType) non_void_res_ids + ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids + + unsafe_foreign_call + = case fcall of + CCall (CCallSpec _ _ s) -> not (playSafe s) + _other -> False +\end{code} + +Special case: scrutinising a non-primitive variable. +This can be done a little better than the general case, because +we can reuse/trim the stack slot holding the variable (if it is in one). + +\begin{code} +cgCase (StgApp fun args) + live_in_whole_case live_in_alts bndr srt alt_type alts + = do { fun_info <- getCgIdInfo fun + ; arg_amodes <- getArgAmodes args + + -- Nuking dead bindings *before* calculating the saves is the + -- value-add here. We might end up freeing up some slots currently + -- occupied by variables only required for the call. + -- NOTE: we need to look up the variables used in the call before + -- doing this, because some of them may not be in the environment + -- afterward. + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (performTailCall fun_info arg_amodes save_assts) } +\end{code} + +Note about return addresses: we *always* push a return address, even +if because of an optimisation we end up jumping direct to the return +code (not through the address itself). The alternatives always assume +that the return address is on the stack. The return address is +required in case the alternative performs a heap check, since it +encodes the liveness of the slots in the activation record. + +On entry to the case alternative, we can re-use the slot containing +the return address immediately after the heap check. That's what the +deAllocStackTop call is doing above. + +Finally, here is the general case. 
+ +\begin{code} +cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts + = do { -- Figure out what volatile variables to save + nukeDeadBindings live_in_whole_case + + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + -- Save those variables right now! + ; emitStmts save_assts + + -- generate code for the alts + ; scrut_eob_info + <- forkEval alts_eob_info + (do { nukeDeadBindings live_in_alts + ; allocStackTop retAddrSizeW -- space for retn address + ; nopC }) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (cgExpr expr) + } +\end{code} + +There's a lot of machinery going on behind the scenes to manage the +stack pointer here. forkEval takes the virtual Sp and free list from +the first argument, and turns that into the *real* Sp for the second +argument. It also uses this virtual Sp as the args-Sp in the EOB info +returned, so that the scrutinee will trim the real Sp back to the +right place before doing whatever it does. + --SDM (who just spent an hour figuring this out, and didn't want to + forget it). + +Why don't we push the return address just before evaluating the +scrutinee? Because the slot reserved for the return address might +contain something useful, so we wait until performing a tail call or +return before pushing the return address (see +CgTailCall.pushReturnAddress). + +This also means that the environment doesn't need to know about the +free stack slot for the return address (for generating bitmaps), +because we don't reserve it until just before the eval. + +TODO!! Problem: however, we have to save the current cost centre +stack somewhere, because at the eval point the current CCS might be +different. So we pick a free stack slot and save CCCS in it. One +consequence of this is that activation records on the stack don't +follow the layout of closures when we're profiling. The CCS could be +anywhere within the record. + +\begin{code} +maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _)) + = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True) +maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info +\end{code} + + +%************************************************************************ +%* * + Inline primops +%* * +%************************************************************************ + +\begin{code} +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + | isVoidArg (idCgRep bndr) + = ASSERT( con == DEFAULT && isSingleton alts && null bs ) + do { -- VOID RESULT; just sequencing, + -- so get in there and do it + cgPrimOp [] primop args live_in_alts + ; cgExpr rhs } + where + (con,bs,_,rhs) = head alts + +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + = do { -- PRIMITIVE ALTS, with non-void result + tmp_reg <- bindNewToTemp bndr + ; cgPrimOp [tmp_reg] primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts } + +cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts + = ASSERT( isSingleton alts ) + do { -- UNBOXED TUPLE ALTS + -- No heap check, no yield, just get in there and do it. + -- NB: the case binder isn't bound to anything; + -- it has an unboxed tuple type + + res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; cgPrimOp res_tmps primop args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg .
idCgRep) res_ids + +cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts + = do { -- ENUMERATION TYPE RETURN + -- Typical: case a ># b of { True -> ..; False -> .. } + -- The primop itself returns an index into the table of + -- closures for the enumeration type. + tag_amode <- ASSERT( isEnumerationTyCon tycon ) + do_enum_primop primop + + -- Bind the default binder if necessary + -- (avoiding it avoids the assignment) + -- The deadness info is set by StgVarInfo + ; hmods <- getHomeModules + ; whenC (not (isDeadBinder bndr)) + (do { tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg (tagToClosure hmods tycon tag_amode)) }) + + -- Compile the alts + ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-} + (AlgAlt tycon) alts + + -- Do the switch + ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + + do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result + do_enum_primop TagToEnumOp -- No code! + | [arg] <- args = do + (_,e) <- getArgAmode arg + return e + do_enum_primop primop + = do tmp <- newTemp wordRep + cgPrimOp [tmp] primop args live_in_alts + returnFC (CmmReg tmp) + +cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts + = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr) +\end{code} + +%************************************************************************ +%* * +\subsection[CgCase-alts]{Alternatives} +%* * +%************************************************************************ + +@cgEvalAlts@ returns an addressing mode for a continuation for the +alternatives of a @case@, used in a context when there +is some evaluation to be done. + +\begin{code} +cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any + -> Id + -> SRT -- SRT for the continuation + -> AltType + -> [StgAlt] + -> FCode Sequel -- Any addr modes inside are guaranteed + -- to be a label so that we can duplicate it + -- without risk of duplicating code + +cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts + = do { let rep = tyConCgRep tycon + reg = dataReturnConvPrim rep -- Bottom for voidRep + + ; abs_c <- forkProc $ do + { -- Bind the case binder, except if it's void + -- (reg is bottom in that case) + whenC (nonVoidArg rep) $ + bindNewToReg bndr reg (mkLFArgument bndr) + ; restoreCurrentCostCentre cc_slot True + ; cgPrimAlts GCMayHappen alt_type reg alts } + + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)] + = -- Unboxed tuple case + -- By now, the simplifier should have turned it + -- into case e of (# a,b #) -> e + -- There shouldn't be a + -- case e of DEFAULT -> e + ASSERT2( case con of { DataAlt _ -> True; other -> False }, + text "cgEvalAlts: dodgy case of unboxed tuple type" ) + do { -- forkAbsC for the RHS, so that the envt is + -- not changed for the emitDirectReturn call + abs_c <- forkProc $ do + { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + -- Restore the CC *after* binding the tuple components, + -- so that we get the stack offset of the saved CC right.
+ ; restoreCurrentCostCentre cc_slot True + -- Generate a heap check if necessary + -- and finally the code for the alternative + ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts + (cgExpr rhs) } + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt alt_type alts + = -- Algebraic and polymorphic case + do { -- Bind the default binder + bindNewToReg bndr nodeReg (mkLFArgument bndr) + + -- Generate sequel info for use downstream + -- At the moment, we only do it if the type is vector-returnable. + -- Reason: if not, then it costs extra to label the + -- alternatives, because we'd get return code like: + -- + -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc } + -- + -- which is worse than having the alt code in the switch statement + + ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts + + ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) + alts mb_deflt srt ret_conv + + ; returnFC (CaseAlts lbl branches bndr False) } + where + ret_conv = case alt_type of + AlgAlt tc -> ctrlReturnConvAlg tc + PolyAlt -> UnvectoredReturn 0 +\end{code} + + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If +we do an inlining of the case, no separate functions for returning are +created, so we don't have to generate a GRAN_YIELD in that case. This info +must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be +emitted). Hence, the new Bool arg to cgAlgAltRhs. + +%************************************************************************ +%* * +\subsection[CgCase-alg-alts]{Algebraic alternatives} +%* * +%************************************************************************ + +In @cgAlgAlts@, none of the binders in the alternatives are +assumed to be yet bound. + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The +last arg of cgAlgAlts indicates if we want a context switch at the +beginning of each alternative. Normally we want that. The only exception +is inlined alternatives. + +\begin{code} +cgAlgAlts :: GCFlag + -> Maybe VirtualSpOffset + -> AltType -- ** AlgAlt or PolyAlt only ** + -> [StgAlt] -- The alternatives + -> FCode ( [(ConTagZ, CgStmts)], -- The branches + Maybe CgStmts ) -- The default case + +cgAlgAlts gc_flag cc_slot alt_type alts + = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts] + let + mb_deflt = case alts of -- DEFAULT is always first, if present + ((DEFAULT,blks) : _) -> Just blks + other -> Nothing + + branches = [(dataConTagZ con, blks) + | (DataAlt con, blks) <- alts] + -- in + return (branches, mb_deflt) + + +cgAlgAlt :: GCFlag + -> Maybe VirtualSpOffset -- Turgid state + -> AltType -- ** AlgAlt or PolyAlt only ** + -> StgAlt + -> FCode (AltCon, CgStmts) + +cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs) + = do { abs_c <- getCgStmts $ do + { bind_con_args con args + ; restoreCurrentCostCentre cc_slot True + ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) } + ; return (con, abs_c) } + where + bind_con_args DEFAULT args = nopC + bind_con_args (DataAlt dc) args = bindConArgs dc args +\end{code} + + +%************************************************************************ +%* * +\subsection[CgCase-prim-alts]{Primitive alternatives} +%* * +%************************************************************************ + +@cgPrimAlts@ generates a suitable @CSwitch@ +for dealing with the alternatives of a primitive @case@, given an +addressing mode for the thing to scrutinise.
It also keeps track of +the maximum stack depth encountered down any branch. + +As usual, no binders in the alternatives are yet bound. + +\begin{code} +cgPrimAlts :: GCFlag + -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck + -> CmmReg -- Scrutinee + -> [StgAlt] -- Alternatives + -> Code +-- NB: cgPrimAlts emits code that does the case analysis. +-- It's often used in inline situations, rather than to generate +-- a labelled return point. That's why its interface is a little +-- different to cgAlgAlts +-- +-- INVARIANT: the default binder is already bound +cgPrimAlts gc_flag alt_type scrutinee alts + = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts) + ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default + alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others] + ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC } + +cgPrimAlt :: GCFlag + -> AltType + -> StgAlt -- The alternative + -> FCode (AltCon, CgStmts) -- Its compiled form + +cgPrimAlt gc_flag alt_type (con, [], [], rhs) + = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } ) + do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs)) + ; returnFC (con, abs_c) } +\end{code} + + +%************************************************************************ +%* * +\subsection[CgCase-tidy]{Code for tidying up prior to an eval} +%* * +%************************************************************************ + +\begin{code} +maybeAltHeapCheck + :: GCFlag + -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt + -> Code -- Continuation + -> Code +maybeAltHeapCheck NoGC _ code = code +maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code + +saveVolatileVarsAndRegs + :: StgLiveVars -- Vars which should be made safe + -> FCode (CmmStmts, -- Assignments to do the saves + EndOfBlockInfo, -- sequel for the alts + Maybe VirtualSpOffset) -- Slot for current cost centre + +saveVolatileVarsAndRegs vars + = do { var_saves <- saveVolatileVars vars + ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre + ; eob_info <- getEndOfBlockInfo + ; returnFC (var_saves `plusStmts` cc_save, + eob_info, + maybe_cc_slot) } + + +saveVolatileVars :: StgLiveVars -- Vars which should be made safe + -> FCode CmmStmts -- Assignments to do the saves + +saveVolatileVars vars + = do { stmts_s <- mapFCs save_it (varSetElems vars) + ; return (foldr plusStmts noStmts stmts_s) } + where + save_it var + = do { v <- getCAddrModeIfVolatile var + ; case v of + Nothing -> return noStmts -- Non-volatile + Just vol_amode -> save_var var vol_amode -- Aha! It's volatile + } + + save_var var vol_amode + = do { slot <- allocPrimStack (idCgRep var) + ; rebindToStack var slot + ; sp_rel <- getSpRelOffset slot + ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) } +\end{code} + +--------------------------------------------------------------------------- + +When we save the current cost centre (which is done for lexical +scoping), we allocate a free stack location, and return (a)~the +virtual offset of the location, to pass on to the alternatives, and +(b)~the assignment to do the save (just as for @saveVolatileVars@).
+ +\begin{code} +saveCurrentCostCentre :: + FCode (Maybe VirtualSpOffset, -- Where we decide to store it + CmmStmts) -- Assignment to save it + +saveCurrentCostCentre + | not opt_SccProfilingOn + = returnFC (Nothing, noStmts) + | otherwise + = do { slot <- allocPrimStack PtrArg + ; sp_rel <- getSpRelOffset slot + ; returnFC (Just slot, + oneStmt (CmmStore sp_rel curCCS)) } + +-- Sometimes we don't free the slot containing the cost centre after restoring it +-- (see CgLetNoEscape.cgLetNoEscapeBody). +restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code +restoreCurrentCostCentre Nothing _freeit = nopC +restoreCurrentCostCentre (Just slot) freeit + = do { sp_rel <- getSpRelOffset slot + ; whenC freeit (freeStackSlots [slot]) + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } +\end{code} + diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs new file mode 100644 index 0000000000..1a2cbc5202 --- /dev/null +++ b/compiler/codeGen/CgClosure.lhs @@ -0,0 +1,599 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgClosure.lhs,v 1.72 2005/05/18 12:06:51 simonmar Exp $ +% +\section[CgClosure]{Code generation for closures} + +This module provides the support code for @StgToAbstractC@ to deal +with {\em closures} on the RHSs of let(rec)s. See also +@CgCon@, which deals with constructors. + +\begin{code} +module CgClosure ( cgTopRhsClosure, + cgStdRhsClosure, + cgRhsClosure, + emitBlackHoleCode, + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import CgBindery +import CgHeapery +import CgStackery ( mkVirtStkOffsets, pushUpdateFrame, getVirtSp, + setRealAndVirtualSp ) +import CgProf ( chooseDynCostCentres, ldvEnter, enterCostCentre, + costCentreFrom ) +import CgTicky +import CgParallel ( granYield, granFetchAndReschedule ) +import CgInfoTbls ( emitClosureCodeAndInfoTable, getSRTInfo ) +import CgCallConv ( assignCallRegs, mkArgDescr ) +import CgUtils ( emitDataLits, addIdReps, cmmRegOffW, + emitRtsCallWithVols ) +import ClosureInfo -- lots and lots of stuff +import SMRep ( CgRep, cgRepSizeW, argMachRep, fixedHdrSize, WordOff, + idCgRep ) +import MachOp ( MachHint(..) ) +import Cmm +import CmmUtils ( CmmStmts, mkStmts, oneStmt, plusStmts, noStmts, + mkLblExpr ) +import CLabel +import StgSyn +import StaticFlags ( opt_DoTickyProfiling ) +import CostCentre +import Id ( Id, idName, idType ) +import Name ( Name, isExternalName ) +import Module ( Module, pprModule ) +import ListSetOps ( minusList ) +import Util ( isIn, mapAccumL, zipWithEqual ) +import BasicTypes ( TopLevelFlag(..) ) +import Constants ( oFFSET_StgInd_indirectee, wORD_SIZE ) +import Outputable +import FastString +\end{code} + +%******************************************************** +%* * +\subsection[closures-no-free-vars]{Top-level closures} +%* * +%******************************************************** + +For closures bound at top level, allocate in static space. +They should have no free variables. 
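+For example (ordinary source-level Haskell, editorial illustration only):
+
+    answer = product [1..5]    -- an updatable top-level thunk (a CAF)
+    double x = x + x           -- a re-entrant top-level function
+
+Both are laid out statically by @cgTopRhsClosure@ below; neither has any
+free variables.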
+
+\begin{code}
+cgTopRhsClosure :: Id
+		-> CostCentreStack	-- Optional cost centre annotation
+		-> StgBinderInfo
+		-> SRT
+		-> UpdateFlag
+		-> [Id]			-- Args
+		-> StgExpr
+		-> FCode (Id, CgIdInfo)
+
+cgTopRhsClosure id ccs binder_info srt upd_flag args body = do
+  {	-- LAY OUT THE OBJECT
+    let name = idName id
+  ; lf_info  <- mkClosureLFInfo id TopLevel [] upd_flag args
+  ; srt_info <- getSRTInfo name srt
+  ; mod_name <- moduleName
+  ; let descr		= closureDescription mod_name name
+	closure_info	= mkClosureInfo True id lf_info 0 0 srt_info descr
+	closure_label	= mkLocalClosureLabel name
+	cg_id_info	= stableIdInfo id (mkLblExpr closure_label) lf_info
+	closure_rep	= mkStaticClosureFields closure_info ccs True []
+
+	 -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
+  ; emitDataLits closure_label closure_rep
+  ; forkClosureBody (closureCodeBody binder_info closure_info
+				     ccs args body)
+
+  ; returnFC (id, cg_id_info) }
+\end{code}
+
+%********************************************************
+%*							*
+\subsection[non-top-level-closures]{Non top-level closures}
+%*							*
+%********************************************************
+
+For closures with free vars, allocate in heap.
+
+\begin{code}
+cgStdRhsClosure
+	:: Id
+	-> CostCentreStack	-- Optional cost centre annotation
+	-> StgBinderInfo
+	-> [Id]			-- Free vars
+	-> [Id]			-- Args
+	-> StgExpr
+	-> LambdaFormInfo
+	-> [StgArg]		-- payload
+	-> FCode (Id, CgIdInfo)
+
+cgStdRhsClosure bndr cc bndr_info fvs args body lf_info payload
+  = do	-- AHA!  A STANDARD-FORM THUNK
+    {	-- LAY OUT THE OBJECT
+      amodes <- getArgAmodes payload
+    ; mod_name <- moduleName
+    ; let (tot_wds, ptr_wds, amodes_w_offsets)
+	      = mkVirtHeapOffsets (isLFThunk lf_info) amodes
+
+	  descr = closureDescription mod_name (idName bndr)
+	  closure_info = mkClosureInfo False	-- Not static
+				       bndr lf_info tot_wds ptr_wds
+				       NoC_SRT	-- No SRT for a std-form closure
+				       descr
+
+    ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+
+	-- BUILD THE OBJECT
+    ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+	-- RETURN
+    ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+\end{code}
+
+Here's the general case.
+
+\begin{code}
+cgRhsClosure :: Id
+	     -> CostCentreStack	-- Optional cost centre annotation
+	     -> StgBinderInfo
+	     -> SRT
+	     -> [Id]		-- Free vars
+	     -> UpdateFlag
+	     -> [Id]		-- Args
+	     -> StgExpr
+	     -> FCode (Id, CgIdInfo)
+
+cgRhsClosure bndr cc bndr_info srt fvs upd_flag args body = do
+  {	-- LAY OUT THE OBJECT
+	-- If the binder is itself a free variable, then don't store
+	-- it in the closure.  Instead, just bind it to Node on entry.
+	-- NB we can be sure that Node will point to it, because we
+	-- haven't told mkClosureLFInfo about this; so if the binder
+	-- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
+	-- stored in the closure itself, so it will make sure that
+	-- Node points to it...
+    let
+	name	     = idName bndr
+	is_elem	     = isIn "cgRhsClosure"
+	bndr_is_a_fv = bndr `is_elem` fvs
+	reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr]
+		    | otherwise	   = fvs
+
+  ; lf_info  <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
+  ; fv_infos <- mapFCs getCgIdInfo reduced_fvs
+  ; srt_info <- getSRTInfo name srt
+  ; mod_name <- moduleName
+  ; let	bind_details :: [(CgIdInfo, VirtualHpOffset)]
+	(tot_wds, ptr_wds, bind_details)
+	    = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos)
+
+	add_rep info = (cgIdInfoArgRep info, info)
+
+	descr = closureDescription mod_name name
+	closure_info = mkClosureInfo False	-- Not static
+				     bndr lf_info tot_wds ptr_wds
+				     srt_info descr
+
+	-- BUILD ITS INFO TABLE AND CODE
+  ; forkClosureBody (do
+	{	-- Bind the fvs
+	  let bind_fv (info, offset)
+		= bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info)
+	; mapCs bind_fv bind_details
+
+		-- Bind the binder itself, if it is a free var
+	; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info)
+
+		-- Compile the body
+	; closureCodeBody bndr_info closure_info cc args body })
+
+	-- BUILD THE OBJECT
+  ; let
+	to_amode (info, offset) = do { amode <- idInfoToAmode info
+				     ; return (amode, offset) }
+  ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+  ; amodes_w_offsets <- mapFCs to_amode bind_details
+  ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+	-- RETURN
+  ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+
+
+mkClosureLFInfo :: Id		-- The binder
+		-> TopLevelFlag	-- True if top level
+		-> [Id]		-- Free vars
+		-> UpdateFlag	-- Update flag
+		-> [Id]		-- Args
+		-> FCode LambdaFormInfo
+mkClosureLFInfo bndr top fvs upd_flag args
+  | null args = return (mkLFThunk (idType bndr) top fvs upd_flag)
+  | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args
+		   ; return (mkLFReEntrant top fvs args arg_descr) }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[code-for-closures]{The code for closures}
+%*									*
+%************************************************************************
+
+\begin{code}
+closureCodeBody :: StgBinderInfo
+		-> ClosureInfo	   -- Lots of information about this closure
+		-> CostCentreStack -- Optional cost centre attached to closure
+		-> [Id]
+		-> StgExpr
+		-> Code
+\end{code}
+
+There are two main cases for the code for closures.  If there are {\em
+no arguments}, then the closure is a thunk, and not in normal form.
+So it should set up an update frame (if it is shared).
+NB: Thunks cannot have a primitive type!
+
+\begin{code}
+closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do
+  { body_absC <- getCgStmts $ do
+	{ tickyEnterThunk cl_info
+	; ldvEnter (CmmReg nodeReg)  -- NB: Node always points when profiling
+	; thunkWrapper cl_info $ do
+		-- We only enter cc after setting up update so
+		-- that cc of enclosing scope will be recorded
+		-- in the update frame.  CAF/DICT functions will be
+		-- subsumed by this enclosing cc
+	    { enterCostCentre cl_info cc body
+	    ; cgExpr body }
+	}
+
+  ; emitClosureCodeAndInfoTable cl_info [] body_absC }
+\end{code}
+
+If there is /at least one argument/, then this closure is in
+normal form, so there is no need to set up an update frame.
+
+The macros for GrAnSim are produced at the beginning of the
+argSatisfactionCheck (by calling fetchAndReschedule).  There, info on
+whether Node points to the closure is available.
-- HWL + +\begin{code} +closureCodeBody binder_info cl_info cc args body + = ASSERT( length args > 0 ) + do { -- Get the current virtual Sp (it might not be zero, + -- eg. if we're compiling a let-no-escape). + vSp <- getVirtSp + ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + (sp_top, stk_args) = mkVirtStkOffsets vSp other_args + + -- Allocate the global ticky counter + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; emitTickyCounter cl_info args sp_top + + -- ...and establish the ticky-counter + -- label for this block + ; setTickyCtrLabel ticky_ctr_lbl $ do + + -- Emit the slow-entry code + { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; blks <- forkProc $ + mkFunEntryCode cl_info cc reg_args stk_args + sp_top reg_save_code body + ; emitClosureCodeAndInfoTable cl_info [] blks + }} + + + +mkFunEntryCode :: ClosureInfo + -> CostCentreStack + -> [(Id,GlobalReg)] -- Args in regs + -> [(Id,VirtualSpOffset)] -- Args on stack + -> VirtualSpOffset -- Last allocated word on stack + -> CmmStmts -- Register-save code in case of GC + -> StgExpr + -> Code +-- The main entry code for the closure +mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do + { -- Bind args to regs/stack as appropriate, + -- and record expected position of sps + ; bindArgsToRegs reg_args + ; bindArgsToStack stk_args + ; setRealAndVirtualSp sp_top + + -- Enter the cost-centre, if required + -- ToDo: It's not clear why this is outside the funWrapper, + -- but the tickyEnterFun is inside. Perhaps we can put + -- them together? + ; enterCostCentre cl_info cc body + + -- Do the business + ; funWrapper cl_info reg_args reg_save_code $ do + { tickyEnterFun cl_info + ; cgExpr body } + } +\end{code} + +The "slow entry" code for a function. This entry point takes its +arguments on the stack. It loads the arguments into registers +according to the calling convention, and jumps to the function's +normal entry point. The function's closure is assumed to be in +R1/node. + +The slow entry point is used in two places: + + (a) unknown calls: eg. 
stg_PAP_entry
+	(b) returning from a heap-check failure
+
+\begin{code}
+mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts
+-- If this function doesn't have a specialised ArgDescr, we need
+-- to generate the function's arg bitmap, slow-entry code, and
+-- register-save code for the heap-check failure
+-- Here, we emit the slow-entry code, and
+-- return the register-save assignments
+mkSlowEntryCode cl_info reg_args
+  | Just (_, ArgGen _) <- closureFunInfo cl_info
+  = do	{ emitSimpleProc slow_lbl (emitStmts load_stmts)
+	; return save_stmts }
+  | otherwise = return noStmts
+  where
+     name = closureName cl_info
+     slow_lbl = mkSlowEntryLabel name
+
+     load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry]
+     save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts
+
+     reps_w_regs :: [(CgRep,GlobalReg)]
+     reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args]
+     (final_stk_offset, stk_offsets)
+	= mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off))
+		    0 reps_w_regs
+
+     load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets
+     mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg)
+					  (CmmLoad (cmmRegOffW spReg offset)
+						   (argMachRep rep))
+
+     save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets
+     mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg )
+				CmmStore (cmmRegOffW spReg offset)
+					 (CmmReg (CmmGlobal reg))
+
+     stk_adj_pop   = CmmAssign spReg (cmmRegOffW spReg final_stk_offset)
+     stk_adj_push  = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset))
+     jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) []
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsection[closure-code-wrappers]{Wrappers around closure code}
+%*									*
+%************************************************************************
+
+\begin{code}
+thunkWrapper :: ClosureInfo -> Code -> Code
+thunkWrapper closure_info thunk_code = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+    -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
+    -- (we prefer fetchAndReschedule-style context switches to yield ones)
+  ; if node_points
+    then granFetchAndReschedule [] node_points
+    else granYield		[] node_points
+
+	-- Stack and/or heap checks
+  ; thunkEntryChecks closure_info $ do
+	{	-- Overwrite with black hole if necessary
+	  whenC (blackHoleOnEntry closure_info && node_points)
+		(blackHoleIt closure_info)
+	; setupUpdate closure_info thunk_code }
+		-- setupUpdate *encloses* the thunk_code
+  }
+
+funWrapper :: ClosureInfo	-- Closure whose code body this is
+	   -> [(Id,GlobalReg)]	-- List of argument registers (if any)
+	   -> CmmStmts		-- reg saves for the heap check failure
+	   -> Code		-- Body of function being compiled
+	   -> Code
+funWrapper closure_info arg_regs reg_save_code fun_body = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+	-- Enter for Ldv profiling
+  ; whenC node_points (ldvEnter (CmmReg nodeReg))
+
+	-- GranSim yield point
+  ; granYield arg_regs node_points
+
+	-- Heap and/or stack checks wrap the function body
+  ; funEntryChecks closure_info reg_save_code
+		   fun_body
+  }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsubsection[update-and-BHs]{Update and black-hole wrappers}
+%*									*
+%************************************************************************
+
+
+\begin{code}
+blackHoleIt :: ClosureInfo -> Code	-- Only called for closures
with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> Code +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + stmtC (CmmStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel SLIT("stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel SLIT("stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of + -- single-entry thunks. + eager_blackholing + | opt_DoTickyProfiling = True + | otherwise = False + +\end{code} + +\begin{code} +setupUpdate :: ClosureInfo -> Code -> Code -- Only called for closures with no args + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info code + | closureReEntrant closure_info + = code + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code } + else do { tickyUpdateFrameOmitted; code } + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure code } + else do + { -- No update reqd, you'd think we don't need to + -- black-hole it. But when ticky-ticky is on, we + -- black-hole it regardless, to catch errors in which + -- an allegedly single-entry closure is entered twice + -- + -- We discard the pointer returned by link_caf, because + -- we don't push an update frame + whenC opt_DoTickyProfiling -- Blackhole even a SE CAF + (link_caf closure_info False >> nopC) + ; tickyUpdateFrameOmitted + ; code } + } + + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) 
+--
+
+-- ToDo [Feb 04]  This entire link_caf nonsense could all be moved
+-- into the "newCAF" RTS procedure, which we call anyway, including
+-- the allocation of the black-hole indirection closure.
+-- That way, code size would fall, the CAF-handling code would
+-- be closer together, and the compiler wouldn't need to know
+-- about off_indirectee etc.
+
+link_caf :: ClosureInfo
+	 -> Bool		-- True <=> updatable, False <=> single-entry
+	 -> FCode CmmExpr	-- Returns amode for closure to be updated
+-- To update a CAF we must allocate a black hole, link the CAF onto the
+-- CAF list, then update the CAF to point to the fresh black hole.
+-- This function returns the address of the black hole, so it can be
+-- updated with the new value when available.  The reason for all of this
+-- is that we only want to update dynamic heap objects, not static ones,
+-- so that generational GC is easier.
+link_caf cl_info is_upd = do
+  {	-- Alloc black hole specifying CC_HDR(Node) as the cost centre
+  ; let use_cc   = costCentreFrom (CmmReg nodeReg)
+	blame_cc = use_cc
+  ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc []
+  ; hp_rel    <- getHpRelOffset hp_offset
+
+	-- Call the RTS function newCAF to add the CAF to the CafList
+	-- so that the garbage collector can find them
+	-- This must be done *before* the info table pointer is overwritten,
+	-- because the old info table ptr is needed for reversion
+  ; emitRtsCallWithVols SLIT("newCAF") [(CmmReg nodeReg,PtrHint)] [node]
+	-- node is live, so save it.
+
+	-- Overwrite the closure with a (static) indirection
+	-- to the newly-allocated black hole
+  ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel
+	   , CmmStore (CmmReg nodeReg) ind_static_info ]
+
+  ; returnFC hp_rel }
+  where
+    bh_cl_info :: ClosureInfo
+    bh_cl_info | is_upd    = cafBlackHoleClosureInfo   cl_info
+	       | otherwise = seCafBlackHoleClosureInfo cl_info
+
+    ind_static_info :: CmmExpr
+    ind_static_info = mkLblExpr mkIndStaticInfoLabel
+
+    off_indirectee :: WordOff
+    off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[CgClosure-Description]{Profiling Closure Description.}
+%*									*
+%************************************************************************
+
+For "global" data constructors the description is simply the
+occurrence name of the data constructor itself.  Otherwise it is
+determined by @closureDescription@ from the let binding information.
+
+\begin{code}
+closureDescription :: Module		-- Module
+		   -> Name		-- Id of closure binding
+		   -> String
+	-- Not called for StgRhsCon, which have global info tables built in
+	-- CgConTbls.lhs with a description generated from the data constructor
+closureDescription mod_name name
+  = showSDocDump (char '<' <>
+		  (if isExternalName name
+		   then ppr name -- ppr will include the module name prefix
+		   else pprModule mod_name <> char '.' <> ppr name) <>
+		  char '>')
+   -- showSDocDump, because we want to see the unique on the Name.
+\end{code}
+
diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs
new file mode 100644
index 0000000000..bfb55bf46e
--- /dev/null
+++ b/compiler/codeGen/CgCon.lhs
@@ -0,0 +1,457 @@
+%
+% (c) The GRASP Project, Glasgow University, 1992-1998
+%
+\section[CgCon]{Code generation for constructors}
+
+This module provides the support code for @StgToAbstractC@ to deal
+with {\em constructors} on the RHSs of let(rec)s.  See also
+@CgClosure@, which deals with closures.
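+
+For instance, given the (illustrative) source binding
+
+    t = Just x
+
+where @x@ is itself a top-level value, the right-hand side is a
+saturated constructor application, so @t@ is emitted by @cgTopRhsCon@
+as static data; no entry code is needed for it.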
+ +\begin{code} +module CgCon ( + cgTopRhsCon, buildDynCon, + bindConArgs, bindUnboxedTupleComponents, + cgReturnDataCon, + cgTyCon + ) where + +#include "HsVersions.h" + +import CgMonad +import StgSyn + +import CgBindery ( getArgAmodes, bindNewToNode, + bindArgsToRegs, idInfoToAmode, stableIdInfo, + heapIdInfo, CgIdInfo, bindArgsToStack + ) +import CgStackery ( mkVirtStkOffsets, freeStackSlots, + getRealSp, getVirtSp, setRealAndVirtualSp ) +import CgUtils ( addIdReps, cmmLabelOffW, emitRODataLits, emitDataLits ) +import CgCallConv ( assignReturnRegs ) +import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE ) +import CgHeapery ( allocDynClosure, layOutDynConstr, + layOutStaticConstr, mkStaticClosureFields ) +import CgTailCall ( performReturn, emitKnownConReturnCode, returnUnboxedTuple ) +import CgProf ( mkCCostCentreStack, ldvEnter, curCCS ) +import CgTicky +import CgInfoTbls ( emitClosureCodeAndInfoTable, dataConTagZ ) +import CLabel +import ClosureInfo ( mkConLFInfo, mkLFArgument ) +import CmmUtils ( mkLblExpr ) +import Cmm +import SMRep ( WordOff, CgRep, separateByPtrFollowness, + fixedHdrSize, typeCgRep ) +import CostCentre ( currentOrSubsumedCCS, dontCareCCS, CostCentreStack, + currentCCS ) +import Constants ( mIN_INTLIKE, mAX_INTLIKE, mIN_CHARLIKE, mAX_CHARLIKE ) +import TyCon ( TyCon, tyConDataCons, isEnumerationTyCon, tyConName ) +import DataCon ( DataCon, dataConRepArgTys, isNullaryRepDataCon, + isUnboxedTupleCon, dataConWorkId, + dataConName, dataConRepArity + ) +import Id ( Id, idName, isDeadBinder ) +import Type ( Type ) +import PrelInfo ( maybeCharLikeCon, maybeIntLikeCon ) +import Outputable +import Util ( lengthIs ) +import ListSetOps ( assocMaybe ) +\end{code} + + +%************************************************************************ +%* * +\subsection[toplevel-constructors]{Top-level constructors} +%* * +%************************************************************************ + +\begin{code} +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { + ; hmods <- getHomeModules +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. + ; ASSERT( not (isDllConApp hmods con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; amodes <- getArgAmodes args + + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel hmods name + caffy = any stgArgHasCafRefs args + (closure_info, amodes_w_offsets) = layOutStaticConstr hmods con amodes + closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + payload = map get_lit amodes_w_offsets + get_lit (CmmLit lit, _offset) = lit + get_lit other = pprPanic "CgCon.get_lit" (ppr other) + -- NB1: amodes_w_offsets is sorted into ptrs first, then non-ptrs + -- NB2: all the amodes should be Lits! 
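+	-- Illustration (hypothetical example, not from this module): for
+	-- a saturated application (MkPair 3# 4#) the amodes are two
+	-- CmmInt literals, so the payload is the two words [3, 4], which
+	-- emitDataLits below emits verbatim into the static closure.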
+
+	-- BUILD THE OBJECT
+  ; emitDataLits closure_label closure_rep
+
+	-- RETURN
+  ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) }
+\end{code}
+
+%************************************************************************
+%*									*
+%* non-top-level constructors						*
+%*									*
+%************************************************************************
+\subsection[code-for-constructors]{The code for constructors}
+
+\begin{code}
+buildDynCon :: Id		  -- Name of the thing to which this constr will
+				  -- be bound
+	    -> CostCentreStack	  -- Where to grab cost centre from;
+				  -- current CCS if currentOrSubsumedCCS
+	    -> DataCon		  -- The data constructor
+	    -> [(CgRep,CmmExpr)]  -- Its args
+	    -> FCode CgIdInfo	  -- Return details about how to find it
+
+-- We used to pass a boolean indicating whether all the
+-- args were of size zero, so we could use a static
+-- constructor; but I concluded that it just isn't worth it.
+-- Now that I/O uses unboxed tuples, there just aren't any constructors
+-- with all size-zero args.
+--
+-- The reason for having a separate argument, rather than looking at
+-- the addr modes of the args is that we may be in a "knot", and
+-- premature looking at the args will cause the compiler to black-hole!
+\end{code}
+
+First we deal with the case of zero-arity constructors.  Now, they
+will probably be unfolded, so we don't expect to see this case much,
+if at all, but it does no harm, and sets the scene for characters.
+
+In the case of zero-arity constructors, or, more accurately, those
+which have exclusively size-zero (VoidRep) args, we generate no code
+at all.
+
+\begin{code}
+buildDynCon binder cc con []
+  = do hmods <- getHomeModules
+       returnFC (stableIdInfo binder
+			      (mkLblExpr (mkClosureLabel hmods (dataConName con)))
+			      (mkConLFInfo con))
+\end{code}
+
+The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry.  I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+now has 31 bits.  Only literals are handled here. -- Qrczak
+
+Now for @Char@-like closures.  We generate an assignment of the
+address of the closure to a temporary.  It would be possible simply to
+generate no code, and record the addressing mode in the environment,
+but we'd have to be careful if the argument wasn't a constant --- so
+for simplicity we just always assign to a temporary.
+
+Last special case: @Int@-like closures.  We only special-case the
+situation in which the argument is a literal in the range
+@mIN_INTLIKE@..@mAX_INTLIKE@.  NB: for @Char@-like closures we can
+work with any old argument, but for @Int@-like ones the argument has
+to be a literal.  Reason: @Char@-like closures have an argument type
+which is guaranteed in range.
+
+Because of this, we can safely return an addressing mode.
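+
+As a worked example (with illustrative numbers, not necessarily the
+real RTS values): if @fixedHdrSize@ is 1 and @mIN_INTLIKE@ is -16, each
+INTLIKE closure occupies 2 words (header plus payload), so the closure
+for the literal 5 lives at
+
+    stg_INTLIKE_closure + (5 - (-16)) * 2 = stg_INTLIKE_closure + 42 words
+
+which is exactly the @offsetW@ computation in the code below.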
+ +\begin{code} +buildDynCon binder cc con [arg_amode] + | maybeIntLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE + = do { let intlike_lbl = mkRtsDataLabel SLIT("stg_INTLIKE_closure") + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) + ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + +buildDynCon binder cc con [arg_amode] + | maybeCharLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsDataLabel SLIT("stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) + ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } +\end{code} + +Now the general case. + +\begin{code} +buildDynCon binder ccs con args + = do { + ; hmods <- getHomeModules + ; let + (closure_info, amodes_w_offsets) = layOutDynConstr hmods con args + + ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; returnFC (heapIdInfo binder hp_off lf_info) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) +\end{code} + + +%************************************************************************ +%* * +%* constructor-related utility function: * +%* bindConArgs is called from cgAlt of a case * +%* * +%************************************************************************ +\subsection[constructor-utilities]{@bindConArgs@: constructor-related utility} + +@bindConArgs@ $con args$ augments the environment with bindings for the +binders $args$, assuming that we have just returned from a @case@ which +found a $con$. + +\begin{code} +bindConArgs :: DataCon -> [Id] -> Code +bindConArgs con args + = do hmods <- getHomeModules + let + bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + (_, args_w_offsets) = layOutDynConstr hmods con (addIdReps args) + -- + ASSERT(not (isUnboxedTupleCon con)) return () + mapCs bind_arg args_w_offsets +\end{code} + +Unboxed tuples are handled slightly differently - the object is +returned in registers and on the stack instead of the heap. 
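+For example (illustrative source):
+
+    divMod' :: Int -> Int -> (# Int, Int #)
+
+Its two result components come back in return registers where possible;
+any that do not fit go on the stack, with pointers and non-pointers
+kept apart (see @bindUnboxedTupleComponents@ below) so the stack
+remains scannable by the garbage collector.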
+ +\begin{code} +bindUnboxedTupleComponents + :: [Id] -- Args + -> FCode ([(Id,GlobalReg)], -- Regs assigned + WordOff, -- Number of pointer stack slots + WordOff, -- Number of non-pointer stack slots + VirtualSpOffset) -- Offset of return address slot + -- (= realSP on entry) + +bindUnboxedTupleComponents args + = do { + vsp <- getVirtSp + ; rsp <- getRealSp + + -- Assign as many components as possible to registers + ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + + -- Separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness stk_args + + -- Allocate the rest on the stack + -- The real SP points to the return address, above which any + -- leftover unboxed-tuple components will be allocated + (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp ptr_args + (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp nptr_args + ptrs = ptr_sp - rsp + nptrs = nptr_sp - ptr_sp + + -- The stack pointer points to the last stack-allocated component + ; setRealAndVirtualSp nptr_sp + + -- We have just allocated slots starting at real SP + 1, and set the new + -- virtual SP to the topmost allocated slot. + -- If the virtual SP started *below* the real SP, we've just jumped over + -- some slots that won't be in the free-list, so put them there + -- This commonly happens because we've freed the return-address slot + -- (trimming back the virtual SP), but the real SP still points to that slot + ; freeStackSlots [vsp+1,vsp+2 .. rsp] + + ; bindArgsToRegs reg_args + ; bindArgsToStack ptr_offsets + ; bindArgsToStack nptr_offsets + + ; returnFC (reg_args, ptrs, nptrs, rsp) } +\end{code} + +%************************************************************************ +%* * + Actually generate code for a constructor return +%* * +%************************************************************************ + + +Note: it's the responsibility of the @cgReturnDataCon@ caller to be +sure the @amodes@ passed don't conflict with each other. +\begin{code} +cgReturnDataCon :: DataCon -> [(CgRep, CmmExpr)] -> Code + +cgReturnDataCon con amodes + = ASSERT( amodes `lengthIs` dataConRepArity con ) + do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo + ; case sequel of + CaseAlts _ (Just (alts, deflt_lbl)) bndr _ + -> -- Ho! We know the constructor so we can + -- go straight to the right alternative + case assocMaybe alts (dataConTagZ con) of { + Just join_lbl -> build_it_then (jump_to join_lbl); + Nothing + -- Special case! We're returning a constructor to the default case + -- of an enclosing case. For example: + -- + -- case (case e of (a,b) -> C a b) of + -- D x -> ... + -- y -> ...<returning here!>... + -- + -- In this case, + -- if the default is a non-bind-default (ie does not use y), + -- then we should simply jump to the default join point; + + | isDeadBinder bndr -> performReturn (jump_to deflt_lbl) + | otherwise -> build_it_then (jump_to deflt_lbl) } + + other_sequel -- The usual case + | isUnboxedTupleCon con -> returnUnboxedTuple amodes + | otherwise -> build_it_then (emitKnownConReturnCode con) + } + where + jump_to lbl = stmtC (CmmJump (CmmLit lbl) []) + build_it_then return_code + = do { -- BUILD THE OBJECT IN THE HEAP + -- The first "con" says that the name bound to this + -- closure is "con", which is a bit of a fudge, but it only + -- affects profiling + + -- This Id is also used to get a unique for a + -- temporary variable, if the closure is a CHARLIKE. 
+		-- funnily enough, this makes the unique always come
+		-- out as '54' :-)
+	  tickyReturnNewCon (length amodes)
+	; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes
+	; amode <- idInfoToAmode idinfo
+	; checkedAbsC (CmmAssign nodeReg amode)
+	; performReturn return_code }
+\end{code}
+
+
+%************************************************************************
+%*									*
+	Generating static stuff for algebraic data types
+%*									*
+%************************************************************************
+
+	[These comments are rather out of date]
+
+\begin{tabular}{lll}
+Info tbls &	Macro  &     	     Kind of constructor \\
+\hline
+info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\
+info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\
+info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\
+info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\
+info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\
+\end{tabular}
+
+Possible info tables for constructor con:
+
+\begin{description}
+\item[@_con_info@:]
+Used for dynamically let(rec)-bound occurrences of
+the constructor, and for updates.  For constructors
+which are int-like, char-like or nullary, when GC occurs,
+the closure tries to get rid of itself.
+
+\item[@_static_info@:]
+Static occurrences of the constructor
+macro: @STATIC_INFO_TABLE@.
+\end{description}
+
+For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure;
+its place is taken by the top level defn of the constructor.
+
+For charlike and intlike closures there is a fixed array of static
+closures predeclared.
+
+\begin{code}
+cgTyCon :: TyCon -> FCode [Cmm]  -- each constructor gets a separate Cmm
+cgTyCon tycon
+  = do	{ constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon)
+
+	    -- Generate a table of static closures for an enumeration type
+	    -- Put the table after the data constructor decls, because the
+	    -- datatype closure table (for enumeration types) refers
+	    -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
+	; extra <-
+	   if isEnumerationTyCon tycon then do
+		tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel
+						(tyConName tycon))
+			   [ CmmLabel (mkLocalClosureLabel (dataConName con))
+			   | con <- tyConDataCons tycon])
+		return [tbl]
+	   else
+		return []
+
+	; return (extra ++ constrs)
+	}
+\end{code}
+
+Generate the entry code, info tables, and (for niladic constructors)
+the static closure, for a constructor.
+
+\begin{code}
+cgDataCon :: DataCon -> Code
+cgDataCon data_con
+  = do	{ -- Don't need any dynamic closure code for zero-arity constructors
+	  hmods <- getHomeModules
+
+	; let
+	    -- To allow the debuggers, interpreters, etc to cope with
+	    -- static data structures (ie those built at compile
+	    -- time), we take care that the info-table contains the
+	    -- information we need.
+ (static_cl_info, _) = + layOutStaticConstr hmods data_con arg_reps + + (dyn_cl_info, arg_things) = + layOutDynConstr hmods data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCgStmts the_code + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + where + the_code = do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; body_code } + + arg_reps :: [(CgRep, Type)] + arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + + body_code = do { + -- NB: We don't set CC when entering data (WDP 94/06) + tickyReturnOldCon (length arg_things) + ; performReturn (emitKnownConReturnCode data_con) } + -- noStmts: Ptr to thing already in Node + + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + where +\end{code} diff --git a/compiler/codeGen/CgExpr.hi-boot-5 b/compiler/codeGen/CgExpr.hi-boot-5 new file mode 100644 index 0000000000..588e63f8f1 --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-5 @@ -0,0 +1,3 @@ +__interface CgExpr 1 0 where +__export CgExpr cgExpr; +1 cgExpr :: StgSyn.StgExpr -> CgMonad.Code ; diff --git a/compiler/codeGen/CgExpr.hi-boot-6 b/compiler/codeGen/CgExpr.hi-boot-6 new file mode 100644 index 0000000000..dc2d75cefe --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-6 @@ -0,0 +1,3 @@ +module CgExpr where + +cgExpr :: StgSyn.StgExpr -> CgMonad.Code diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs new file mode 100644 index 0000000000..33d72f1608 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs @@ -0,0 +1,454 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgExpr.lhs,v 1.62 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgExpr]{Converting @StgExpr@s} +%* * +%******************************************************** + +\begin{code} +module CgExpr ( cgExpr ) where + +#include "HsVersions.h" + +import Constants ( mAX_SPEC_SELECTEE_SIZE, mAX_SPEC_AP_SIZE ) +import StgSyn +import CgMonad + +import SMRep ( fixedHdrSize, isFollowableArg, CgRep(..), argMachRep, + nonVoidArg, idCgRep, typeCgRep, typeHint, + primRepToCgRep ) +import CoreSyn ( AltCon(..) ) +import CgProf ( emitSetCCC ) +import CgHeapery ( layOutDynConstr ) +import CgBindery ( getArgAmodes, getArgAmode, CgIdInfo, + nukeDeadBindings, addBindC, addBindsC ) +import CgCase ( cgCase, saveVolatileVarsAndRegs ) +import CgClosure ( cgRhsClosure, cgStdRhsClosure ) +import CgCon ( buildDynCon, cgReturnDataCon ) +import CgLetNoEscape ( cgLetNoEscapeClosure ) +import CgCallConv ( dataReturnConvPrim ) +import CgTailCall +import CgInfoTbls ( emitDirectReturnInstr ) +import CgForeignCall ( emitForeignCall, shimForeignCallArg ) +import CgPrimOp ( cgPrimOp ) +import CgUtils ( addIdReps, newTemp, assignTemp, cgLit, tagToClosure ) +import ClosureInfo ( mkSelectorLFInfo, mkApLFInfo ) +import Cmm ( CmmExpr(..), CmmStmt(..), CmmReg, nodeReg ) +import MachOp ( wordRep, MachHint ) +import VarSet +import Literal ( literalType ) +import PrimOp ( primOpOutOfLine, getPrimOpResultInfo, + PrimOp(..), PrimOpResultInfo(..) ) +import Id ( Id ) +import TyCon ( isUnboxedTupleTyCon, isEnumerationTyCon ) +import Type ( Type, tyConAppArgs, tyConAppTyCon, repType, + PrimRep(VoidRep) ) +import Maybes ( maybeToBool ) +import ListSetOps ( assocMaybe ) +import BasicTypes ( RecFlag(..) 
) +import Util ( lengthIs ) +import Outputable +\end{code} + +This module provides the support code for @StgToAbstractC@ to deal +with STG {\em expressions}. See also @CgClosure@, which deals +with closures, and @CgCon@, which deals with constructors. + +\begin{code} +cgExpr :: StgExpr -- input + -> Code -- output +\end{code} + +%******************************************************** +%* * +%* Tail calls * +%* * +%******************************************************** + +``Applications'' mean {\em tail calls}, a service provided by module +@CgTailCall@. This includes literals, which show up as +@(STGApp (StgLitArg 42) [])@. + +\begin{code} +cgExpr (StgApp fun args) = cgTailCall fun args +\end{code} + +%******************************************************** +%* * +%* STG ConApps (for inline versions) * +%* * +%******************************************************** + +\begin{code} +cgExpr (StgConApp con args) + = do { amodes <- getArgAmodes args + ; cgReturnDataCon con amodes } +\end{code} + +Literals are similar to constructors; they return by putting +themselves in an appropriate register and returning to the address on +top of the stack. + +\begin{code} +cgExpr (StgLit lit) + = do { cmm_lit <- cgLit lit + ; performPrimReturn rep (CmmLit cmm_lit) } + where + rep = typeCgRep (literalType lit) +\end{code} + + +%******************************************************** +%* * +%* PrimOps and foreign calls. +%* * +%******************************************************** + +NOTE about "safe" foreign calls: a safe foreign call is never compiled +inline in a case expression. When we see + + case (ccall ...) of { ... } + +We generate a proper return address for the alternatives and push the +stack frame before doing the call, so that in the event that the call +re-enters the RTS the stack is in a sane state. + +\begin{code} +cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do + {- + First, copy the args into temporaries. We're going to push + a return address right before doing the call, so the args + must be out of the way. + -} + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + -- in + arg_tmps <- mapM assignTemp arg_exprs + let + arg_hints = zip arg_tmps (map (typeHint.stgArgType) stg_args) + -- in + {- + Now, allocate some result regs. + -} + (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty + ccallReturnUnboxedTuple (zip res_reps (map CmmReg res_regs)) $ + emitForeignCall (zip res_regs res_hints) fcall + arg_hints emptyVarSet{-no live vars-} + +-- tagToEnum# is special: we need to pull the constructor out of the table, +-- and perform an appropriate return. + +cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) + = ASSERT(isEnumerationTyCon tycon) + do { (_,amode) <- getArgAmode arg + ; amode' <- assignTemp amode -- We're going to use it twice, + -- so save in a temp if non-trivial + ; hmods <- getHomeModules + ; stmtC (CmmAssign nodeReg (tagToClosure hmods tycon amode')) + ; performReturn (emitAlgReturnCode tycon amode') } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. 
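+	-- A monomorphic use is fine, e.g. (editorial illustration only):
+	--     boolFromTag :: Int# -> Bool
+	--     boolFromTag t = tagToEnum# t
+	-- Here res_ty is Bool, an enumeration tycon, so the enclosing
+	-- ASSERT holds and the closure-table lookup works.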
+ tycon = tyConAppTyCon res_ty + + +cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) + | primOpOutOfLine primop + = tailCallPrimOp primop args + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsPrim rep <- result_info + = do cgPrimOp [dataReturnConvPrim (primRepToCgRep rep)] + primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args emptyVarSet{-no live vars-} + returnUnboxedTuple (zip reps (map CmmReg regs)) + + | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp wordRep + hmods <- getHomeModules + cgPrimOp [tag_reg] primop args emptyVarSet + stmtC (CmmAssign nodeReg (tagToClosure hmods tycon (CmmReg tag_reg))) + performReturn (emitAlgReturnCode tycon (CmmReg tag_reg)) + where + result_info = getPrimOpResultInfo primop +\end{code} + +%******************************************************** +%* * +%* Case expressions * +%* * +%******************************************************** +Case-expression conversion is complicated enough to have its own +module, @CgCase@. +\begin{code} + +cgExpr (StgCase expr live_vars save_vars bndr srt alt_type alts) + = cgCase expr live_vars save_vars bndr srt alt_type alts +\end{code} + + +%******************************************************** +%* * +%* Let and letrec * +%* * +%******************************************************** +\subsection[let-and-letrec-codegen]{Converting @StgLet@ and @StgLetrec@} + +\begin{code} +cgExpr (StgLet (StgNonRec name rhs) expr) + = cgRhs name rhs `thenFC` \ (name, info) -> + addBindC name info `thenC` + cgExpr expr + +cgExpr (StgLet (StgRec pairs) expr) + = fixC (\ new_bindings -> addBindsC new_bindings `thenC` + listFCs [ cgRhs b e | (b,e) <- pairs ] + ) `thenFC` \ new_bindings -> + + addBindsC new_bindings `thenC` + cgExpr expr +\end{code} + +\begin{code} +cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body) + = do { -- Figure out what volatile variables to save + ; nukeDeadBindings live_in_whole_let + ; (save_assts, rhs_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_rhss + + -- Save those variables right now! + ; emitStmts save_assts + + -- Produce code for the rhss + -- and add suitable bindings to the environment + ; cgLetNoEscapeBindings live_in_rhss rhs_eob_info + maybe_cc_slot bindings + + -- Do the body + ; setEndOfBlockInfo rhs_eob_info (cgExpr body) } +\end{code} + + +%******************************************************** +%* * +%* SCC Expressions * +%* * +%******************************************************** + +SCC expressions are treated specially. They set the current cost +centre. + +\begin{code} +cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr +\end{code} + +%******************************************************** +%* * +%* Non-top-level bindings * +%* * +%******************************************************** +\subsection[non-top-level-bindings]{Converting non-top-level bindings} + +We rely on the support code in @CgCon@ (to do constructors) and +in @CgClosure@ (to do closures). 
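+For example (illustrative source fragments):
+
+    let x = Just y in ...         -- an StgRhsCon: handled via CgCon
+    let f = \ a -> a + y in ...   -- an StgRhsClosure: handled via CgClosure
+
+@cgRhs@ below dispatches between exactly these two cases.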
+ +\begin{code} +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- the Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { amodes <- getArgAmodes args + ; idinfo <- buildDynCon name maybe_cc con amodes + ; returnFC (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = do hmods <- getHomeModules + mkRhsClosure hmods name cc bi srt fvs upd_flag args body +\end{code} + +mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks. + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Selectors +~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. + (AlgAlt tycon) + [(DataAlt con, params, use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + cgStdRhsClosure bndr cc bi [the_fv] [] body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr hmods con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize +\end{code} + +Ap thunks +~~~~~~~~~ + +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + fvs + upd_flag + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdRhsClosure bndr cc bi fvs [] body lf_info payload + + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. 
+ payload = StgVarArg fun_id : args + arity = length fvs +\end{code} + +The default case +~~~~~~~~~~~~~~~~ +\begin{code} +mkRhsClosure hmods bndr cc bi srt fvs upd_flag args body + = cgRhsClosure bndr cc bi srt fvs upd_flag args body +\end{code} + + +%******************************************************** +%* * +%* Let-no-escape bindings +%* * +%******************************************************** +\begin{code} +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot + (StgNonRec binder rhs) + = do { (binder,info) <- cgLetNoEscapeRhs live_in_rhss rhs_eob_info + maybe_cc_slot + NonRecursive binder rhs + ; addBindC binder info } + +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgRec pairs) + = do { new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs full_live_in_rhss + rhs_eob_info maybe_cc_slot Recursive b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + where + -- We add the binders to the live-in-rhss set so that we don't + -- delete the bindings for the binder from the environment! + full_live_in_rhss = live_in_rhss `unionVarSet` (mkVarSet [b | (b,r) <- pairs]) + +cgLetNoEscapeRhs + :: StgLiveVars -- Live in rhss + -> EndOfBlockInfo + -> Maybe VirtualSpOffset + -> RecFlag + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsClosure cc bi _ upd_flag srt args body) + = -- We could check the update flag, but currently we don't switch it off + -- for let-no-escaped things, so we omit the check too! + -- case upd_flag of + -- Updatable -> panic "cgLetNoEscapeRhs" -- Nothing to update! + -- other -> cgLetNoEscapeClosure binder cc bi live_in_whole_let live_in_rhss args body + cgLetNoEscapeClosure binder cc bi srt full_live_in_rhss rhs_eob_info + maybe_cc_slot rec args body + +-- For a constructor RHS we want to generate a single chunk of code which +-- can be jumped to from many places, which will return the constructor. +-- It's easy; just behave as if it was an StgRhsClosure with a ConApp inside! +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsCon cc con args) + = cgLetNoEscapeClosure binder cc noBinderInfo{-safe-} NoSRT + full_live_in_rhss rhs_eob_info maybe_cc_slot rec + [] --No args; the binder is data structure, not a function + (StgConApp con args) +\end{code} + +Little helper for primitives that return unboxed tuples. + +\begin{code} +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [CmmReg], [MachHint]) +newUnboxedTupleRegs res_ty = + let + ty_args = tyConAppArgs (repType res_ty) + (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + let rep = typeCgRep ty, + nonVoidArg rep ] + in do + regs <- mapM (newTemp . argMachRep) reps + return (reps,regs,hints) +\end{code} diff --git a/compiler/codeGen/CgExpr.lhs-boot b/compiler/codeGen/CgExpr.lhs-boot new file mode 100644 index 0000000000..29cdc3a605 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs-boot @@ -0,0 +1,7 @@ +\begin{code} +module CgExpr where +import StgSyn( StgExpr ) +import CgMonad( Code ) + +cgExpr :: StgExpr -> Code +\end{code} diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs new file mode 100644 index 0000000000..10f41bdf8b --- /dev/null +++ b/compiler/codeGen/CgForeignCall.hs @@ -0,0 +1,256 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgForeignCall ( + cgForeignCall, + emitForeignCall, + emitForeignCall', + shimForeignCallArg, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn ( StgLiveVars, StgArg, stgArgType ) +import CgProf ( curCCS, curCCSAddr ) +import CgBindery ( getVolatileRegs, getArgAmodes ) +import CgMonad +import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW, newTemp, + assignTemp ) +import Type ( tyConAppTyCon, repType ) +import TysPrim +import CLabel ( mkForeignLabel, mkRtsCodeLabel ) +import Cmm +import CmmUtils +import MachOp +import SMRep +import ForeignCall +import Constants +import StaticFlags ( opt_SccProfilingOn ) +import Outputable + +import Monad ( when ) + +-- ----------------------------------------------------------------------------- +-- Code generation for Foreign Calls + +cgForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code +cgForeignCall results fcall stg_args live + = do + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + arg_hints = zip arg_exprs (map (typeHint.stgArgType) stg_args) + -- in + emitForeignCall results fcall arg_hints live + + +emitForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +emitForeignCall results (CCall (CCallSpec target cconv safety)) args live + = do vols <- getVolatileRegs live + emitForeignCall' safety results + (CmmForeignCall cmm_target cconv) call_args (Just vols) + where + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl call_size False))) + DynamicTarget -> case args of (fn,_):rest -> (rest, fn) + + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. 
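+	-- For example (illustrative, not from this module): a stdcall
+	-- to "Sleep" taking one 32-bit argument would be printed as
+	-- Sleep@4 on a 32-bit platform, since each argument is rounded
+	-- up to at least a whole word by arg_size below.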
+ call_size + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.fst) args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size rep = max (machRepByteWidth rep) wORD_SIZE + +emitForeignCall results (DNCall _) args live + = panic "emitForeignCall: DNCall" + + +-- alternative entry point, used by CmmParse +emitForeignCall' + :: Safety + -> [(CmmReg,MachHint)] -- where to put the results + -> CmmCallTarget -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> Maybe [GlobalReg] -- live vars, in case we need to save them + -> Code +emitForeignCall' safety results target args vols + | not (playSafe safety) = do + temp_args <- load_args_into_temps args + stmtC (CmmCall target results temp_args vols) + + | otherwise = do + id <- newTemp wordRep + temp_args <- load_args_into_temps args + emitSaveThreadState + stmtC (CmmCall (CmmForeignCall suspendThread CCallConv) + [(id,PtrHint)] + [ (CmmReg (CmmGlobal BaseReg), PtrHint) ] + vols + ) + stmtC (CmmCall target results temp_args vols) + stmtC (CmmCall (CmmForeignCall resumeThread CCallConv) + [ (CmmGlobal BaseReg, PtrHint) ] + -- Assign the result to BaseReg: we + -- might now have a different + -- Capability! + [ (CmmReg id, PtrHint) ] + vols + ) + emitLoadThreadState + + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) + + +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps args = mapM maybe_assignTemp args + +maybe_assignTemp (e, hint) + | hasNoGlobalRegs e = return (e, hint) + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here + reg <- newTemp (cmmExprRep e) + stmtC (CmmAssign reg e) + return (CmmReg reg, hint) + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. 
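+--
+-- Schematically, the code emitted by emitForeignCall' above for a
+-- *safe* call is therefore:
+--
+--      <emitSaveThreadState>
+--      id = suspendThread(BaseReg);
+--      <the call itself>
+--      BaseReg = resumeThread(id);
+--      <emitLoadThreadState>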
+ +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState = do + tso <- newTemp wordRep + stmtsC [ + -- tso = CurrentTSO; + CmmAssign tso stgCurrentTSO, + -- Sp = tso->sp; + CmmAssign sp (CmmLoad (cmmOffset (CmmReg tso) tso_SP) + wordRep), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg tso) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg tso) tso_CCCS) wordRep)) + +emitOpenNursery = stmtsC [ + -- Hp = CurrentNursery->free - 1; + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + CmmAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start wordRep) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_S_Conv I32 wordRep) + [CmmLoad nursery_bdescr_blocks I32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. 
+ +shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr +shimForeignCallArg arg expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + -- should be a tycon app, since this is a foreign call + tycon = tyConAppTyCon (repType (stgArgType arg)) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs new file mode 100644 index 0000000000..184af904df --- /dev/null +++ b/compiler/codeGen/CgHeapery.lhs @@ -0,0 +1,588 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgHeapery.lhs,v 1.47 2005/06/21 10:44:41 simonmar Exp $ +% +\section[CgHeapery]{Heap management functions} + +\begin{code} +module CgHeapery ( + initHeapUsage, getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + funEntryChecks, thunkEntryChecks, + altHeapCheck, unbxTupleHeapCheck, + hpChkGen, hpChkNodePointsAssignSp0, + stkChkGen, stkChkNodePoints, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn ( AltType(..) ) +import CLabel ( CLabel, mkRtsCodeLabel ) +import CgUtils ( mkWordCLit, cmmRegOffW, cmmOffsetW, + cmmOffsetExprB ) +import CgMonad +import CgProf ( staticProfHdr, profDynAlloc, dynProfHdr ) +import CgTicky ( staticTickyHdr, tickyDynAlloc, tickyAllocHeap ) +import CgParallel ( staticGranHdr, staticParHdr, doGranAllocate ) +import CgStackery ( getFinalStackHW, getRealSp ) +import CgCallConv ( mkRegLiveness ) +import ClosureInfo ( closureSize, staticClosureNeedsLink, + mkConInfo, closureNeedsUpdSpace, + infoTableLabelFromCI, closureLabelFromCI, + nodeMustPointToIt, closureLFInfo, + ClosureInfo ) +import SMRep ( CgRep(..), cgRepSizeW, separateByPtrFollowness, + WordOff, fixedHdrSize, thunkHdrSize, + isVoidArg, primRepToCgRep ) + +import Cmm ( CmmLit(..), CmmStmt(..), CmmExpr(..), GlobalReg(..), + CmmReg(..), hpReg, nodeReg, spReg ) +import MachOp ( mo_wordULt, mo_wordUGt, mo_wordSub ) +import CmmUtils ( mkIntCLit, CmmStmts, noStmts, oneStmt, plusStmts, + mkStmts ) +import Id ( Id ) +import DataCon ( DataCon ) +import TyCon ( tyConPrimRep ) +import CostCentre ( CostCentreStack ) +import Util ( mapAccumL, filterOut ) +import Constants ( wORD_SIZE ) +import Packages ( HomeModules ) +import Outputable + +\end{code} + + +%************************************************************************ +%* * +\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage} +%* * +%************************************************************************ + +The heap always grows upwards, so hpRel is easy + +\begin{code} +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp +\end{code} + +@initHeapUsage@ applies a function to the amount of heap that it uses. +It initialises the heap usage to zeros, and passes on an unchanged +heap usage. + +It is usually a prelude to performing a GC check, so everything must +be in a tidy and consistent state. 
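A sketch of the usage pattern (hypothetical; the real call sites are the heap-check functions later in this file):

    -- Sketch: plant one heap check for a whole block.  hpHw is the
    -- block's total allocation, available up front thanks to the
    -- knot-tying below; emitHeapCheckFor is an invented emitter.
    blockWithHeapCheck :: Code -> Code
    blockWithHeapCheck body = initHeapUsage (\ hpHw ->
        do { codeOnly (emitHeapCheckFor hpHw)
           ; setRealHp hpHw
           ; body })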
+
+rje: Note the slightly subtle fixed point behaviour needed here
+
+\begin{code}
+initHeapUsage :: (VirtualHpOffset -> Code) -> Code
+initHeapUsage fcode
+  = do  { orig_hp_usage <- getHpUsage
+        ; setHpUsage initHpUsage
+        ; fixC (\heap_usage2 -> do
+                { fcode (heapHWM heap_usage2)
+                ; getHpUsage })
+        ; setHpUsage orig_hp_usage }
+
+setVirtHp :: VirtualHpOffset -> Code
+setVirtHp new_virtHp
+  = do  { hp_usage <- getHpUsage
+        ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
+
+getVirtHp :: FCode VirtualHpOffset
+getVirtHp
+  = do  { hp_usage <- getHpUsage
+        ; return (virtHp hp_usage) }
+
+setRealHp ::  VirtualHpOffset -> Code
+setRealHp new_realHp
+  = do  { hp_usage <- getHpUsage
+        ; setHpUsage (hp_usage {realHp = new_realHp}) }
+
+getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+getHpRelOffset virtual_offset
+  = do  { hp_usg <- getHpUsage
+        ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+               Layout of heap objects
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+layOutDynConstr, layOutStaticConstr
+       :: HomeModules
+       -> DataCon
+       -> [(CgRep,a)]
+       -> (ClosureInfo,
+           [(a,VirtualHpOffset)])
+
+layOutDynConstr    = layOutConstr False
+layOutStaticConstr = layOutConstr True
+
+layOutConstr is_static hmods data_con args
+   = (mkConInfo hmods is_static data_con tot_wds ptr_wds,
+      things_w_offsets)
+  where
+    (tot_wds,            --  #ptr_wds + #nonptr_wds
+     ptr_wds,            --  #ptr_wds
+     things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
+\end{code}
+
+@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
+than the unboxed things, and furthermore, the offsets in the result
+list are in order of increasing offset.
+
+\begin{code}
+mkVirtHeapOffsets
+         :: Bool                -- True <=> is a thunk
+         -> [(CgRep,a)]         -- Things to make offsets for
+         -> (WordOff,           -- _Total_ number of words allocated
+             WordOff,           -- Number of words allocated for *pointers*
+             [(a, VirtualHpOffset)])
+                                -- Things with their offsets from start of
+                                --  object in order of increasing offset
+
+-- First in list gets lowest offset, which is initial offset + 1.
+
+mkVirtHeapOffsets is_thunk things
+  = let non_void_things               = filterOut (isVoidArg . fst) things
+        (ptrs, non_ptrs)              = separateByPtrFollowness non_void_things
+        (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
+        (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
+    in
+    (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
+  where
+    hdr_size    | is_thunk   = thunkHdrSize
+                | otherwise  = fixedHdrSize
+
+    computeOffset wds_so_far (rep, thing)
+      = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+       Lay out a static closure
+%*                                                                      *
+%************************************************************************
+
+Make a static closure, adding on any extra padding needed for CAFs,
+and adding a static link field if necessary.
+
+\begin{code}
+mkStaticClosureFields
+       :: ClosureInfo
+       -> CostCentreStack
+       -> Bool                 -- Has CAF refs
+       -> [CmmLit]             -- Payload
+       -> [CmmLit]             -- The full closure
+mkStaticClosureFields cl_info ccs caf_refs payload
+  = mkStaticClosure info_lbl ccs payload padding_wds
+       static_link_field saved_info_field
+  where
+    info_lbl = infoTableLabelFromCI cl_info
+
+    -- CAFs must have consistent layout, regardless of whether they
+    -- are actually updatable or not.
The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr +\end{code} + +%************************************************************************ +%* * +\subsection[CgHeapery-heap-overflow]{Heap overflow checking} +%* * +%************************************************************************ + +The new code for heapChecks. For GrAnSim the code for doing a heap check +and doing a context switch has been separated. Especially, the HEAP_CHK +macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for +doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the +beginning of every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. current closure is not local) then +an automatic context switch is done. + +-------------------------------------------------------------- +A heap/stack check at a function or thunk entry point. + +\begin{code} +funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code +funEntryChecks cl_info reg_save_code code + = hpStkCheck cl_info True reg_save_code code + +thunkEntryChecks :: ClosureInfo -> Code -> Code +thunkEntryChecks cl_info code + = hpStkCheck cl_info False noStmts code + +hpStkCheck :: ClosureInfo -- Function closure + -> Bool -- Is a function? (not a thunk) + -> CmmStmts -- Register saves + -> Code + -> Code + +hpStkCheck cl_info is_fun reg_save_code code + = getFinalStackHW $ \ spHw -> do + { sp <- getRealSp + ; let stk_words = spHw - sp + ; initHeapUsage $ \ hpHw -> do + { -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + codeOnly $ do + { do_checks stk_words hpHw full_save_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + } + where + node_asst + | nodeMustPointToIt (closureLFInfo cl_info) + = noStmts + | otherwise + = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + closure_lbl = closureLabelFromCI cl_info + + full_save_code = node_asst `plusStmts` reg_save_code + + rts_label | is_fun = CmmReg (CmmGlobal GCFun) + -- Function entry point + | otherwise = CmmReg (CmmGlobal GCEnter1) + -- Thunk or case return + -- In the thunk/case-return case, R1 points to a closure + -- which should be (re)-entered after GC +\end{code} + +Heap checks in a case alternative are nice and easy, provided this is +a bog-standard algebraic case. 
We have in our hand:
+
+   * one return address, on the stack,
+   * one return value, in Node.
+
+the canned code for this heap check failure just pushes Node on the
+stack, saying 'EnterGHC' to return. The scheduler will return by
+entering the top value on the stack, which in turn will return through
+the return address, getting us back to where we were. This is
+therefore only valid if the return value is *lifted* (just being
+boxed isn't good enough).
+
+For primitive returns, we have an unlifted value in some register
+(either R1 or FloatReg1 or DblReg1). This means using specialised
+heap-check code for these cases.
+
+\begin{code}
+altHeapCheck
+    :: AltType  -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+                --      (Unboxed tuples are dealt with by ubxTupleHeapCheck)
+    -> Code     -- Continuation
+    -> Code
+altHeapCheck alt_type code
+  = initHeapUsage $ \ hpHw -> do
+       { codeOnly $ do
+            { do_checks 0 {- no stack chk -} hpHw
+                        noStmts {- nothing to save -}
+                        (rts_label alt_type)
+            ; tickyAllocHeap hpHw }
+       ; setRealHp hpHw
+       ; code }
+  where
+    rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")))
+       -- Do *not* enter R1 after a heap check in
+       -- a polymorphic case.  It might be a function
+       -- and the entry code for a function (currently)
+       -- applies it
+       --
+       -- However R1 is guaranteed to be a pointer
+
+    rts_label (AlgAlt tc) = stg_gc_enter1
+       -- Enter R1 after the heap check; it's a pointer
+
+    rts_label (PrimAlt tc)
+      = CmmLit $ CmmLabel $
+       case primRepToCgRep (tyConPrimRep tc) of
+         VoidArg   -> mkRtsCodeLabel SLIT( "stg_gc_noregs")
+         FloatArg  -> mkRtsCodeLabel SLIT( "stg_gc_f1")
+         DoubleArg -> mkRtsCodeLabel SLIT( "stg_gc_d1")
+         LongArg   -> mkRtsCodeLabel SLIT( "stg_gc_l1")
+                               -- R1 is boxed but unlifted:
+         PtrArg    -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")
+                               -- R1 is unboxed:
+         NonPtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1")
+
+    rts_label (UbxTupAlt _) = panic "altHeapCheck"
+\end{code}
+
+
+Unboxed tuple alternatives and let-no-escapes (the two most annoying
+constructs to generate code for!)  For unboxed tuple returns, there
+are an arbitrary number of possibly unboxed return values, some of
+which will be in registers, and the others will be on the stack.  We
+always organise the stack-resident fields into pointers &
+non-pointers, and pass the number of each to the heap check code.
+
+\begin{code}
+unbxTupleHeapCheck
+       :: [(Id, GlobalReg)]    -- Live registers
+       -> WordOff      -- no. of stack slots containing ptrs
+       -> WordOff      -- no. of stack slots containing nonptrs
+       -> CmmStmts     -- code to insert in the failure path
+       -> Code
+       -> Code
+
+unbxTupleHeapCheck regs ptrs nptrs fail_code code
+  -- We can't manage more than 255 pointers/non-pointers
+  -- in a generic heap check.
+  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
+  | otherwise
+  = initHeapUsage $ \ hpHw -> do
+       { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
+                                   full_fail_code rts_label
+                       ; tickyAllocHeap hpHw }
+       ; setRealHp hpHw
+       ; code }
+  where
+    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
+    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))     -- Ho ho ho!
+                                (CmmLit (mkWordCLit liveness))
+    liveness        = mkRegLiveness regs ptrs nptrs
+    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
+
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+               Heap/Stack Checks.
+%*                                                                      *
+%************************************************************************
+
+When failing a check, we save a return address on the stack and
+jump to a pre-compiled code fragment that saves the live registers
+and returns to the scheduler.
+
+The return address in most cases will be the beginning of the basic
+block in which the check resides, since we need to perform the check
+again on re-entry because someone else might have stolen the resource
+in the meantime.
+
+\begin{code}
+do_checks :: WordOff       -- Stack headroom
+          -> WordOff       -- Heap  headroom
+          -> CmmStmts      -- Assignments to perform on failure
+          -> CmmExpr       -- Rts address to jump to on failure
+          -> Code
+do_checks 0 0 _ _   = nopC
+do_checks stk hp reg_save_code rts_lbl
+  = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
+              (CmmLit (mkIntCLit (hp*wORD_SIZE)))
+        (stk /= 0) (hp /= 0) reg_save_code rts_lbl
+
+-- The offsets are now in *bytes*
+do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
+  = do { doGranAllocate hp_expr
+
+       -- Emit a block for the heap-check-failure code
+       ; blk_id <- forkLabelledCode $ do
+                       { whenC hp_nonzero $
+                               stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
+                       ; emitStmts reg_save_code
+                       ; stmtC (CmmJump rts_lbl []) }
+
+       -- Check for stack overflow *FIRST*; otherwise
+       -- we might bump Hp and then fail the stack overflow check
+       ; whenC stk_nonzero
+               (stmtC (CmmCondBranch stk_oflo blk_id))
+
+       ; whenC hp_nonzero
+               (stmtsC [CmmAssign hpReg
+                               (cmmOffsetExprB (CmmReg hpReg) hp_expr),
+                       CmmCondBranch hp_oflo blk_id])
+               -- Bump heap pointer, and test for heap exhaustion
+               -- Note that we don't move the heap pointer unless the
+               -- stack check succeeds.  Otherwise we might end up
+               -- with slop at the end of the current block, which can
+               -- confuse the LDV profiler.
+       }
+  where
+       -- Stk overflow if (Sp - stk_bytes < SpLim)
+    stk_oflo = CmmMachOp mo_wordULt
+                 [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
+                  CmmReg (CmmGlobal SpLim)]
+
+       -- Hp overflow if (Hp > HpLim)
+       -- (Hp has been incremented by now)
+       -- HpLim points to the LAST WORD of valid allocation space.
+    hp_oflo = CmmMachOp mo_wordUGt
+                 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+     Generic Heap/Stack Checks - used in the RTS
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+hpChkGen bytes liveness reentry
+  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
+  where
+    assigns = mkStmts [
+               CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
+               CmmAssign (CmmGlobal (VanillaReg 10)) reentry
+               ]
+
+-- a heap check where R1 points to the closure to enter on return, and
+-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
+hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code +stkChkGen bytes liveness reentry + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +stkChkNodePoints :: CmmExpr -> Code +stkChkNodePoints bytes + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1 + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen"))) +stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) +\end{code} + +%************************************************************************ +%* * +\subsection[initClosure]{Initialise a dynamic closure} +%* * +%************************************************************************ + +@allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp +to account for this. + +\begin{code} +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -> FCode VirtualHpOffset -- Returns virt offset of object + +allocDynClosure cl_info use_cc blame_cc amodes_with_offsets + = do { virt_hp <- getVirtHp + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] + + -- SAY WHAT WE ARE ABOUT TO DO + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + ; tickyDynAlloc cl_info + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; hpStore base (hdr_w_offsets ++ amodes_with_offsets) + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- RETURN PTR TO START OF OBJECT + ; returnFC info_offset } + + +initDynHdr :: CmmExpr + -> CmmExpr -- Cost centre to put in object + -> [CmmExpr] +initDynHdr info_ptr cc + = [info_ptr] + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + ++ dynProfHdr cc + -- No ticky header + +hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code +-- Store the item (expr,off) in base[off] +hpStore base es + = stmtsC [ CmmStore (cmmOffsetW base off) val + | (val, off) <- es ] + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code +emitSetDynHdr base info_ptr ccs + = hpStore base (zip (initDynHdr info_ptr ccs) [0..]) +\end{code} diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs new file mode 100644 index 0000000000..b769950d87 --- /dev/null +++ b/compiler/codeGen/CgInfoTbls.hs @@ -0,0 +1,591 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgInfoTbls ( + emitClosureCodeAndInfoTable, + emitInfoTableAndCode, + dataConTagZ, + getSRTInfo, + emitDirectReturnTarget, emitAlgReturnTarget, + emitDirectReturnInstr, emitVectoredReturnInstr, + mkRetInfoTable, + mkStdInfoTable, + stdInfoTableSizeB, + mkFunGenInfoExtraBits, + entryCode, closureInfoPtr, + getConstrTag, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, + retVec + ) where + + +#include "HsVersions.h" + +import ClosureInfo ( ClosureInfo, closureTypeDescr, closureName, + infoTableLabelFromCI, Liveness, + closureValDescr, closureSRT, closureSMRep, + closurePtrsSize, closureNonHdrSize, closureFunInfo, + C_SRT(..), needsSRT, isConstrClosure_maybe, + ArgDescr(..) ) +import SMRep ( StgHalfWord, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE, + WordOff, ByteOff, + smRepClosureTypeInt, tablesNextToCode, + rET_BIG, rET_SMALL, rET_VEC_BIG, rET_VEC_SMALL ) +import CgBindery ( getLiveStackSlots ) +import CgCallConv ( isBigLiveness, mkLivenessCLit, buildContLiveness, + argDescrType, getSequelAmode, + CtrlReturnConvention(..) ) +import CgUtils ( mkStringCLit, packHalfWordsCLit, mkWordCLit, + cmmOffsetB, cmmOffsetExprW, cmmLabelOffW, cmmOffsetW, + emitDataLits, emitRODataLits, emitSwitch, cmmNegate, + newTemp ) +import CgMonad + +import CmmUtils ( mkIntCLit, zeroCLit ) +import Cmm ( CmmStmt(..), CmmExpr(..), CmmLit(..), LocalReg, + CmmBasicBlock, nodeReg ) +import MachOp ( MachOp(..), wordRep, halfWordRep ) +import CLabel +import StgSyn ( SRT(..) ) +import Name ( Name ) +import DataCon ( DataCon, dataConTag, fIRST_TAG ) +import Unique ( Uniquable(..) ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import StaticFlags ( opt_SccProfilingOn ) +import ListSetOps ( assocDefault ) +import Maybes ( isJust ) +import Constants ( wORD_SIZE, sIZEOF_StgFunInfoExtraRev ) +import Outputable + + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make a concrete info table, represented as a list of CmmAddr +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). 
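For a feel of what such a table looks like (a hypothetical sketch using helpers defined later in this module): a non-profiling info table for a constructor with two pointer fields, no non-pointers, and zero-indexed tag 0 might be built along these lines, where cONSTR_2_0 is an invented stand-in for the real closure-type constant:

    exampleConInfo :: [CmmLit]
    exampleConInfo = mkStdInfoTable (mkIntCLit 0) (mkIntCLit 0) -- no profiling descrs
                                    cONSTR_2_0                  -- closure type (stand-in)
                                    0                           -- srt_len reused as con tag
                                    (packHalfWordsCLit 2 0)     -- layout: 2 ptrs, 0 nonptrs
      where cONSTR_2_0 = 3  -- invented numeric value, for illustration only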
+ +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/InfoTables.h + +emitClosureCodeAndInfoTable :: ClosureInfo -> [LocalReg] -> CgStmts -> Code +emitClosureCodeAndInfoTable cl_info args body + = do { ty_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit (closureTypeDescr cl_info) + else return (mkIntCLit 0) + ; cl_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit cl_descr_string + else return (mkIntCLit 0) + ; let std_info = mkStdInfoTable ty_descr_lit cl_descr_lit + cl_type srt_len layout_lit + + ; blks <- cgStmtsToBlocks body + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks } + where + info_lbl = infoTableLabelFromCI cl_info + + cl_descr_string = closureValDescr cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + srt = closureSRT cl_info + needs_srt = needsSRT srt + + mb_con = isConstrClosure_maybe cl_info + is_con = isJust mb_con + + (srt_label,srt_len) + = case mb_con of + Just con -> -- Constructors don't have an SRT + -- We keep the *zero-indexed* tag in the srt_len + -- field of the info table. + (mkIntCLit 0, fromIntegral (dataConTagZ con)) + + Nothing -> -- Not a constructor + srtLabelAndLength srt info_lbl + + ptrs = closurePtrsSize cl_info + nptrs = size - ptrs + size = closureNonHdrSize cl_info + layout_lit = packHalfWordsCLit ptrs nptrs + + extra_bits + | is_fun = fun_extra_bits + | is_con = [] + | needs_srt = [srt_label] + | otherwise = [] + + maybe_fun_stuff = closureFunInfo cl_info + is_fun = isJust maybe_fun_stuff + (Just (arity, arg_descr)) = maybe_fun_stuff + + fun_extra_bits + | ArgGen liveness <- arg_descr + = [ fun_amode, + srt_label, + makeRelativeRefTo info_lbl $ mkLivenessCLit liveness, + slow_entry ] + | needs_srt = [fun_amode, srt_label] + | otherwise = [fun_amode] + + slow_entry = makeRelativeRefTo info_lbl (CmmLabel slow_entry_label) + slow_entry_label = mkSlowEntryLabel (closureName cl_info) + + fun_amode = packHalfWordsCLit fun_type arity + fun_type = argDescrType arg_descr + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +-- A low-level way to generate the variable part of a fun-style info table. +-- (must match fun_extra_bits above). Used by the C-- parser. 
+mkFunGenInfoExtraBits :: Int -> Int -> CmmLit -> CmmLit -> CmmLit -> [CmmLit] +mkFunGenInfoExtraBits fun_type arity srt_label liveness slow_entry + = [ packHalfWordsCLit fun_type arity, + srt_label, + liveness, + slow_entry ] + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a return point +-- +------------------------------------------------------------------------- + +-- Here's the layout of a return-point info table +-- +-- Tables next to code: +-- +-- <reversed vector table> +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- <forward vector table> +-- +-- * The vector table is only present for vectored returns +-- +-- * The SRT slot is only there if either +-- (a) there is SRT info to record, OR +-- (b) if the return is vectored +-- The latter (b) is necessary so that the vector is in a +-- predictable place + +vectorSlot :: CmmExpr -> CmmExpr -> CmmExpr +-- Get the vector slot from the info pointer +vectorSlot info_amode zero_indexed_tag + | tablesNextToCode + = cmmOffsetExprW (cmmOffsetW info_amode (- (stdInfoTableSizeW + 2))) + (cmmNegate zero_indexed_tag) + -- The "2" is one for the SRT slot, and one more + -- to get to the first word of the vector + + | otherwise + = cmmOffsetExprW (cmmOffsetW info_amode (stdInfoTableSizeW + 2)) + zero_indexed_tag + -- The "2" is one for the entry-code slot and one for the SRT slot + +retVec :: CmmExpr -> CmmExpr -> CmmExpr +-- Get a return vector from the info pointer +retVec info_amode zero_indexed_tag + = let slot = vectorSlot info_amode zero_indexed_tag + tableEntry = CmmLoad slot wordRep + in if tablesNextToCode + then CmmMachOp (MO_Add wordRep) [tableEntry, info_amode] + else tableEntry + +emitReturnTarget + :: Name + -> CgStmts -- The direct-return code (if any) + -- (empty for vectored returns) + -> [CmmLit] -- Vector of return points + -- (empty for non-vectored returns) + -> SRT + -> FCode CLabel +emitReturnTarget name stmts vector srt + = do { live_slots <- getLiveStackSlots + ; liveness <- buildContLiveness name live_slots + ; srt_info <- getSRTInfo name srt + + ; let + cl_type = case (null vector, isBigLiveness liveness) of + (True, True) -> rET_BIG + (True, False) -> rET_SMALL + (False, True) -> rET_VEC_BIG + (False, False) -> rET_VEC_SMALL + + (std_info, extra_bits) = + mkRetInfoTable info_lbl liveness srt_info cl_type vector + + ; blks <- cgStmtsToBlocks stmts + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks + ; return info_lbl } + where + args = {- trace "emitReturnTarget: missing args" -} [] + uniq = getUnique name + info_lbl = mkReturnInfoLabel uniq + + +mkRetInfoTable + :: CLabel -- info label + -> Liveness -- liveness + -> C_SRT -- SRT Info + -> Int -- type (eg. 
rET_SMALL) + -> [CmmLit] -- vector + -> ([CmmLit],[CmmLit]) +mkRetInfoTable info_lbl liveness srt_info cl_type vector + = (std_info, extra_bits) + where + (srt_label, srt_len) = srtLabelAndLength srt_info info_lbl + + srt_slot | need_srt = [srt_label] + | otherwise = [] + + need_srt = needsSRT srt_info || not (null vector) + -- If there's a vector table then we must allocate + -- an SRT slot, so that the vector table is at a + -- known offset from the info pointer + + liveness_lit = makeRelativeRefTo info_lbl $ mkLivenessCLit liveness + std_info = mkStdInfoTable zeroCLit zeroCLit cl_type srt_len liveness_lit + extra_bits = srt_slot ++ map (makeRelativeRefTo info_lbl) vector + + +emitDirectReturnTarget + :: Name + -> CgStmts -- The direct-return code + -> SRT + -> FCode CLabel +emitDirectReturnTarget name code srt + = emitReturnTarget name code [] srt + +emitAlgReturnTarget + :: Name -- Just for its unique + -> [(ConTagZ, CgStmts)] -- Tagged branches + -> Maybe CgStmts -- Default branch (if any) + -> SRT -- Continuation's SRT + -> CtrlReturnConvention + -> FCode (CLabel, SemiTaggingStuff) + +emitAlgReturnTarget name branches mb_deflt srt ret_conv + = case ret_conv of + UnvectoredReturn fam_sz -> do + { blks <- getCgStmts $ + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + -- NB: tag_expr is zero-based + ; lbl <- emitDirectReturnTarget name blks srt + ; return (lbl, Nothing) } + -- Nothing: the internal branches in the switch don't have + -- global labels, so we can't use them at the 'call site' + + VectoredReturn fam_sz -> do + { let tagged_lbls = zip (map fst branches) $ + map (CmmLabel . mkAltLabel uniq . fst) branches + deflt_lbl | isJust mb_deflt = CmmLabel $ mkDefaultLabel uniq + | otherwise = mkIntCLit 0 + ; let vector = [ assocDefault deflt_lbl tagged_lbls i + | i <- [0..fam_sz-1]] + ; lbl <- emitReturnTarget name noCgStmts vector srt + ; mapFCs emit_alt branches + ; emit_deflt mb_deflt + ; return (lbl, Just (tagged_lbls, deflt_lbl)) } + where + uniq = getUnique name + tag_expr = getConstrTag (CmmReg nodeReg) + + emit_alt :: (Int, CgStmts) -> FCode (Int, CmmLit) + -- Emit the code for the alternative as a top-level + -- code block returning a label for it + emit_alt (tag, stmts) = do { let lbl = mkAltLabel uniq tag + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (tag, CmmLabel lbl) } + + emit_deflt (Just stmts) = do { let lbl = mkDefaultLabel uniq + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (CmmLabel lbl) } + emit_deflt Nothing = return (mkIntCLit 0) + -- Nothing case: the simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation the default should never be taken, + -- so we just use a NULL pointer + +-------------------------------- +emitDirectReturnInstr :: Code +emitDirectReturnInstr + = do { info_amode <- getSequelAmode + ; stmtC (CmmJump (entryCode info_amode) []) } + +emitVectoredReturnInstr :: CmmExpr -- _Zero-indexed_ constructor tag + -> Code +emitVectoredReturnInstr zero_indexed_tag + = do { info_amode <- getSequelAmode + -- HACK! assign info_amode to a temp, because retVec + -- uses it twice and the NCG doesn't have any CSE yet. + -- Only do this for the NCG, because gcc is too stupid + -- to optimise away the extra tmp (grrr). 
+ ; dflags <- getDynFlags + ; x <- if hscTarget dflags == HscAsm + then do z <- newTemp wordRep + stmtC (CmmAssign z info_amode) + return (CmmReg z) + else + return info_amode + ; let target = retVec x zero_indexed_tag + ; stmtC (CmmJump target []) } + + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in InfoTables.h. +-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: CmmLit -- closure type descr (profiling) + -> CmmLit -- closure descr (profiling) + -> Int -- closure type + -> StgHalfWord -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable type_descr closure_descr cl_type srt_len layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, type_lit] + + where + prof_info + | opt_SccProfilingOn = [type_descr, closure_descr] + | otherwise = [] + + type_lit = packHalfWordsCLit cl_type srt_len + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e wordRep + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e wordRep + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
+getConstrTag closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Emit the code for a closure (or return address) +-- and its associated info table +-- +------------------------------------------------------------------------- + +-- The complication here concerns whether or not we can +-- put the info table next to the code + +emitInfoTableAndCode + :: CLabel -- Label of info table + -> [CmmLit] -- ...its invariant part + -> [CmmLit] -- ...and its variant part + -> [LocalReg] -- ...args + -> [CmmBasicBlock] -- ...and body + -> Code + +emitInfoTableAndCode info_lbl std_info extra_bits args blocks + | tablesNextToCode -- Reverse the extra_bits; and emit the top-level proc + = emitProc (reverse extra_bits ++ std_info) + entry_lbl args blocks + -- NB: the info_lbl is discarded + + | null blocks -- No actual code; only the info table is significant + = -- Use a zero place-holder in place of the + -- entry-label in the info table + emitRODataLits info_lbl (zeroCLit : std_info ++ extra_bits) + + | otherwise -- Separately emit info table (with the function entry + = -- point as first entry) and the entry code + do { emitDataLits info_lbl (CmmLabel entry_lbl : std_info ++ extra_bits) + ; emitProc [] entry_lbl args blocks } + + where + entry_lbl = infoLblToEntryLbl info_lbl + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. 
The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: Name -> SRT -> FCode C_SRT +getSRTInfo id NoSRT = return NoC_SRT +getSRTInfo id (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { srt_lbl <- getSRTLabel + ; let srt_desc_lbl = mkSRTDescLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW srt_lbl off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { srt_lbl <- getSRTLabel + ; return (C_SRT srt_lbl off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +srt_escape = (-1) :: StgHalfWord + +srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +srtLabelAndLength NoC_SRT _ + = (zeroCLit, 0) +srtLabelAndLength (C_SRT lbl off bitmap) info_lbl + = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs new file mode 100644 index 0000000000..39860f4ee0 --- /dev/null +++ b/compiler/codeGen/CgLetNoEscape.lhs @@ -0,0 +1,212 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 +% +% $Id: CgLetNoEscape.lhs,v 1.26 2004/09/30 10:35:47 simonpj Exp $ +% +%******************************************************** +%* * +\section[CgLetNoEscape]{Handling ``let-no-escapes''} +%* * +%******************************************************** + +\begin{code} +module CgLetNoEscape ( cgLetNoEscapeClosure ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import StgSyn +import CgMonad + +import CgBindery ( CgIdInfo, letNoEscapeIdInfo, nukeDeadBindings ) +import CgCase ( restoreCurrentCostCentre ) +import CgCon ( bindUnboxedTupleComponents ) +import CgHeapery ( unbxTupleHeapCheck ) +import CgInfoTbls ( emitDirectReturnTarget ) +import CgStackery ( allocStackTop, deAllocStackTop, getSpRelOffset ) +import Cmm ( CmmStmt(..) ) +import CmmUtils ( mkLblExpr, oneStmt ) +import CLabel ( mkReturnInfoLabel ) +import ClosureInfo ( mkLFLetNoEscape ) +import CostCentre ( CostCentreStack ) +import Id ( Id, idName ) +import Var ( idUnique ) +import SMRep ( retAddrSizeW ) +import BasicTypes ( RecFlag(..) ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[what-is-non-escaping]{What {\em is} a ``non-escaping let''?} +%* * +%************************************************************************ + +[The {\em code} that detects these things is elsewhere.] + +Consider: +\begin{verbatim} + let x = fvs \ args -> e + in + if ... then x else + if ... then x else ... 
+\end{verbatim}
+@x@ is used twice (so we probably can't unfold it), but when it is
+entered, the stack is deeper than it was when the definition of @x@
+happened.  Specifically, if instead of allocating a closure for @x@,
+we saved all @x@'s fvs on the stack, and remembered the stack depth at
+that moment, then whenever we enter @x@ we can simply set the stack
+pointer(s) to these remembered (compile-time-fixed) values, and jump
+to the code for @x@.
+
+All of this is provided x is:
+\begin{enumerate}
+\item
+non-updatable;
+\item
+guaranteed to be entered before the stack retreats -- ie x is not
+buried in a heap-allocated closure, or passed as an argument to something;
+\item
+all the enters have exactly the right number of arguments,
+no more, no less;
+\item
+all the enters are tail calls; that is, they return to the
+caller enclosing the definition of @x@.
+\end{enumerate}
+
+Under these circumstances we say that @x@ is {\em non-escaping}.
+
+An example of when (4) does {\em not} hold:
+\begin{verbatim}
+       let x = ...
+       in case x of ...alts...
+\end{verbatim}
+
+Here, @x@ is certainly entered only when the stack is deeper than when
+@x@ is defined, but here it must return to \tr{...alts...} So we can't
+just adjust the stack down to @x@'s recalled points, because that
+would lose @alts@' context.
+
+Things can get a little more complicated.  Consider:
+\begin{verbatim}
+       let y = ...
+       in let x = fvs \ args -> ...y...
+       in ...x...
+\end{verbatim}
+
+Now, if @x@ is used in a non-escaping way in \tr{...x...}, {\em and}
+@y@ is used in a non-escaping way in \tr{...y...}, {\em then} @y@ is
+non-escaping.
+
+@x@ can even be recursive!  Eg:
+\begin{verbatim}
+       letrec x = [y] \ [v] -> if v then x True else ...
+       in
+       ...(x b)...
+\end{verbatim}
+
+
+%************************************************************************
+%*                                                                      *
+\subsection[codeGen-for-non-escaping]{Generating code for a ``non-escaping let''}
+%*                                                                      *
+%************************************************************************
+
+
+Generating code for this is fun.  It is all very very similar to what
+we do for a case expression.  The duality is between
+\begin{verbatim}
+       let-no-escape x = b
+       in e
+\end{verbatim}
+and
+\begin{verbatim}
+       case e of ... -> b
+\end{verbatim}
+
+That is, the RHS of @x@ (ie @b@) will execute {\em later}, just like
+the alternative of the case; it needs to be compiled in an environment
+in which all volatile bindings are forgotten, and the free vars are
+bound only to stable things like stack locations.  The @e@ part will
+execute {\em next}, just like the scrutinee of a case.
+
+First, we need to save all @x@'s free vars
+on the stack, if they aren't there already.
+
+\begin{code}
+cgLetNoEscapeClosure
+       :: Id                   -- binder
+       -> CostCentreStack      -- NB: *** NOT USED *** ToDo (WDP 94/06)
+       -> StgBinderInfo        -- NB: ditto
+       -> SRT
+       -> StgLiveVars          -- variables live in RHS, including the binders
+                               -- themselves in the case of a recursive group
+       -> EndOfBlockInfo       -- where are we going to?
+       -> Maybe VirtualSpOffset -- Slot for current cost centre
+       -> RecFlag              -- is the binding recursive?
+       -> [Id]                 -- args (as in \ args -> body)
+       -> StgExpr              -- body (as in above)
+       -> FCode (Id, CgIdInfo)
+
+-- ToDo: deal with the cost-centre issues
+
+cgLetNoEscapeClosure
+       bndr cc binder_info srt full_live_in_rhss
+       rhs_eob_info cc_slot rec args body
+  = let
+       arity   = length args
+       lf_info = mkLFLetNoEscape arity
+    in
+    -- saveVolatileVarsAndRegs done earlier in cgExpr.
+
+    do { (vSp, _) <- forkEvalHelp rhs_eob_info
+
+               (do { allocStackTop retAddrSizeW
+                   ; nukeDeadBindings full_live_in_rhss })
+
+               (do { deAllocStackTop retAddrSizeW
+                   ; abs_c <- forkProc $ cgLetNoEscapeBody bndr cc
+                                               cc_slot args body
+
+                       -- Ignore the label that comes back from
+                       -- emitDirectReturnTarget.  It must be conjured up elsewhere
+                   ; emitDirectReturnTarget (idName bndr) abs_c srt
+                   ; return () })
+
+       ; returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) }
+\end{code}
+
+\begin{code}
+cgLetNoEscapeBody :: Id           -- Name of the join point
+                 -> CostCentreStack
+                 -> Maybe VirtualSpOffset
+                 -> [Id]          -- Args
+                 -> StgExpr       -- Body
+                 -> Code
+
+cgLetNoEscapeBody bndr cc cc_slot all_args body = do
+  { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args
+
+     -- restore the saved cost centre.  BUT: we must not free the stack slot
+     -- containing the cost centre, because it might be needed for a
+     -- recursive call to this let-no-escape.
+  ; restoreCurrentCostCentre cc_slot False{-don't free-}
+
+       -- Enter the closure's cc, if required
+  ; -- enterCostCentreCode closure_info cc IsFunction
+
+       -- The "return address" slot doesn't have a return address in it;
+       -- but the heap-check needs it filled in if the heap-check fails.
+       -- So we pass code to fill it in to the heap-check macro
+  ; sp_rel <- getSpRelOffset ret_slot
+
+  ; let lbl = mkReturnInfoLabel (idUnique bndr)
+       frame_hdr_asst = oneStmt (CmmStore sp_rel (mkLblExpr lbl))
+
+       -- Do heap check [ToDo: omit for non-recursive case by recording in
+       --               in envt and absorbing at call site]
+  ; unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst
+               (cgExpr body)
+  }
+\end{code}
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
new file mode 100644
index 0000000000..4f95c9b36a
--- /dev/null
+++ b/compiler/codeGen/CgMonad.lhs
@@ -0,0 +1,853 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgMonad.lhs,v 1.45 2005/06/21 10:44:41 simonmar Exp $
+%
+\section[CgMonad]{The code generation monad}
+
+See the beginning of the top-level @CodeGen@ module, to see how this
+monadic stuff fits into the Big Picture.
+
+\begin{code}
+module CgMonad (
+       Code,   -- type
+       FCode,  -- type
+
+       initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+       returnFC, fixC, checkedAbsC,
+       stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
+       newUnique, newUniqSupply,
+
+       CgStmts, emitCgStmts, forkCgStmts, cgStmtsToBlocks,
+       getCgStmts', getCgStmts,
+       noCgStmts, oneCgStmt, consCgStmt,
+
+       getCmm,
+       emitData, emitProc, emitSimpleProc,
+
+       forkLabelledCode,
+       forkClosureBody, forkStatics, forkAlts, forkEval,
+       forkEvalHelp, forkProc, codeOnly,
+       SemiTaggingStuff, ConTagZ,
+
+       EndOfBlockInfo(..),
+       setEndOfBlockInfo, getEndOfBlockInfo,
+
+       setSRTLabel, getSRTLabel,
+       setTickyCtrLabel, getTickyCtrLabel,
+
+       StackUsage(..), HeapUsage(..),
+       VirtualSpOffset, VirtualHpOffset,
+       initStkUsage, initHpUsage,
+       getHpUsage,  setHpUsage,
+       heapHWM,
+
+       moduleName,
+
+       Sequel(..), -- ToDo: unabstract?
+
+       -- ideally we wouldn't export these, but some other modules access internal state
+       getState, setState, getInfoDown, getDynFlags, getHomeModules,
+
+       -- more localised access to monad state
+       getStkUsage, setStkUsage,
+       getBinds, setBinds, getStaticBinds,
+
+       -- out of general friendliness, we also export ...
+       CgInfoDownwards(..), CgState(..)
-- non-abstract + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds ) + +import DynFlags ( DynFlags ) +import Packages ( HomeModules ) +import Cmm +import CmmUtils ( CmmStmts, isNopStmt ) +import CLabel +import SMRep ( WordOff ) +import Module ( Module ) +import Id ( Id ) +import VarEnv +import OrdList +import Unique ( Unique ) +import Util ( mapAccumL ) +import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply, uniqFromSupply ) +import FastString +import Outputable + +import Control.Monad ( liftM ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-environment]{Stuff for manipulating environments} +%* * +%************************************************************************ + +This monadery has some information that it only passes {\em +downwards}, as well as some ``state'' which is modified as we go +along. + +\begin{code} +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_hmods :: HomeModules, -- Packages we depend on + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt :: CLabel, -- label of the current SRT + cgd_ticky :: CLabel, -- current destination for ticky counts + cgd_eob :: EndOfBlockInfo -- Info for stuff to do at end of basic block: + } + +initCgInfoDown :: DynFlags -> HomeModules -> Module -> CgInfoDownwards +initCgInfoDown dflags hmods mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_hmods = hmods, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt = error "initC: srt", + cgd_ticky = mkTopTickyCtrLabel, + cgd_eob = initEobInfo } + +data CgState + = MkCgState { + cgs_stmts :: OrdList CgStmt, -- Current proc + cgs_tops :: OrdList CmmTop, + -- Other procedures and data blocks in this compilation unit + -- Both the latter two are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_stk_usg :: StackUsage, + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = nilOL, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_stk_usg = initStkUsage, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } +\end{code} + +@EndOfBlockInfo@ tells what to do at the end of this block of code or, +if the expression is a @case@, what to do at the end of each +alternative. + +\begin{code} +data EndOfBlockInfo + = EndOfBlockInfo + VirtualSpOffset -- Args Sp: trim the stack to this point at a + -- return; push arguments starting just + -- above this point on a tail call. + + -- This is therefore the stk ptr as seen + -- by a case alternative. + Sequel + +initEobInfo = EndOfBlockInfo 0 OnStack +\end{code} + +Any addressing modes inside @Sequel@ must be ``robust,'' in the sense +that it must survive stack pointer adjustments at the end of the +block. 
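For instance (a hypothetical sketch, not module code): when compiling a case expression, the scrutinee might be given end-of-block info built from the CaseAlts sequel defined just below, recording both the trimmed stack pointer and the continuation label:

    -- args_sp, join_lbl and bndr are invented names for illustration.
    scrutEobInfo :: VirtualSpOffset -> CLabel -> Id -> EndOfBlockInfo
    scrutEobInfo args_sp join_lbl bndr
      = EndOfBlockInfo args_sp (CaseAlts join_lbl Nothing bndr False)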
+
+\begin{code}
+data Sequel
+  = OnStack             -- Continuation is on the stack
+  | UpdateCode          -- Continuation is update
+
+  | CaseAlts
+         CLabel     -- Jump to this; if the continuation is for a vectored
+                    -- case this might be the label of a return vector
+         SemiTaggingStuff
+         Id         -- The case binder, only used to see if it's dead
+         Bool       -- True <=> polymorphic, push a SEQ frame too
+
+type SemiTaggingStuff
+  = Maybe                       -- Maybe[1] we don't have any semi-tagging stuff...
+       ([(ConTagZ, CmmLit)],    -- Alternatives
+        CmmLit)                 -- Default (will be a can't happen RTS label if can't happen)
+
+type ConTagZ = Int     -- A *zero-indexed* constructor tag
+
+-- The case branch is executed only from a successful semitagging
+-- venture, when a case has looked at a variable, found that it's
+-- evaluated, and wants to load up the contents and go to the join
+-- point.
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               CgStmt type
+%*                                                                      *
+%************************************************************************
+
+The CgStmts type is what the code generator outputs: it is a tree of
+statements, including in-line labels.  The job of flattenCgStmts is to
+turn this into a list of basic blocks, each of which ends in a jump
+statement (either a local branch or a non-local jump).
+
+\begin{code}
+type CgStmts = OrdList CgStmt
+
+data CgStmt
+  = CgStmt  CmmStmt
+  | CgLabel BlockId
+  | CgFork  BlockId CgStmts
+
+flattenCgStmts :: BlockId -> CgStmts -> [CmmBasicBlock]
+flattenCgStmts id stmts =
+       case flatten (fromOL stmts) of
+         ([],blocks) -> blocks
+         (block,blocks) -> BasicBlock id block : blocks
+ where
+  flatten [] = ([],[])
+
+  -- A label at the end of a function or fork: this label must not be reachable,
+  -- but it might be referred to from another BB that also isn't reachable.
+  -- Eliminating these has to be done with a dead-code analysis.  For now,
+  -- we just make it into a well-formed block by adding a recursive jump.
+  flatten [CgLabel id]
+    = ( [], [BasicBlock id [CmmBranch id]] )
+
+  -- A jump/branch: throw away all the code up to the next label, because
+  -- it is unreachable.  Be careful to keep forks that we find on the way.
+  flatten (CgStmt stmt : stmts)
+    | isJump stmt
+    = case dropWhile isOrdinaryStmt stmts of
+          [] -> ( [stmt], [] )
+          [CgLabel id] -> ( [stmt], [BasicBlock id [CmmBranch id]])
+          (CgLabel id : stmts) -> ( [stmt], BasicBlock id block : blocks )
+              where (block,blocks) = flatten stmts
+          (CgFork fork_id stmts : ss) ->
+              flatten (CgFork fork_id stmts : CgStmt stmt : ss)
+
+  flatten (s:ss) =
+       case s of
+         CgStmt stmt -> (stmt:block,blocks)
+         CgLabel id  -> ([CmmBranch id],BasicBlock id block:blocks)
+         CgFork fork_id stmts ->
+               (block, BasicBlock fork_id fork_block : fork_blocks ++ blocks)
+               where (fork_block, fork_blocks) = flatten (fromOL stmts)
+    where (block,blocks) = flatten ss
+
+isJump (CmmJump _ _) = True
+isJump (CmmBranch _) = True
+isJump _ = False
+
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt _ = False
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               Stack and heap models
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+type VirtualHpOffset = WordOff -- Both are in
+type VirtualSpOffset = WordOff -- units of words
+
+data StackUsage
+  = StackUsage {
+       virtSp :: VirtualSpOffset,
+               -- Virtual offset of topmost allocated slot
+
+       frameSp :: VirtualSpOffset,
+               -- Virtual offset of the return address of the enclosing frame.
+ -- This RA describes the liveness/pointedness of
+ -- all the stack from frameSp downwards
+ -- INVARIANT: less than or equal to virtSp
+
+ freeStk :: [VirtualSpOffset],
+ -- List of free slots, in *increasing* order
+ -- INVARIANT: all <= virtSp
+ -- All slots <= virtSp are taken except these ones
+
+ realSp :: VirtualSpOffset,
+ -- Virtual offset of real stack pointer register
+
+ hwSp :: VirtualSpOffset
+ } -- Highest value ever taken by virtSp
+
+-- INVARIANT: The environment contains no Stable references to
+-- stack slots below (lower offset) frameSp
+-- It can contain volatile references to this area though.
+
+data HeapUsage =
+ HeapUsage {
+ virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
+ realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
+ }
+\end{code}
+
+The heap high water mark is just virtHp; unlike the stack there is no
+separate high-water-mark field, because the high water marks of
+forked-off branches are folded straight back into virtHp (see
+stateIncUsage and maxHpHw below). Remember, virtHp never retreats!
+
+Note Jan 04: ok, so why do we only look at the virtual Hp?? (Because,
+per the above, the branches' high water marks are already merged into
+it.)
+
+\begin{code}
+heapHWM :: HeapUsage -> VirtualHpOffset
+heapHWM = virtHp
+\end{code}
+
+Initialisation.
+
+\begin{code}
+initStkUsage :: StackUsage
+initStkUsage = StackUsage {
+ virtSp = 0,
+ frameSp = 0,
+ freeStk = [],
+ realSp = 0,
+ hwSp = 0
+ }
+
+initHpUsage :: HeapUsage
+initHpUsage = HeapUsage {
+ virtHp = 0,
+ realHp = 0
+ }
+\end{code}
+
+@stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water
+marks found in $e_2$.
+
+\begin{code}
+stateIncUsage :: CgState -> CgState -> CgState
+stateIncUsage s1 s2@(MkCgState { cgs_stk_usg = stk_usg, cgs_hp_usg = hp_usg })
+ = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg,
+ cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp stk_usg }
+ `addCodeBlocksFrom` s2
+
+stateIncUsageEval :: CgState -> CgState -> CgState
+stateIncUsageEval s1 s2
+ = s1 { cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp (cgs_stk_usg s2) }
+ `addCodeBlocksFrom` s2
+ -- We don't max the heap high-watermark because stateIncUsageEval is
+ -- used only in forkEval, which in turn is only used for blocks of code
+ -- which do their own heap-check.
+
+addCodeBlocksFrom :: CgState -> CgState -> CgState
+-- Add code blocks from the latter to the former
+-- (The cgs_stmts will often be empty, but not always; see codeOnly)
+s1 `addCodeBlocksFrom` s2
+ = s1 { cgs_stmts = cgs_stmts s1 `appOL` cgs_stmts s2,
+ cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 }
+
+maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
+hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
+
+maxStkHw :: StackUsage -> VirtualSpOffset -> StackUsage
+stk_usg `maxStkHw` hw = stk_usg { hwSp = hwSp stk_usg `max` hw }
+\end{code}
+
+%************************************************************************
+%* *
+ The FCode monad
+%* *
+%************************************************************************
+
+\begin{code}
+newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState))
+type Code = FCode ()
+
+instance Monad FCode where
+ (>>=) = thenFC
+ return = returnFC
+
+{-# INLINE thenC #-}
+{-# INLINE thenFC #-}
+{-# INLINE returnFC #-}
+\end{code}
+The generated code (the ``Abstract~C'') is kept in the state, not the
+environment, so as to improve strictness.
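+
+For intuition: @FCode@ is a reader monad (the @CgInfoDownwards@ is
+only ever consulted) stacked on a state monad (the @CgState@ is
+threaded through). As a sketch of what the bind, @thenFC@ (defined
+below), amounts to (here @unFCode@ is hypothetical, since the newtype
+deliberately exports no field accessor):
+
+ unFCode :: FCode a -> CgInfoDownwards -> CgState -> (a, CgState)
+ unFCode (FCode f) = f
+
+ -- do { x <- m; k x } behaves as
+ -- FCode (\info st -> let (x, st') = unFCode m info st
+ -- in unFCode (k x) info st')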
+ +\begin{code} +initC :: DynFlags -> HomeModules -> Module -> FCode a -> IO a + +initC dflags hmods mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags hmods mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\info_down state -> (val, state)) +\end{code} + +\begin{code} +thenC :: Code -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +listCs :: [Code] -> Code +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> Code) -> [a] -> Code +mapCs = mapM_ +\end{code} + +\begin{code} +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM +\end{code} + +And the knot-tying combinator: +\begin{code} +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) +\end{code} + +%************************************************************************ +%* * + Operators for getting and setting the state and "info_down". + +%* * +%************************************************************************ + +\begin{code} +getState :: FCode CgState +getState = FCode $ \info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \info_down _ -> ((),state) + +getStkUsage :: FCode StackUsage +getStkUsage = do + state <- getState + return $ cgs_stk_usg state + +setStkUsage :: StackUsage -> Code +setStkUsage new_stk_usg = do + state <- getState + setState $ state {cgs_stk_usg = new_stk_usg} + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> Code +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getHomeModules :: FCode HomeModules +getHomeModules = liftM cgd_hmods getInfoDown + +withInfoDown :: FCode a -> CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state +\end{code} + + 
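+A common pattern in the accessors above is get/modify/set. Purely as
+an illustration (this combinator is not part of the module), they
+could all be built from one helper:
+
+ modifyState :: (CgState -> CgState) -> Code
+ modifyState f = do { state <- getState; setState (f state) }
+
+ -- e.g. setHpUsage new_hp_usg = modifyState (\s -> s {cgs_hp_usg = new_hp_usg})
+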
+%************************************************************************
+%* *
+ Forking
+%* *
+%************************************************************************
+
+@forkClosureBody@ takes a code, $c$, and compiles it in a completely
+fresh environment, except that:
+ - compilation info and statics are passed in unchanged.
+The current environment is passed on completely unaltered, except that
+abstract C from the fork is incorporated.
+
+@forkProc@ takes a code and compiles it in the current environment,
+returning the basic blocks thus constructed. The current environment
+is passed on completely unchanged. It is pretty similar to
+@getBlocks@, except that the latter does affect the environment.
+
+@forkStatics@ $fc$ compiles $fc$ in an environment whose statics come
+from the current bindings, but which is otherwise freshly initialised.
+The Abstract~C returned is attached to the current state, but the
+bindings and usage information are otherwise unchanged.
+
+\begin{code}
+forkClosureBody :: Code -> Code
+forkClosureBody body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let body_info_down = info { cgd_eob = initEobInfo }
+ ((),fork_state) = doFCode body_code body_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state) )
+ setState $ state `addCodeBlocksFrom` fork_state }
+
+forkStatics :: FCode a -> FCode a
+forkStatics body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let rhs_info_down = info { cgd_statics = cgs_binds state,
+ cgd_eob = initEobInfo }
+ (result, fork_state_out) = doFCode body_code rhs_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state_out) )
+ setState (state `addCodeBlocksFrom` fork_state_out)
+ ; return result }
+
+forkProc :: Code -> FCode CgStmts
+forkProc body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us)
+ { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+ -- ToDo: is the hp usage necessary?
+ (code_blks, fork_state_out) = doFCode (getCgStmts body_code)
+ info_down fork_state_in
+ ; setState $ state `stateIncUsageEval` fork_state_out
+ ; return code_blks }
+
+codeOnly :: Code -> Code
+-- Emit any code from the inner thing into the outer thing
+-- Do not affect anything else in the outer state
+-- Used in almost-circular code to prevent false loop dependencies
+codeOnly body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+ ((), fork_state_out) = doFCode body_code info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out }
+\end{code}
+
+@forkAlts@ $bs$ takes fcodes $bs$ for the branches of a @case@
+(including any default) and compiles each in the current
+environment.
The current environment is passed on unmodified, except
+that
+ - the worst stack high-water mark is incorporated
+ - the virtual Hp is moved on to the worst virtual Hp for the branches
+
+\begin{code}
+forkAlts :: [FCode a] -> FCode [a]
+
+forkAlts branch_fcodes
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let compile us branch
+ = (us2, doFCode branch info_down branch_state)
+ where
+ (us1,us2) = splitUniqSupply us
+ branch_state = (initCgState us1) {
+ cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+
+ (_us, results) = mapAccumL compile us branch_fcodes
+ (branch_results, branch_out_states) = unzip results
+ ; setState $ foldl stateIncUsage state branch_out_states
+ -- NB foldl. state is the *left* argument to stateIncUsage
+ ; return branch_results }
+\end{code}
+
+@forkEval@ takes two blocks of code.
+
+ - The first meddles with the environment to set it up as expected by
+ the alternatives of a @case@ which does an eval (or gc-possible primop).
+ - The second block is the code for the alternatives.
+ (plus info for semi-tagging purposes)
+
+@forkEval@ picks up the virtual stack pointer and returns a suitable
+@EndOfBlockInfo@ for the caller to use, together with whatever value
+is returned by the second block.
+
+The environment for the alternatives is initialised by the env-setting
+code passed in (note the @nukeVolatileBinds@ in @forkEvalHelp@), and
+usage is incorporated with @stateIncUsageEval@, which ignores the heap
+usage.
+
+\begin{code}
+forkEval :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode Sequel -- Semi-tagging info to store
+ -> FCode EndOfBlockInfo -- The new end of block info
+
+forkEval body_eob_info env_code body_code
+ = do { (v, sequel) <- forkEvalHelp body_eob_info env_code body_code
+ ; returnFC (EndOfBlockInfo v sequel) }
+
+forkEvalHelp :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode a -- The code to do after the eval
+ -> FCode (VirtualSpOffset, -- Sp
+ a) -- Result of the FCode
+ -- A disturbingly complicated function
+forkEvalHelp body_eob_info env_code body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let { info_down_for_body = info_down {cgd_eob = body_eob_info}
+ ; (_, env_state) = doFCode env_code info_down_for_body
+ (state {cgs_uniqs = us})
+ ; state_for_body = (initCgState (cgs_uniqs env_state))
+ { cgs_binds = binds_for_body,
+ cgs_stk_usg = stk_usg_for_body }
+ ; binds_for_body = nukeVolatileBinds (cgs_binds env_state)
+ ; stk_usg_from_env = cgs_stk_usg env_state
+ ; virtSp_from_env = virtSp stk_usg_from_env
+ ; stk_usg_for_body = stk_usg_from_env {realSp = virtSp_from_env,
+ hwSp = virtSp_from_env}
+ ; (value_returned, state_at_end_return)
+ = doFCode body_code info_down_for_body state_for_body
+ }
+ ; ASSERT( isNilOL (cgs_stmts state_at_end_return) )
+ -- The code coming back should consist only of nested declarations,
+ -- notably of the return vector!
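+ -- (stateIncUsageEval, used just below, keeps only the forked
+ -- state's code blocks and its stack high-water mark; the heap
+ -- usage is deliberately ignored, because such blocks do their
+ -- own heap checks.)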
+ setState $ state `stateIncUsageEval` state_at_end_return + ; return (virtSp_from_env, value_returned) } + + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +nopC :: Code +nopC = return () + +whenC :: Bool -> Code -> Code +whenC True code = code +whenC False code = nopC + +stmtC :: CmmStmt -> Code +stmtC stmt = emitCgStmt (CgStmt stmt) + +labelC :: BlockId -> Code +labelC id = emitCgStmt (CgLabel id) + +newLabelC :: FCode BlockId +newLabelC = do { id <- newUnique; return (BlockId id) } + +checkedAbsC :: CmmStmt -> Code +-- Emit code, eliminating no-ops +checkedAbsC stmt = emitStmts (if isNopStmt stmt then nilOL + else unitOL stmt) + +stmtsC :: [CmmStmt] -> Code +stmtsC stmts = emitStmts (toOL stmts) + +-- Emit code; no no-op checking +emitStmts :: CmmStmts -> Code +emitStmts stmts = emitCgStmts (fmap CgStmt stmts) + +-- forkLabelledCode is for emitting a chunk of code with a label, outside +-- of the current instruction stream. +forkLabelledCode :: Code -> FCode BlockId +forkLabelledCode code = getCgStmts code >>= forkCgStmts + +emitCgStmt :: CgStmt -> Code +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitData :: Section -> [CmmStatic] -> Code +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: [CmmLit] -> CLabel -> [LocalReg] -> [CmmBasicBlock] -> Code +emitProc lits lbl args blocks + = do { let proc_block = CmmProc (map CmmStaticLit lits) lbl args blocks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> Code -> Code +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = do { stmts <- getCgStmts code + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks } + +getCmm :: Code -> FCode Cmm +-- Get all the CmmTops (there should be no stmts) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + + +-- emit CgStmts into the current instruction stream +emitCgStmts :: CgStmts -> Code +emitCgStmts stmts + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `appOL` stmts } } + +-- emit CgStmts outside the current instruction stream, and return a label +forkCgStmts :: CgStmts -> FCode BlockId +forkCgStmts stmts + = do { id <- newLabelC + ; emitCgStmt (CgFork id stmts) + ; return id + } + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. 
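+-- (The fresh label made here names the entry block; the real work is
+-- done by flattenCgStmts, defined near the top of this module.)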
+cgStmtsToBlocks :: CgStmts -> FCode [CmmBasicBlock]
+cgStmtsToBlocks stmts
+ = do { id <- newLabelC
+ ; return (flattenCgStmts id stmts)
+ }
+
+-- collect the code emitted by an FCode computation
+getCgStmts' :: FCode a -> FCode (a, CgStmts)
+getCgStmts' fcode
+ = do { state1 <- getState
+ ; (a, state2) <- withState fcode (state1 { cgs_stmts = nilOL })
+ ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+ ; return (a, cgs_stmts state2) }
+
+getCgStmts :: FCode a -> FCode CgStmts
+getCgStmts fcode = do { (_,stmts) <- getCgStmts' fcode; return stmts }
+
+-- Simple ways to construct CgStmts:
+noCgStmts :: CgStmts
+noCgStmts = nilOL
+
+oneCgStmt :: CmmStmt -> CgStmts
+oneCgStmt stmt = unitOL (CgStmt stmt)
+
+consCgStmt :: CmmStmt -> CgStmts -> CgStmts
+consCgStmt stmt stmts = CgStmt stmt `consOL` stmts
+
+-- ----------------------------------------------------------------------------
+-- Get the current module name
+
+moduleName :: FCode Module
+moduleName = do { info <- getInfoDown; return (cgd_mod info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the end-of-block info
+
+setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code
+setEndOfBlockInfo eob_info code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_eob = eob_info})
+
+getEndOfBlockInfo :: FCode EndOfBlockInfo
+getEndOfBlockInfo = do
+ info <- getInfoDown
+ return (cgd_eob info)
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current SRT label
+
+-- There is just one SRT for each top level binding; all the nested
+-- bindings use sub-sections of this SRT. The label is passed down to
+-- the nested bindings via the monad.
+
+getSRTLabel :: FCode CLabel -- Used only by cgPanic
+getSRTLabel = do info <- getInfoDown
+ return (cgd_srt info)
+
+setSRTLabel :: CLabel -> FCode a -> FCode a
+setSRTLabel srt_lbl code
+ = do info <- getInfoDown
+ withInfoDown code (info { cgd_srt = srt_lbl})
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current ticky counter label
+
+getTickyCtrLabel :: FCode CLabel
+getTickyCtrLabel = do
+ info <- getInfoDown
+ return (cgd_ticky info)
+
+setTickyCtrLabel :: CLabel -> Code -> Code
+setTickyCtrLabel ticky code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_ticky = ticky})
+\end{code}
diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs
new file mode 100644
index 0000000000..b826a33cba
--- /dev/null
+++ b/compiler/codeGen/CgParallel.hs
@@ -0,0 +1,90 @@
+-- Code generation related to GpH
+-- (a) parallel
+-- (b) GranSim
+
+module CgParallel(
+ staticGranHdr,staticParHdr,
+ granFetchAndReschedule, granYield,
+ doGranAllocate
+ ) where
+
+import CgMonad
+import CgCallConv ( mkRegLiveness )
+import Id ( Id )
+import Cmm ( CmmLit, GlobalReg(..), node, CmmExpr )
+import StaticFlags ( opt_GranMacros )
+import Outputable
+
+staticParHdr :: [CmmLit]
+-- Parallel header words in a static closure
+staticParHdr = []
+
+--------------------------------------------------------
+-- GranSim stuff
+--------------------------------------------------------
+
+staticGranHdr :: [CmmLit]
+-- Gransim header words in a static closure
+staticGranHdr = []
+
+doGranAllocate :: CmmExpr -> Code
+-- macro DO_GRAN_ALLOCATE
+doGranAllocate hp
+ | not opt_GranMacros = nopC
+ | otherwise = panic "doGranAllocate"
+
+
+
+-------------------------
+granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+-- Emit code for simulating a fetch and then a reschedule.
+granFetchAndReschedule regs node_reqd
+ | opt_GranMacros && (node `elem` map snd regs || node_reqd)
+ = do { fetch
+ ; reschedule liveness node_reqd }
+ | otherwise
+ = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+fetch = panic "granFetch"
+ -- Was: absC (CMacroStmt GRAN_FETCH [])
+ --HWL: generate GRAN_FETCH macro for GrAnSim
+ -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are m.i.a.
+ -- (not implemented)
+
+reschedule liveness node_reqd = panic "granReschedule"
+ -- Was: absC (CMacroStmt GRAN_RESCHEDULE [
+ -- mkIntCLit (I# (word2Int# liveness_mask)),
+ -- mkIntCLit (if node_reqd then 1 else 0)])
+
+
+-------------------------
+-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It
+-- allows a context switch at places where @node@ is not alive (it uses the
+-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit
+-- this kind of macro at the beginning of the following kinds of basic blocks:
+-- \begin{itemize}
+-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally
+-- we use @fetchAndReschedule@ at a slow entry code.
+-- \item Fast entry code (see @CgClosure.lhs@).
+-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided
+-- that they are not inlined (see @CgCases.lhs@). These alternatives will
+-- be turned into separate functions.
+-- \end{itemize}
+
+granYield :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+
+granYield regs node_reqd
+ | opt_GranMacros && node_reqd = yield liveness
+ | otherwise = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+yield liveness = panic "granYield"
+ -- Was : absC (CMacroStmt GRAN_YIELD
+ -- [mkIntCLit (I# (word2Int# liveness_mask))])
+
+
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
new file mode 100644
index 0000000000..bc7c9140ed
--- /dev/null
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -0,0 +1,584 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for PrimOps.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgPrimOp (
+ cgPrimOp
+ ) where
+
+#include "HsVersions.h"
+
+import ForeignCall ( CCallConv(CCallConv) )
+import StgSyn ( StgLiveVars, StgArg )
+import CgForeignCall ( emitForeignCall' )
+import CgBindery ( getVolatileRegs, getArgAmodes )
+import CgMonad
+import CgInfoTbls ( getConstrTag )
+import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW )
+import ForeignCall
+import Cmm
+import CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
+ mkDirty_MUT_VAR_Label, mkRtsCodeLabel )
+import CmmUtils
+import MachOp
+import SMRep
+import PrimOp ( PrimOp(..)
) +import SMRep ( tablesNextToCode ) +import Constants ( wORD_SIZE, wORD_SIZE_IN_BITS ) +import StaticFlags ( opt_Parallel ) +import Outputable + +-- --------------------------------------------------------------------------- +-- Code generation for PrimOps + +cgPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +cgPrimOp results op args live + = do arg_exprs <- getArgAmodes args + let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] + emitPrimOp results op non_void_args live + + +emitPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] live +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordAdd [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] live +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordSub [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] live + = do + -- for now, just implement this in a C function + -- later, we might want to inline it. 
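+ -- Roughly: res = newSpark(BaseReg, arg), an ordinary C call, with
+ -- the volatile global registers among 'live' saved around it.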
+ vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmForeignCall newspark CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (arg,PtrHint)] + (Just vols) + where + newspark = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("newSpark"))) + +emitPrimOp [res] ReadMutVarOp [mutv] live + = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize)) + +emitPrimOp [] WriteMutVarOp [mutv,var] live + = do + stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [{-no results-}] + (CmmForeignCall (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (mutv,PtrHint)] + (Just vols) + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] live + = stmtC $ + CmmAssign res (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] live + = emitPrimOp [res] SizeofByteArrayOp [arg] live + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [arg] live + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] live + = stmtC (CmmAssign res (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] live + = stmtC (CmmAssign res (cmmLoadIndexW arg fixedHdrSize)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize, + cmmLoadIndexW arg2 fixedHdrSize + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] live + = stmtC (CmmAssign res arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +emitPrimOp [res] DataToTagOp [arg] live + = stmtC (CmmAssign res (getConstrTag arg)) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. 
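+ (Hence unsafeFreezeArray# below just rewrites the info pointer to
+ the frozen variant; the payload is untouched.)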
-} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] live + = stmtsC [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + CmmAssign res arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] live + = stmtC (CmmAssign res arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. 
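+-- (Both go through doIndexOffAddrOp: a load at base + ix*size, with no
+-- header offset since Addr# is a bare pointer, plus an optional
+-- widening cast to a full word.)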
+ +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- ReadXXXArray, identical to IndexXXXArray. 
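+-- (These use doIndexByteArrayOp: the same load, but offset past the
+-- arrWordsHdrSize header of the ByteArray# closure.)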
+ +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Addr args live = 
doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] live + | nopOp op + = stmtC (CmmAssign res arg) + + | Just (mop,rep) <- narrowOp op + = stmtC (CmmAssign res (CmmMachOp (mop rep wordRep) [ + CmmMachOp (mop wordRep rep) [arg]])) + +emitPrimOp [res] op args live + | Just prim <- callishOp op + = do vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmPrim prim) + [(a,NoHint) | a<-args] -- ToDo: hints? + (Just vols) + + | Just mop <- translateOp op + = let stmt = CmmAssign res (CmmMachOp mop args) in + stmtC stmt + +emitPrimOp _ op _ _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) +narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) +narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) +narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) +narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) +narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just 
mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordRep) +translateOp CharNeOp = Just (MO_Ne wordRep) +translateOp CharGeOp = Just (MO_U_Ge wordRep) +translateOp CharLeOp = Just (MO_U_Le wordRep) +translateOp CharGtOp = Just (MO_U_Gt wordRep) +translateOp CharLtOp = Just (MO_U_Lt wordRep) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_Eq F64) +translateOp DoubleNeOp = Just (MO_Ne F64) +translateOp DoubleGeOp = Just (MO_S_Ge F64) +translateOp DoubleLeOp = Just (MO_S_Le F64) +translateOp DoubleGtOp = Just (MO_S_Gt F64) +translateOp DoubleLtOp = Just (MO_S_Lt F64) + +translateOp DoubleAddOp = Just (MO_Add F64) +translateOp DoubleSubOp = Just (MO_Sub F64) +translateOp DoubleMulOp = Just (MO_Mul F64) +translateOp DoubleDivOp = Just (MO_S_Quot F64) +translateOp DoubleNegOp = Just (MO_S_Neg F64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_Eq F32) +translateOp FloatNeOp = Just (MO_Ne F32) +translateOp FloatGeOp = Just (MO_S_Ge F32) +translateOp FloatLeOp = Just (MO_S_Le F32) +translateOp FloatGtOp = Just (MO_S_Gt F32) +translateOp FloatLtOp = Just (MO_S_Lt F32) + +translateOp FloatAddOp = Just (MO_Add F32) +translateOp FloatSubOp = Just (MO_Sub F32) +translateOp FloatMulOp = Just (MO_Mul F32) +translateOp FloatDivOp = Just (MO_S_Quot F32) +translateOp FloatNegOp = Just (MO_S_Neg F32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) +translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) + +translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) +translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) + +translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) +translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. 
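+-- (For example, a double-precision sqrt may be a single machine
+-- instruction on one backend but a call to the C sqrt function on
+-- another.)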
+ +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. + +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + + +doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val +doWriteOffAddrOp _ _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast rep addr idx val +doWriteByteArrayOp _ _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp addr idx val + = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + + +mkBasicIndexedRead off Nothing read_rep res base idx + = stmtC (CmmAssign res (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = stmtC (CmmAssign res (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite off Nothing write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) +mkBasicIndexedWrite off (Just cast) write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val])) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off rep base idx + = cmmIndexExpr rep (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off rep base idx + = CmmLoad (cmmIndexOffExpr off rep base idx) rep + +setInfo :: CmmExpr -> CmmExpr -> CmmStmt +setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr + diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs new file mode 100644 index 0000000000..1488e34956 --- /dev/null +++ 
b/compiler/codeGen/CgProf.hs @@ -0,0 +1,478 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgProf ( + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitCostCentreDecl, emitCostCentreStackDecl, + emitRegisterCC, emitRegisterCCS, + emitSetCCC, emitCCS, + + -- Lag/drag/void stuff + ldvEnter, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, + closureName, isToplevClosure, closureReEntrant, ) +import CgUtils +import CgMonad +import SMRep ( StgWord, profHdrSize ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel ) + +import Module ( moduleString ) +import Id ( Id ) +import CostCentre +import StgSyn ( GenStgExpr(..), StgExpr ) +import StaticFlags ( opt_SccProfilingOn ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +import Maybe +import Char ( ord ) +import Monad ( when ) + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr wordRep + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> Code +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + stmtC (CmmStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +-- ----------------------------------------------------------------------------- +-- Recording allocation in a cost centre + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. 
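+-- (A no-op unless cost-centre profiling is on; see ifProfiling below.)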
+profDynAlloc :: ClosureInfo -> CmmExpr -> Code
+profDynAlloc cl_info ccs
+ = ifProfiling $
+ profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
+
+-- | Record the allocation of a closure (size is given by a CmmExpr)
+-- The size must be in words, because the allocation counter in a CCS counts
+-- in words.
+profAlloc :: CmmExpr -> CmmExpr -> Code
+profAlloc words ccs
+ = ifProfiling $
+ stmtC (addToMemE alloc_rep
+ (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
+ (CmmMachOp (MO_U_Conv wordRep alloc_rep) $
+ [CmmMachOp mo_wordSub [words,
+ CmmLit (mkIntCLit profHdrSize)]]))
+ -- subtract the "profiling overhead", which is the
+ -- profiling header in a closure.
+ where
+ alloc_rep = REP_CostCentreStack_mem_alloc
+
+-- ----------------------------------------------------------------------
+-- Setting the cost centre in a new closure
+
+chooseDynCostCentres :: CostCentreStack
+ -> [Id] -- Args
+ -> StgExpr -- Body
+ -> FCode (CmmExpr, CmmExpr)
+-- Called when allocating a closure
+-- Tells which cost centre to put in the object, and which
+-- to blame the cost of allocation on
+chooseDynCostCentres ccs args body = do
+ -- Cost-centre we record in the object
+ use_ccs <- emitCCS ccs
+
+ -- Cost-centre on which we blame the allocation
+ let blame_ccs
+ | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
+ | otherwise = use_ccs
+
+ return (use_ccs, blame_ccs)
+
+
+-- Some CostCentreStacks are a sequence of pushes on top of CCCS.
+-- These pushes must be performed before we can refer to the stack in
+-- an expression.
+emitCCS :: CostCentreStack -> FCode CmmExpr
+emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
+ where
+ (cc's, ccs') = decomposeCCS ccs
+
+ push_em ccs [] = return ccs
+ push_em ccs (cc:rest) = do
+ tmp <- newTemp wordRep
+ pushCostCentre tmp ccs cc
+ push_em (CmmReg tmp) rest
+
+ccsExpr :: CostCentreStack -> CmmExpr
+ccsExpr ccs
+ | isCurrentCCS ccs = curCCS
+ | otherwise = CmmLit (mkCCostCentreStack ccs)
+
+
+isBox :: StgExpr -> Bool
+-- If it's an utterly trivial RHS, then it must be
+-- one introduced by boxHigherOrderArgs for profiling,
+-- so we charge it to "OVERHEAD".
+-- This looks like a GROSS HACK to me --SDM
+isBox (StgApp fun []) = True
+isBox other = False
+
+
+-- -----------------------------------------------------------------------
+-- Setting the current cost centre on entry to a closure
+
+-- For lexically scoped profiling we have to load the cost centre from
+-- the closure entered, if the costs are not supposed to be inherited.
+-- This is done immediately on entering the fast entry point.
+
+-- Load current cost centre from closure, if not inherited.
+-- Node is guaranteed to point to it, if profiling and not inherited.
+
+enterCostCentre
+ :: ClosureInfo
+ -> CostCentreStack
+ -> StgExpr -- The RHS of the closure
+ -> Code
+
+-- We used to have a special case for bindings of form
+-- f = g True
+-- where g has arity 2. The RHS is a thunk, but we don't
+-- need to update it; and we want to subsume costs.
+-- We don't have this sort of PAP any more, so the special
+-- case has gone away.
+ +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + stmtC (CmmStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + stmtC (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + stmtC (CmmStore curCCSAddr enc_ccs) + ; stmtC (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (CmmReg nodeReg) + is_box = isBox body + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> Code +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> Code +enterCostCentreThunk closure = + ifProfiling $ do + stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall SLIT("EnterFunCCS") [(stack,PtrHint)] + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. 
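+-- (enteringPAP n just stores n into the RTS variable entering_PAP;
+-- enter_ccs_fsub above resets it to 0.)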
+enteringPAP :: Integer -> Code +enteringPAP n + = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel SLIT("entering_PAP")))) + (CmmLit (CmmInt n cIntRep))) + +ifProfiling :: Code -> Code +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +-- --------------------------------------------------------------------------- +-- Initialising Cost Centres & CCSs + +emitCostCentreDecl + :: CostCentre + -> Code +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (moduleString (cc_mod cc)) + ; let + lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + + +emitCostCentreStackDecl + :: CostCentreStack + -> Code +emitCostCentreStackDecl ccs + | Just cc <- maybeSingletonCCS ccs = do + { let + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + -- + lits = zero : mkCCostCentre cc : replicate (sizeof_ccs_words - 2) zero + ; emitDataLits (mkCCSLabel ccs) lits + } + | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 I64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. 
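+ -- (e.g. were SIZEOF_CostCentreStack 92 bytes with wORD_SIZE 4,
+ -- then 92 `divMod` 4 == (23,0), giving 23 words)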
+ | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +emitRegisterCC :: CostCentre -> Code +emitRegisterCC cc = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST wordRep), + CmmStore cC_LIST cc_lit, + CmmAssign tmp (CmmLoad cC_ID cIntRep), + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg tmp), + CmmStore cC_ID (cmmRegOffB tmp 1) + ] + } + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +emitRegisterCCS :: CostCentreStack -> Code +emitRegisterCCS ccs = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST wordRep), + CmmStore cCS_LIST ccs_lit, + CmmAssign tmp (CmmLoad cCS_ID cIntRep), + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg tmp), + CmmStore cCS_ID (cmmRegOffB tmp 1) + ] + } + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> Code +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp wordRep + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + stmtC (CmmStore curCCSAddr (CmmReg tmp)) + when (isSccCountCostCentre cc) $ + stmtC (bumpSccCount curCCS) + +pushCostCentre :: CmmReg -> CmmExpr -> CostCentre -> Code +pushCostCentre result ccs cc + = emitRtsCallWithResult result PtrHint + SLIT("PushCostCentre") [(ccs,PtrHint), + (CmmLit (mkCCostCentre cc), PtrHint)] + +bumpSccCount :: CmmExpr -> CmmStmt +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> Code +ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. 
+-- +ldvEnter :: CmmExpr -> Code +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (stmtC (CmmStore ldv_wd new_ldv_wd)) + where + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) + [CmmLoad (mkLblExpr (mkRtsDataLabel SLIT("era"))) cIntRep] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs new file mode 100644 index 0000000000..7cb310d521 --- /dev/null +++ b/compiler/codeGen/CgStackery.lhs @@ -0,0 +1,339 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgStackery.lhs,v 1.27 2004/09/30 10:35:49 simonpj Exp $ +% +\section[CgStackery]{Stack management functions} + +Stack-twiddling operations, which are pretty low-down and grimy. +(This is the module that knows all about stack layouts, etc.) + +\begin{code} +module CgStackery ( + spRel, getVirtSp, getRealSp, setRealSp, + setRealAndVirtualSp, getSpRelOffset, + + allocPrimStack, allocStackTop, deAllocStackTop, + adjustStackHW, getFinalStackHW, + setStackFrame, getStackFrame, + mkVirtStkOffsets, mkStkAmodes, + freeStackSlots, + pushUpdateFrame, emitPushUpdateFrame, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgUtils ( cmmOffsetB, cmmRegOffW ) +import CgProf ( initUpdFrameProf ) +import SMRep +import Cmm +import CmmUtils ( CmmStmts, mkLblExpr ) +import CLabel ( mkUpdInfoLabel ) +import Constants +import Util ( sortLe ) +import FastString ( LitString ) +import OrdList ( toOL ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage} +%* * +%************************************************************************ + +spRel is a little function that abstracts the stack direction. Note that most +of the code generator is dependent on the stack direction anyway, so +changing this on its own spells certain doom. ToDo: remove? + + THIS IS DIRECTION SENSITIVE! + +Stack grows down, positive virtual offsets correspond to negative +additions to the stack pointer. + +\begin{code} +spRel :: VirtualSpOffset -- virtual offset of Sp + -> VirtualSpOffset -- virtual offset of The Thing + -> WordOff -- integer offset +spRel sp off = sp - off +\end{code} + +@setRealAndVirtualSp@ sets into the environment the offsets of the +current position of the real and virtual stack pointers in the current +stack frame. The high-water mark is set too. It generates no code. +It is used to initialise things at the beginning of a closure body. 
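+
+As a worked example of the virtual/real split (the offsets here are
+invented for illustration): with the real Sp at virtual offset 10, a
+slot at virtual offset 7 is addressed as @Sp[3]@, since @spRel 10 7 == 3@;
+with the real Sp at 8, the same slot would be @Sp[1]@.  Only the
+difference between the two virtual offsets matters, which is what lets
+us defer committing to a real Sp until the end of a basic block.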
+ +\begin{code} +setRealAndVirtualSp :: VirtualSpOffset -- New real Sp + -> Code + +setRealAndVirtualSp new_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {virtSp = new_sp, + realSp = new_sp, + hwSp = new_sp}) } + +getVirtSp :: FCode VirtualSpOffset +getVirtSp + = do { stk_usg <- getStkUsage + ; return (virtSp stk_usg) } + +getRealSp :: FCode VirtualSpOffset +getRealSp + = do { stk_usg <- getStkUsage + ; return (realSp stk_usg) } + +setRealSp :: VirtualSpOffset -> Code +setRealSp new_real_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {realSp = new_real_sp}) } + +getSpRelOffset :: VirtualSpOffset -> FCode CmmExpr +getSpRelOffset virtual_offset + = do { real_sp <- getRealSp + ; return (cmmRegOffW spReg (spRel real_sp virtual_offset)) } +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-layout]{Laying out a stack frame} +%* * +%************************************************************************ + +'mkVirtStkOffsets' is given a list of arguments. The first argument +gets the /largest/ virtual stack offset (remember, virtual offsets +increase towards the top of stack). + +\begin{code} +mkVirtStkOffsets + :: VirtualSpOffset -- Offset of the last allocated thing + -> [(CgRep,a)] -- things to make offsets for + -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) + +mkVirtStkOffsets init_Sp_offset things + = loop init_Sp_offset [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((VoidArg,t):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,t):things) + = loop thing_slot ((t,thing_slot):offs) things + where + thing_slot = offset + cgRepSizeW rep + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. + +-- | 'mkStkAmodes' is a higher-level version of +-- 'mkVirtStkOffsets'. It starts from the tail-call locations. +-- It returns a single list of addressing modes for the stack +-- locations, and therefore is in the monad. It /doesn't/ adjust the +-- high water mark. + +mkStkAmodes + :: VirtualSpOffset -- Tail call positions + -> [(CgRep,CmmExpr)] -- things to make offsets for + -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + CmmStmts) -- Assignments to appropriate stk slots + +mkStkAmodes tail_Sp things + = do { rSp <- getRealSp + ; let (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp things + abs_cs = [ CmmStore (cmmRegOffW spReg (spRel rSp offset)) amode + | (amode, offset) <- offsets + ] + ; returnFC (last_Sp_offset, toOL abs_cs) } +\end{code} + +%************************************************************************ +%* * +\subsection[CgStackery-monadery]{Inside-monad functions for stack manipulation} +%* * +%************************************************************************ + +Allocate a virtual offset for something. 
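+
+For example (the free list here is invented for illustration): with
+@freeStk = [2,3,5]@ and a two-word rep, the @find_block@ search in the
+code below returns slot 3, the topmost word of the contiguous pair
+{2,3}, and @delete_block@ then leaves @[5]@; if no contiguous run is
+large enough, we instead grow the virtual Sp by the size of the rep.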
+ +\begin{code} +allocPrimStack :: CgRep -> FCode VirtualSpOffset +allocPrimStack rep + = do { stk_usg <- getStkUsage + ; let free_stk = freeStk stk_usg + ; case find_block free_stk of + Nothing -> do + { let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + -- Adjust high water mark + ; return push_virt_sp } + Just slot -> do + { setStkUsage (stk_usg { freeStk = delete_block free_stk slot }) + ; return slot } + } + where + size :: WordOff + size = cgRepSizeW rep + + -- Find_block looks for a contiguous chunk of free slots + -- returning the offset of its topmost word + find_block :: [VirtualSpOffset] -> Maybe VirtualSpOffset + find_block [] = Nothing + find_block (slot:slots) + | take size (slot:slots) == [slot..top_slot] + = Just top_slot + | otherwise + = find_block slots + where -- The stack grows downwards, with increasing virtual offsets. + -- Therefore, the address of a multi-word object is the *highest* + -- virtual offset it occupies (top_slot below). + top_slot = slot+size-1 + + delete_block free_stk slot = [ s | s <- free_stk, + (s<=slot-size) || (s>slot) ] + -- Retain slots which are not in the range + -- slot-size+1..slot +\end{code} + +Allocate a chunk ON TOP OF the stack. + +\begin{code} +allocStackTop :: WordOff -> FCode VirtualSpOffset +allocStackTop size + = do { stk_usg <- getStkUsage + ; let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + ; return push_virt_sp } +\end{code} + +Pop some words from the current top of stack. This is used for +de-allocating the return address in a case alternative. + +\begin{code} +deAllocStackTop :: WordOff -> FCode VirtualSpOffset +deAllocStackTop size + = do { stk_usg <- getStkUsage + ; let pop_virt_sp = virtSp stk_usg - size + ; setStkUsage (stk_usg { virtSp = pop_virt_sp }) + ; return pop_virt_sp } +\end{code} + +\begin{code} +adjustStackHW :: VirtualSpOffset -> Code +adjustStackHW offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { hwSp = hwSp stk_usg `max` offset }) } +\end{code} + +A knot-tying beast. + +\begin{code} +getFinalStackHW :: (VirtualSpOffset -> Code) -> Code +getFinalStackHW fcode + = do { fixC (\hw_sp -> do + { fcode hw_sp + ; stk_usg <- getStkUsage + ; return (hwSp stk_usg) }) + ; return () } +\end{code} + +\begin{code} +setStackFrame :: VirtualSpOffset -> Code +setStackFrame offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { frameSp = offset }) } + +getStackFrame :: FCode VirtualSpOffset +getStackFrame + = do { stk_usg <- getStkUsage + ; return (frameSp stk_usg) } +\end{code} + + +%******************************************************** +%* * +%* Setting up update frames * +%* * +%******************************************************** + +@pushUpdateFrame@ $updatee$ pushes a general update frame which +points to $updatee$ as the thing to be updated. It is only used +when a thunk has just been entered, so the (real) stack pointers +are guaranteed to be nicely aligned with the top of stack. +@pushUpdateFrame@ adjusts the virtual and tail stack pointers +to reflect the frame pushed. 
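+
+Schematically, the frame pushed looks like this (layout as produced by
+@emitPushUpdateFrame@ below; the profiling fields, when present, are
+filled in by @initUpdFrameProf@):
+
+    frame_addr ->  [ update-frame info pointer  ]   -- mkUpdInfoLabel
+                   [ ...rest of fixed header... ]
+                   [ updatee                    ]   -- at off_updatee bytes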
+ +\begin{code} +pushUpdateFrame :: CmmExpr -> Code -> Code + +pushUpdateFrame updatee code + = do { +#ifdef DEBUG + EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; + ASSERT(case sequel of { OnStack -> True; _ -> False}) +#endif + + allocStackTop (fixedHdrSize + + sIZEOF_StgUpdateFrame_NoHdr `quot` wORD_SIZE) + ; vsp <- getVirtSp + ; setStackFrame vsp + ; frame_addr <- getSpRelOffset vsp + -- The location of the lowest-address + -- word of the update frame itself + + ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $ + do { emitPushUpdateFrame frame_addr updatee + ; code } + } + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code +emitPushUpdateFrame frame_addr updatee = do + stmtsC [ -- Set the info word + CmmStore frame_addr (mkLblExpr mkUpdInfoLabel) + , -- And the updatee + CmmStore (cmmOffsetB frame_addr off_updatee) updatee ] + initUpdFrameProf frame_addr + +off_updatee :: ByteOff +off_updatee = fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-free]{Free stack slots} +%* * +%************************************************************************ + +Explicitly free some stack space. + +\begin{code} +freeStackSlots :: [VirtualSpOffset] -> Code +freeStackSlots extra_free + = do { stk_usg <- getStkUsage + ; let all_free = addFreeSlots (freeStk stk_usg) (sortLe (<=) extra_free) + ; let (new_vsp, new_free) = trim (virtSp stk_usg) all_free + ; setStkUsage (stk_usg { virtSp = new_vsp, freeStk = new_free }) } + +addFreeSlots :: [VirtualSpOffset] -> [VirtualSpOffset] -> [VirtualSpOffset] +-- Merge the two, assuming both are in increasing order +addFreeSlots cs [] = cs +addFreeSlots [] ns = ns +addFreeSlots (c:cs) (n:ns) + | c < n = c : addFreeSlots cs (n:ns) + | otherwise = n : addFreeSlots (c:cs) ns + +trim :: VirtualSpOffset -> [VirtualSpOffset] -> (VirtualSpOffset, [VirtualSpOffset]) +-- Try to trim back the virtual stack pointer, where there is a +-- continuous bunch of free slots at the end of the free list +trim vsp [] = (vsp, []) +trim vsp (slot:slots) + = case trim vsp slots of + (vsp', []) + | vsp' < slot -> pprTrace "trim: strange" (ppr vsp <+> ppr (slot:slots)) + (vsp', []) + | vsp' == slot -> (vsp'-1, []) + | otherwise -> (vsp', [slot]) + (vsp', slots') -> (vsp', slot:slots') +\end{code} diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs new file mode 100644 index 0000000000..dd7327b745 --- /dev/null +++ b/compiler/codeGen/CgTailCall.lhs @@ -0,0 +1,455 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgTailCall]{Tail calls: converting @StgApps@} +%* * +%******************************************************** + +\begin{code} +module CgTailCall ( + cgTailCall, performTailCall, + performReturn, performPrimReturn, + emitKnownConReturnCode, emitAlgReturnCode, + returnUnboxedTuple, ccallReturnUnboxedTuple, + pushUnboxedTuple, + tailCallPrimOp, + + pushReturnAddress + ) where + +#include "HsVersions.h" + +import CgMonad +import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape, + idInfoToAmode, cgIdInfoId, cgIdInfoLF, + cgIdInfoArgRep ) +import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ, + emitVectoredReturnInstr, closureInfoPtr ) +import CgCallConv +import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW, + getSpRelOffset ) +import 
CgHeapery ( setRealHp, getHpRelOffset ) +import CgUtils ( emitSimultaneously ) +import CgTicky +import ClosureInfo +import SMRep ( CgRep, isVoidArg, separateByPtrFollowness ) +import Cmm +import CmmUtils +import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel ) +import Type ( isUnLiftedType ) +import Id ( Id, idName, idUnique, idType ) +import DataCon ( DataCon, dataConTyCon ) +import StgSyn ( StgArg ) +import TyCon ( TyCon ) +import PrimOp ( PrimOp ) +import Outputable + +import Monad ( when ) + +----------------------------------------------------------------------------- +-- Tail Calls + +cgTailCall :: Id -> [StgArg] -> Code + +-- Here's the code we generate for a tail call. (NB there may be no +-- arguments, in which case this boils down to just entering a variable.) +-- +-- * Put args in the top locations of the stack. +-- * Adjust the stack ptr +-- * Make R1 point to the function closure if necessary. +-- * Perform the call. +-- +-- Things to be careful about: +-- +-- * Don't overwrite stack locations before you have finished with +-- them (remember you need the function and the as-yet-unmoved +-- arguments). +-- * Preferably, generate no code to replace x by x on the stack (a +-- common situation in tail-recursion). +-- * Adjust the stack high water mark appropriately. +-- +-- Treat unboxed locals exactly like literals (above) except use the addr +-- mode for the local instead of (CLit lit) in the assignment. + +cgTailCall fun args + = do { fun_info <- getCgIdInfo fun + + ; if isUnLiftedType (idType fun) + then -- Primitive return + ASSERT( null args ) + do { fun_amode <- idInfoToAmode fun_info + ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + + else -- Normal case, fun is boxed + do { arg_amodes <- getArgAmodes args + ; performTailCall fun_info arg_amodes noStmts } + } + + +-- ----------------------------------------------------------------------------- +-- The guts of a tail-call + +performTailCall + :: CgIdInfo -- The function + -> [(CgRep,CmmExpr)] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. + -> Code + +performTailCall fun_info arg_amodes pending_assts + | Just join_sp <- maybeLetNoEscape fun_info + = -- A let-no-escape is slightly different, because we + -- arrange the stack arguments into pointers and non-pointers + -- to make the heap check easier. The tail-call sequence + -- is very similar to returning an unboxed tuple, so we + -- share some code. + do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes + ; emitSimultaneously (pending_assts `plusStmts` arg_assts) + ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) + ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) } + + | otherwise + = do { fun_amode <- idInfoToAmode fun_info + ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + opt_node_asst | nodeMustPointToIt lf_info = node_asst + | otherwise = noStmts + ; EndOfBlockInfo sp _ <- getEndOfBlockInfo + ; hmods <- getHomeModules + + ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of + + -- Node must always point to things we enter + EnterIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + ; doFinalJump sp False (stmtC (CmmJump target [])) } + + -- A function, but we have zero arguments. It is already in WHNF, + -- so we can just return it. + -- As with any return, Node must point to it. 
+ ReturnIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False emitDirectReturnInstr } + + -- A real constructor. Don't bother entering it, + -- just do the right sort of return instead. + -- As with any return, Node must point to it. + ReturnCon con -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (emitKnownConReturnCode con) } + + JumpToIt lbl -> do + { emitSimultaneously (opt_node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (jumpToLbl lbl) } + + -- A slow function call via the RTS apply routines + -- Node must definitely point to the thing + SlowCall -> do + { when (not (null arg_amodes)) $ do + { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map fst arg_amodes) + } + + ; let (apply_lbl, args, extra_args) + = constructSlowCall arg_amodes + + ; directCall sp apply_lbl args extra_args + (node_asst `plusStmts` pending_assts) + } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { if arity == length arg_amodes + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (map fst (drop arity arg_amodes)) + + ; let + -- The args beyond the arity go straight on the stack + (arity_args, extra_args) = splitAt arity arg_amodes + + ; directCall sp lbl arity_args extra_args + (opt_node_asst `plusStmts` pending_assts) + } + } + where + fun_name = idName (cgIdInfoId fun_info) + lf_info = cgIdInfoLF fun_info + + + +directCall sp lbl args extra_args assts = do + let + -- First chunk of args go in registers + (reg_arg_amodes, stk_args) = assignCallRegs args + + -- Any "extra" arguments are placed in frames on the + -- stack after the other arguments. + slow_stk_args = slowArgs extra_args + + reg_assts = assignToRegs reg_arg_amodes + -- + (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args) + + emitSimultaneously (reg_assts `plusStmts` + stk_assts `plusStmts` + assts) + + doFinalJump final_sp False (jumpToLbl lbl) + +-- ----------------------------------------------------------------------------- +-- The final clean-up before we do a jump at the end of a basic block. +-- This code is shared by tail-calls and returns. + +doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code +doFinalJump final_sp is_let_no_escape jump_code + = do { -- Adjust the high-water mark if necessary + adjustStackHW final_sp + + -- Push a return address if necessary (after the assignments + -- above, in case we clobber a live stack location) + -- + -- DONT push the return address when we're about to jump to a + -- let-no-escape: the final tail call in the let-no-escape + -- will do this. + ; eob <- getEndOfBlockInfo + ; whenC (not is_let_no_escape) (pushReturnAddress eob) + + -- Final adjustment of Sp/Hp + ; adjustSpAndHp final_sp + + -- and do the jump + ; jump_code } + +-- ----------------------------------------------------------------------------- +-- A general return (just a special case of doFinalJump, above) + +performReturn :: Code -- The code to execute to actually do the return + -> Code + +performReturn finish_code + = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} finish_code } + +-- ----------------------------------------------------------------------------- +-- Primitive Returns +-- Just load the return value into the right register, and return. 
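+--
+-- Schematically, for a non-void rep the code emitted below is just
+-- (pseudo-code; the register is chosen by dataReturnConvPrim, so it
+-- need not literally be R1):
+--
+--      R1 = amode;
+--      <adjust Sp/Hp, push return address if still pending>
+--      jump to the return address on the top of the stack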
+
+performPrimReturn :: CgRep -> CmmExpr  -- The thing to return
+                  -> Code
+performPrimReturn rep amode
+  =  do { whenC (not (isVoidArg rep))
+               (stmtC (CmmAssign ret_reg amode))
+       ; performReturn emitDirectReturnInstr }
+  where
+    ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+  = emitAlgReturnCode (dataConTyCon con)
+                      (CmmLit (mkIntCLit (dataConTagZ con)))
+                       -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ = do  { case ctrlReturnConvAlg tycon of
+           VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+                                       ; emitVectoredReturnInstr tag }
+           UnvectoredReturn _ -> emitDirectReturnInstr
+       }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+--   - The tail-call target is an info table on the stack
+--
+--   - We separate stack arguments into pointers and non-pointers,
+--     to make it easier to leave things in a sane state for a heap check.
+--     This is OK because we can never partially-apply an unboxed tuple,
+--     unlike a function.  The same technique is used when calling
+--     let-no-escape functions, because they also can't be partially
+--     applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+  = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+       ; tickyUnboxedTupleReturn (length amodes)
+       ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+       ; emitSimultaneously assts
+       ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset            -- Sp at which to start pushing
+                -> [(CgRep, CmmExpr)]          -- amodes of the components
+                -> FCode (VirtualSpOffset,     -- final Sp
+                          CmmStmts)            -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+  = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+  = do { let  (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+              -- separate the rest of the args into pointers and non-pointers
+              (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+              reg_arg_assts = assignToRegs reg_arg_amodes
+
+              -- push ptrs, then nonptrs, on the stack
+       ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
+       ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+       ; returnFC (final_sp,
+                   reg_arg_assts `plusStmts`
+                   ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+--             - push return address
+--             - adjust stack pointer
+--             - r = call(args...)
+--             - assign regs for unboxed tuple (usually just R1 = r)
+--             - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+ +ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code +ccallReturnUnboxedTuple amodes before_jump + = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo + + -- Push a return address if necessary + ; pushReturnAddress eob + ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack) + (do { adjustSpAndHp args_sp + ; before_jump + ; returnUnboxedTuple amodes }) + } + +-- ----------------------------------------------------------------------------- +-- Calling an out-of-line primop + +tailCallPrimOp :: PrimOp -> [StgArg] -> Code +tailCallPrimOp op args + = do { -- We're going to perform a normal-looking tail call, + -- except that *all* the arguments will be in registers. + -- Hence the ASSERT( null leftovers ) + arg_amodes <- getArgAmodes args + ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes + jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op) + + ; ASSERT(null leftovers) -- no stack-resident args + emitSimultaneously (assignToRegs arg_regs) + + ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} jump_to_primop } + +-- ----------------------------------------------------------------------------- +-- Return Addresses + +-- We always push the return address just before performing a tail call +-- or return. The reason we leave it until then is because the stack +-- slot that the return address is to go into might contain something +-- useful. +-- +-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a +-- case expression and the return address is still to be pushed. +-- +-- There are cases where it doesn't look necessary to push the return +-- address: for example, just before doing a return to a known +-- continuation. However, the continuation will expect to find the +-- return address on the stack in case it needs to do a heap check. + +pushReturnAddress :: EndOfBlockInfo -> Code + +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False)) + = do { sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) } + +-- For a polymorphic case, we have two return addresses to push: the case +-- return, and stg_seq_frame_info which turns a possible vectored return +-- into a direct one. +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True)) + = do { sp_rel <- getSpRelOffset (args_sp-1) + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) + ; sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) } + +pushReturnAddress _ = nopC + +-- ----------------------------------------------------------------------------- +-- Misc. + +jumpToLbl :: CLabel -> Code +-- Passes no argument to the destination procedure +jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}]) + +assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts +assignToRegs reg_args + = mkStmts [ CmmAssign (CmmGlobal reg_id) expr + | (expr, reg_id) <- reg_args ] +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-adjust]{Adjusting the stack pointers} +%* * +%************************************************************************ + +This function adjusts the stack and heap pointers just before a tail +call or return. The stack pointer is adjusted to its final position +(i.e. to point to the last argument for a tail call, or the activation +record for a return). 
The heap pointer may be moved backwards, in +cases where we overallocated at the beginning of the basic block (see +CgCase.lhs for discussion). + +These functions {\em do not} deal with high-water-mark adjustment. +That's done by functions which allocate stack space. + +\begin{code} +adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr + -> Code +adjustSpAndHp newRealSp + = do { -- Adjust stack, if necessary. + -- NB: the conditional on the monad-carried realSp + -- is out of line (via codeOnly), to avoid a black hole + ; new_sp <- getSpRelOffset newRealSp + ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case + ; setRealSp newRealSp -- where realSp==newRealSp + + -- Adjust heap. The virtual heap pointer may be less than the real Hp + -- because the latter was advanced to deal with the worst-case branch + -- of the code, and we may be in a better-case branch. In that case, + -- move the real Hp *back* and retract some ticky allocation count. + ; hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + ; new_hp <- getHpRelOffset vHp + ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp + ; tickyAllocHeap (vHp - rHp) -- ...ditto + ; setRealHp vHp + } +\end{code} diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs new file mode 100644 index 0000000000..3e72981c50 --- /dev/null +++ b/compiler/codeGen/CgTicky.hs @@ -0,0 +1,370 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, slopSize, closureSMRep, + closureUpdReqd, closureName, isStaticClosure ) +import CgUtils +import CgMonad +import SMRep ( ClosureType(..), smRepClosureType, CgRep ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr, cmmIndexExpr ) +import CLabel ( CLabel, mkRtsDataLabel, mkRednCountsLabel ) + +import Name ( isInternalName ) +import Id ( Id, idType ) +import StaticFlags ( opt_DoTickyProfiling ) +import BasicTypes ( Arity ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType ( Type, isDictTy, tcSplitTyConApp_maybe, + tcSplitFunTy_maybe ) +import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon, + maybeTyConSingleCon ) +import Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- The ticky header words in a static closure +-- Was SET_STATIC_TICKY_HDR +staticTickyHdr + 
| not opt_DoTickyProfiling = []
+  | otherwise                = [zeroCLit]
+
+emitTickyCounter :: ClosureInfo -> [Id] -> Int -> Code
+emitTickyCounter cl_info args on_stk
+  = ifTicky $
+    do { mod_name <- moduleName
+       ; fun_descr_lit <- mkStringCLit (fun_descr mod_name)
+       ; arg_descr_lit <- mkStringCLit arg_descr
+       ; emitDataLits ticky_ctr_label  -- Must match layout of StgEntCounter
+           [ CmmInt 0 I16,
+             CmmInt (fromIntegral (length args)) I16,  -- Arity
+             CmmInt (fromIntegral on_stk) I16,         -- Words passed on stack
+             CmmInt 0 I16,                             -- 2-byte gap
+             fun_descr_lit,
+             arg_descr_lit,
+             zeroCLit,                 -- Entry count
+             zeroCLit,                 -- Allocs
+             zeroCLit                  -- Link
+           ] }
+  where
+    name = closureName cl_info
+    ticky_ctr_label = mkRednCountsLabel name
+    arg_descr = map (showTypeCategory . idType) args
+    fun_descr mod_name = ppr_for_ticky_name mod_name name
+
+-- When printing the name of a thing in a ticky file, we want to
+-- give the module name even for *local* things.  We print
+-- just "x (M)" rather than "M.x" to distinguish them from the global kind.
+ppr_for_ticky_name mod_name name
+  | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name)))
+  | otherwise           = showSDocDebug (ppr name)
+
+-- -----------------------------------------------------------------------------
+-- Ticky stack frames
+
+tickyPushUpdateFrame    = ifTicky $ bumpTickyCounter SLIT("UPDF_PUSHED_ctr")
+tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter SLIT("UPDF_OMITTED_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky entries
+
+tickyEnterDynCon      = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_CON_ctr")
+tickyEnterDynThunk    = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_THK_ctr")
+tickyEnterStaticCon   = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_CON_ctr")
+tickyEnterStaticThunk = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_THK_ctr")
+tickyEnterViaNode     = ifTicky $ bumpTickyCounter SLIT("ENT_VIA_NODE_ctr")
+
+tickyEnterThunk :: ClosureInfo -> Code
+tickyEnterThunk cl_info
+  | isStaticClosure cl_info = tickyEnterStaticThunk
+  | otherwise               = tickyEnterDynThunk
+
+tickyBlackHole :: Bool{-updatable-} -> Code
+tickyBlackHole updatable
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | updatable = SLIT("UPD_BH_SINGLE_ENTRY_ctr")
+        | otherwise = SLIT("UPD_BH_UPDATABLE_ctr")
+
+tickyUpdateBhCaf cl_info
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | closureUpdReqd cl_info = SLIT("UPD_CAF_BH_SINGLE_ENTRY_ctr")
+        | otherwise              = SLIT("UPD_CAF_BH_UPDATABLE_ctr")
+
+tickyEnterFun :: ClosureInfo -> Code
+tickyEnterFun cl_info
+  = ifTicky $
+    do  { bumpTickyCounter ctr
+        ; fun_ctr_lbl <- getTickyCtrLabel
+        ; registerTickyCtr fun_ctr_lbl
+        ; bumpTickyCounter' fun_ctr_lbl }
+  where
+    ctr | isStaticClosure cl_info = SLIT("TICK_ENT_STATIC_FUN_DIRECT")
+        | otherwise               = SLIT("TICK_ENT_DYN_FUN_DIRECT")
+
+registerTickyCtr :: CLabel -> Code
+-- Register a ticky counter
+--   if ( ! f_ct.registeredp ) {
+--         f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
+--         ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
+--         f_ct.registeredp = 1 }
+registerTickyCtr ctr_lbl
+  = emitIf test (stmtsC register_stmts)
+  where
+    test = CmmMachOp (MO_Not I16)
+           [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
+                               oFFSET_StgEntCounter_registeredp)) I16]
+    register_stmts
+      = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link))
+                   (CmmLoad ticky_entry_ctrs wordRep)
+        , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
+        , CmmStore (CmmLit (cmmLabelOffB ctr_lbl
+                               oFFSET_StgEntCounter_registeredp))
+                   (CmmLit (mkIntCLit 1)) ]
+    ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel SLIT("ticky_entry_ctrs"))
+
+tickyReturnOldCon, tickyReturnNewCon :: Arity -> Code
+tickyReturnOldCon arity
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_OLD_ctr")
+                 ; bumpHistogram SLIT("RET_OLD_hst") arity }
+tickyReturnNewCon arity
+  | not opt_DoTickyProfiling = nopC
+  | otherwise
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_NEW_ctr")
+                 ; bumpHistogram SLIT("RET_NEW_hst") arity }
+
+tickyUnboxedTupleReturn :: Int -> Code
+tickyUnboxedTupleReturn arity
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_UNBOXED_TUP_ctr")
+                 ; bumpHistogram SLIT("RET_UNBOXED_TUP_hst") arity }
+
+tickyVectoredReturn :: Int -> Code
+tickyVectoredReturn family_size
+  = ifTicky $ do { bumpTickyCounter SLIT("VEC_RETURN_ctr")
+                 ; bumpHistogram SLIT("RET_VEC_RETURN_hst") family_size }
+
+-- -----------------------------------------------------------------------------
+-- Ticky calls
+
+-- Ticks at a *call site*:
+tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_TOO_FEW_ARGS_ctr")
+tickyKnownCallExact = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_ctr")
+tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_EXTRA_ctr")
+tickyUnknownCall = ifTicky $ bumpTickyCounter SLIT("UNKNOWN_CALL_ctr")
+
+-- Tick for the call pattern at slow call site (i.e. in addition to
+-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
+tickySlowCallPat :: [CgRep] -> Code
+tickySlowCallPat args = return ()
+{- LATER: (introduces recursive module dependency now).
+  case callPattern args of
+    (str, True)  -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel str)
+    (str, False) -> bumpTickyCounter  SLIT("TICK_SLOW_CALL_OTHER")
+
+callPattern :: [CgRep] -> (String,Bool)
+callPattern reps
+  | match == length reps = (chars, True)
+  | otherwise            = (chars, False)
+  where (_,match) = findMatch reps
+        chars     = map argChar reps
+
+argChar VoidArg   = 'v'
+argChar PtrArg    = 'p'
+argChar NonPtrArg = 'n'
+argChar LongArg   = 'l'
+argChar FloatArg  = 'f'
+argChar DoubleArg = 'd'
+-}
+
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
+
+tickyDynAlloc :: ClosureInfo -> Code
+-- Called when doing a dynamic heap allocation
+tickyDynAlloc cl_info
+  = ifTicky $
+    case smRepClosureType (closureSMRep cl_info) of
+       Constr          -> tick_alloc_con
+       ConstrNoCaf     -> tick_alloc_con
+       Fun             -> tick_alloc_fun
+       Thunk           -> tick_alloc_thk
+       ThunkSelector   -> tick_alloc_thk
+  where
+       -- will be needed when we fill in stubs
+    cl_size   = closureSize cl_info
+    slop_size = slopSize cl_info
+
+    tick_alloc_thk
+       | closureUpdReqd cl_info = tick_alloc_up_thk
+       | otherwise              = tick_alloc_se_thk
+
+    tick_alloc_con = panic "ToDo: tick_alloc"
+    tick_alloc_fun = panic "ToDo: tick_alloc"
+    tick_alloc_up_thk = panic "ToDo: tick_alloc"
+    tick_alloc_se_thk = panic "ToDo: tick_alloc"
+
+tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+tickyAllocPrim hdr goods slop = ifTicky $ panic "ToDo: tickyAllocPrim"
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> Code
+tickyAllocThunk goods slop = ifTicky $ panic "ToDo: tickyAllocThunk"
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> Code
+tickyAllocPAP goods slop = ifTicky $ panic "ToDo: tickyAllocPAP"
+
+tickyAllocHeap :: VirtualHpOffset -> Code
+-- Called when doing a heap check [TICK_ALLOC_HEAP]
+tickyAllocHeap hp
+  = ifTicky $
+    do { ticky_ctr <- getTickyCtrLabel
+       ; stmtsC $
+         if hp == 0 then []    -- Inside the stmtC to avoid control
+         else [                -- dependency on the argument
+               -- Bump the allocation count in the StgEntCounter
+           addToMem REP_StgEntCounter_allocs
+                       (CmmLit (cmmLabelOffB ticky_ctr
+                               oFFSET_StgEntCounter_allocs)) hp,
+               -- Bump ALLOC_HEAP_ctr
+           addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_ctr")) 1,
+               -- Bump ALLOC_HEAP_tot
+           addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_tot")) hp] }
+
+-- -----------------------------------------------------------------------------
+-- Ticky utils
+
+ifTicky :: Code -> Code
+ifTicky code
+  | opt_DoTickyProfiling = code
+  | otherwise            = nopC
+
+addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt
+addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+
+-- All the ticky-ticky counters are declared "unsigned long" in C
+bumpTickyCounter :: LitString -> Code
+bumpTickyCounter lbl = bumpTickyCounter' (mkRtsDataLabel lbl)
+
+bumpTickyCounter' :: CLabel -> Code
+bumpTickyCounter' lbl = stmtC (addToMemLbl cLongRep lbl 1)
+
+addToMemLong = addToMem cLongRep
+
+bumpHistogram :: LitString -> Int -> Code
+bumpHistogram lbl n
+  = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep))
+
+bumpHistogramE :: LitString -> CmmExpr -> Code
+bumpHistogramE lbl n
+  = do  t <- newTemp cLongRep
+        stmtC (CmmAssign t n)
+        emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg t, eight]) $
+                stmtC (CmmAssign t eight)
+        stmtC (addToMemLong (cmmIndexExpr cLongRep
+                               (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
+                               (CmmReg t))
+                           1)
+  where
+   eight = CmmLit (CmmInt 8 cLongRep)
+
+------------------------------------------------------------------
+-- Showing the "type category" for ticky-ticky profiling
+
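+-- For instance (illustrative, following the legend below): @Int@ maps
+-- to 'I', @Int#@ to 'i', a function type to '>', a dictionary type to
+-- '+', and an ordinary boxed single-constructor type to 'S'.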
+showTypeCategory :: Type -> Char
+  {-   {C,I,F,D}   char, int, float, double
+       T           tuple
+       S           other single-constructor type
+       {c,i,f,d}   unboxed ditto
+       t           *unpacked* tuple
+       s           *unpacked* single-cons...
+
+       v           void#
+       a           primitive array
+
+       E           enumeration type
+       +           dictionary, unless it's a ...
+       L           List
+       >           function
+       M           other (multi-constructor) data-con type
+       .           other type
+       -           reserved for others to mark as "uninteresting"
+    -}
+showTypeCategory ty
+  = if isDictTy ty
+    then '+'
+    else
+      case tcSplitTyConApp_maybe ty of
+       Nothing -> if isJust (tcSplitFunTy_maybe ty)
+                  then '>'
+                  else '.'
+
+       Just (tycon, _) ->
+          let utc = getUnique tycon in
+         if      utc == charDataConKey then 'C'
+         else if utc == intDataConKey then 'I'
+         else if utc == floatDataConKey then 'F'
+         else if utc == doubleDataConKey then 'D'
+         else if utc == smallIntegerDataConKey ||
+                 utc == largeIntegerDataConKey then 'J'
+         else if utc == charPrimTyConKey then 'c'
+         else if (utc == intPrimTyConKey || utc == wordPrimTyConKey
+               || utc == addrPrimTyConKey) then 'i'
+         else if utc == floatPrimTyConKey then 'f'
+         else if utc == doublePrimTyConKey then 'd'
+         else if isPrimTyCon tycon {- array, we hope -} then 'A'      -- Bogus
+         else if isEnumerationTyCon tycon then 'E'
+         else if isTupleTyCon tycon then 'T'
+         else if isJust (maybeTyConSingleCon tycon) then 'S'
+         else if utc == listTyConKey then 'L'
+         else 'M' -- oh, well...
diff --git a/compiler/codeGen/CgUsages.hi-boot-5 b/compiler/codeGen/CgUsages.hi-boot-5 new file mode 100644 index 0000000000..abb98cec1a --- /dev/null +++ b/compiler/codeGen/CgUsages.hi-boot-5 @@ -0,0 +1,3 @@
+__interface CgUsages 1 0 where
+__export CgUsages getSpRelOffset;
+1 getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative ;
diff --git a/compiler/codeGen/CgUsages.hi-boot-6 b/compiler/codeGen/CgUsages.hi-boot-6 new file mode 100644 index 0000000000..9640603cfb --- /dev/null +++ b/compiler/codeGen/CgUsages.hi-boot-6 @@ -0,0 +1,3 @@
+module CgUsages where
+
+getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs new file mode 100644 index 0000000000..2f69927db0 --- /dev/null +++ b/compiler/codeGen/CgUtils.hs @@ -0,0 +1,688 @@
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgUtils (
+       addIdReps,
+       cgLit,
+       emitDataLits, emitRODataLits, emitIf, emitIfThenElse,
+       emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
+       assignTemp, newTemp,
+       emitSimultaneously,
+       emitSwitch, emitLitSwitch,
+       tagToClosure,
+
+       cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+       cmmOffsetExprW, cmmOffsetExprB,
+       cmmRegOffW, cmmRegOffB,
+       cmmLabelOffW, cmmLabelOffB,
+       cmmOffsetW, cmmOffsetB,
+       cmmOffsetLitW, cmmOffsetLitB,
+       cmmLoadIndexW,
+
+       addToMem, addToMemE,
+       mkWordCLit,
+       mkStringCLit,
+       packHalfWordsCLit,
+       blankWord
+  ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import TyCon           ( TyCon, tyConName )
+import Id              ( Id )
+import Constants       ( wORD_SIZE )
+import SMRep           ( CgRep, StgWord, hALF_WORD_SIZE_IN_BITS, ByteOff,
+                         WordOff, idCgRep )
+import PprCmm          ( {- instances -} )
+import Cmm
+import CLabel
+import CmmUtils
+import MachOp          ( MachRep(..), wordRep, MachOp(..), MachHint(..),
+                         mo_wordOr, mo_wordAnd, mo_wordNe, mo_wordEq,
+                         mo_wordULt, mo_wordUGt, mo_wordUGe, machRepByteWidth )
+import ForeignCall     (
CCallConv(..) ) +import Literal ( Literal(..) ) +import CLabel ( CLabel, mkStringLitLabel ) +import Digraph ( SCC(..), stronglyConnComp ) +import ListSetOps ( assocDefault ) +import Util ( filterOut, sortLe ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import Packages ( HomeModules ) +import FastString ( LitString, FastString, bytesFS ) +import Outputable + +import Char ( ord ) +import DATA_BITS +import DATA_WORD ( Word8 ) +import Maybe ( isNothing ) + +------------------------------------------------------------------------- +-- +-- Random small functions +-- +------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(CgRep, Id)] +addIdReps ids = [(idCgRep id, id) | id <- ids] + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordRep +mkSimpleLit (MachInt64 i) = CmmInt i I64 +mkSimpleLit (MachWord i) = CmmInt i wordRep +mkSimpleLit (MachWord64 i) = CmmInt i I64 +mkSimpleLit (MachFloat r) = CmmFloat r F32 +mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordRep +mkLtOp (MachFloat _) = MO_S_Lt F32 +mkLtOp (MachDouble _) = MO_S_Lt F64 +mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) + + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +----------------------- +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr +cmmLoadIndexW base off + = CmmLoad (cmmOffsetW base off) wordRep + +----------------------- +cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord 
e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordRep + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMem :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmStmt +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) + +addToMemE :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmStmt +addToMemE rep ptr n + = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). +-- +------------------------------------------------------------------------- + +tagToClosure :: HomeModules -> TyCon -> CmmExpr -> CmmExpr +tagToClosure hmods tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel hmods (tyConName tycon) + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitIf :: CmmExpr -- Boolean + -> Code -- Then part + -> Code +-- Emit (if e then x) +-- ToDo: reverse the condition to avoid the extra branch instruction if possible +-- (some conditionals aren't reversible. eg. floating point comparisons cannot +-- be inverted because there exist some values for which both comparisons +-- return False, such as NaN.) 
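+--
+-- The emitted control flow is, schematically:
+--
+--           if (cond) goto t;
+--           goto join;
+--     t:    <then_part>
+--     join: ...
+--
+-- (Reversing the condition, as the ToDo above suggests, would save the
+-- unconditional branch.)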
+emitIf cond then_part
+  = do { then_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitIfThenElse :: CmmExpr      -- Boolean
+               -> Code         -- Then part
+               -> Code         -- Else part
+               -> Code
+-- Emit (if e then x else y)
+emitIfThenElse cond then_part else_part
+  = do { then_id <- newLabelC
+       ; else_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; else_part
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitRtsCall :: LitString -> [(CmmExpr,MachHint)] -> Code
+emitRtsCall fun args = emitRtsCall' [] fun args Nothing
+   -- The 'Nothing' says "save all global registers"
+
+emitRtsCallWithVols :: LitString -> [(CmmExpr,MachHint)] -> [GlobalReg] -> Code
+emitRtsCallWithVols fun args vols
+   = emitRtsCall' [] fun args (Just vols)
+
+emitRtsCallWithResult :: CmmReg -> MachHint -> LitString
+       -> [(CmmExpr,MachHint)] -> Code
+emitRtsCallWithResult res hint fun args
+   = emitRtsCall' [(res,hint)] fun args Nothing
+
+-- Make a call to an RTS C procedure
+emitRtsCall'
+   :: [(CmmReg,MachHint)]
+   -> LitString
+   -> [(CmmExpr,MachHint)]
+   -> Maybe [GlobalReg]
+   -> Code
+emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols)
+  where
+    target   = CmmForeignCall fun_expr CCallConv
+    fun_expr = mkLblExpr (mkRtsCodeLabel fun)
+
+
+-------------------------------------------------------------------------
+--
+--     Strings generate a top-level data block
+--
+-------------------------------------------------------------------------
+
+emitDataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a data-segment data block
+emitDataLits lbl lits
+  = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits)
+
+emitRODataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a read-only data block
+emitRODataLits lbl lits
+  = emitData section (CmmDataLabel lbl : map CmmStaticLit lits)
+  where section | any needsRelocation lits = RelocatableReadOnlyData
+                | otherwise                = ReadOnlyData
+        needsRelocation (CmmLabel _)      = True
+        needsRelocation (CmmLabelOff _ _) = True
+        needsRelocation _                 = False
+
+mkStringCLit :: String -> FCode CmmLit
+-- Make a global definition for the string,
+-- and return its label
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
+  = do  { uniq <- newUnique
+        ; let lbl = mkStringLitLabel uniq
+        ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
+        ; return (CmmLabel lbl) }
+
+-------------------------------------------------------------------------
+--
+--     Assigning expressions to temporaries
+--
+-------------------------------------------------------------------------
+
+assignTemp :: CmmExpr -> FCode CmmExpr
+-- For a non-trivial expression, e, create a local
+-- variable and assign the expression to it
+assignTemp e
+  | isTrivialCmmExpr e = return e
+  | otherwise          = do { reg <- newTemp (cmmExprRep e)
+                            ; stmtC (CmmAssign reg e)
+                            ; return (CmmReg reg) }
+
+
+newTemp :: MachRep -> FCode CmmReg
+newTemp rep = do { uniq <- newUnique; return (CmmLocal (LocalReg uniq rep)) }
+
+
+-------------------------------------------------------------------------
+--
+--     Building case analysis
+--
+-------------------------------------------------------------------------
+
+emitSwitch
+       :: CmmExpr                -- Tag to switch on
+       -> [(ConTagZ, CgStmts)]   -- Tagged branches
+       -> Maybe CgStmts          -- Default branch (if any)
+       -> ConTagZ -> ConTagZ     -- Min and Max possible values;
+                                 -- behaviour outside this range is undefined
+       -> Code
+
+-- ONLY A DEFAULT BRANCH: no case analysis to do
+emitSwitch tag_expr [] (Just stmts) _ _
+  = emitCgStmts stmts
+
+-- Right, off we go
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
+  =    -- Just sort the branches before calling mk_switch
+    do { mb_deflt_id <-
+               case mb_deflt of
+                 Nothing    -> return Nothing
+                 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
+
+       ; dflags <- getDynFlags
+       ; let via_C | HscC <- hscTarget dflags = True
+                   | otherwise                = False
+
+       ; stmts <- mk_switch tag_expr (sortLe le branches)
+                       mb_deflt_id lo_tag hi_tag via_C
+       ; emitCgStmts stmts
+       }
+  where
+    (t1,_) `le` (t2,_) = t1 <= t2
+
+
+mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
+         -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
+         -> FCode CgStmts
+
+-- SINGLETON TAG RANGE: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] _ lo_tag hi_tag via_C
+  | lo_tag == hi_tag
+  = ASSERT( tag == lo_tag )
+    return stmts
+
+-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] Nothing lo_tag hi_tag via_C
+  = return stmts
+       -- The simplifier might have eliminated a case
+       --       so we may have e.g. case xs of
+       --                               [] -> e
+       -- In that situation we can be sure the (:) case
+       -- can't happen, so no need to test
+
+-- SINGLETON BRANCH: one equality check to do
+mk_switch tag_expr [(tag,stmts)] (Just deflt) lo_tag hi_tag via_C
+  = return (CmmCondBranch cond deflt `consCgStmt` stmts)
+  where
+    cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
+       -- We have lo_tag < hi_tag, but there's only one branch,
+       -- so there must be a default
+
+-- ToDo: we might want to check for the two branch case, where one of
+-- the branches is the tag 0, because comparing '== 0' is likely to be
+-- more efficient than other kinds of comparison.
+
+-- DENSE TAG RANGE: use a switch statement.
+--
+-- We also use a switch unconditionally when compiling via C, because
+-- this will get emitted as a C switch statement and the C compiler
+-- should do a good job of optimising it.  Also, older GCC versions
+-- (2.95 in particular) have problems compiling the complicated
+-- if-trees generated by this code, so compiling to a switch every
+-- time works around that problem.
+--
+mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
+  | use_switch         -- Use a switch
+  = do { branch_ids <- mapM forkCgStmts (map snd branches)
+       ; let
+               tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
+
+               find_branch :: ConTagZ -> Maybe BlockId
+               find_branch i = assocDefault mb_deflt tagged_blk_ids i
+
+               -- NB. we have eliminated impossible branches at
+               -- either end of the range (see below), so the first
+               -- tag of a real branch is real_lo_tag (not lo_tag).
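+               -- For example (made-up tags): branches for tags 1 and 3
+               -- with no default give real_lo_tag = 1, real_hi_tag = 3
+               -- and arms = [Just b1, Nothing, Just b3]; the missing arm
+               -- can only be reached if the scrutinee is outside the
+               -- declared range, which emitSwitch's contract leaves
+               -- undefined.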
+ arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + + switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + ; ASSERT(not (all isNothing arms)) + return (oneCgStmt switch_stmt) + } + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lowest_branch hi_tag via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lo_tag highest_branch via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | otherwise -- Use an if-tree + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + -- To avoid duplication + ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt + lo_tag (mid_tag-1) via_C + ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt + mid_tag hi_tag via_C + ; hi_id <- forkCgStmts hi_stmts + ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag)) + branch_stmt = CmmCondBranch cond hi_id + ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts)) + } + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). + where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag: " <+> int lo_tag <+> + text "hi_tag: " <+> int hi_tag <+> + text "real_lo_tag: " <+> int real_lo_tag <+> + text "real_hi_tag: " <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (small || dense || via_C) + -- a 2-branch switch always turns into an if. + small = n_tags <= 4 + dense = n_branches > (n_tags `div` 2) + exhaustive = n_tags == n_branches + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 
1 gives the *second* tag
+ -- There are always at least 2 branches here
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_tag
+
+
+assignTemp' e
+ | isTrivialCmmExpr e = return (CmmNop, e)
+ | otherwise = do { reg <- newTemp (cmmExprRep e)
+ ; return (CmmAssign reg e, CmmReg reg) }
+
+
+emitLitSwitch :: CmmExpr -- Tag to switch on
+ -> [(Literal, CgStmts)] -- Tagged branches
+ -> CgStmts -- Default branch (always)
+ -> Code -- Emit the code
+-- Used for general literals, whose size might not be a word,
+-- where there is always a default case, and where we don't know
+-- the range of values for certain. For simplicity we always generate a tree.
+--
+-- ToDo: for integers we could do better here, perhaps by generalising
+-- mk_switch and using that. --SDM 15/09/2004
+emitLitSwitch scrut [] deflt
+ = emitCgStmts deflt
+emitLitSwitch scrut branches deflt_blk
+ = do { scrut' <- assignTemp scrut
+ ; deflt_blk_id <- forkCgStmts deflt_blk
+ ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
+ ; emitCgStmts blk }
+ where
+ le (t1,_) (t2,_) = t1 <= t2
+
+mk_lit_switch :: CmmExpr -> BlockId
+ -> [(Literal,CgStmts)]
+ -> FCode CgStmts
+mk_lit_switch scrut deflt_blk_id [(lit,blk)]
+ = return (consCgStmt if_stmt blk)
+ where
+ cmm_lit = mkSimpleLit lit
+ rep = cmmLitRep cmm_lit
+ cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]
+ if_stmt = CmmCondBranch cond deflt_blk_id
+
+mk_lit_switch scrut deflt_blk_id branches
+ = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ ; lo_blk_id <- forkCgStmts lo_blk
+ ; let if_stmt = CmmCondBranch cond lo_blk_id
+ ; return (if_stmt `consCgStmt` hi_blk) }
+ where
+ n_branches = length branches
+ (mid_lit,_) = branches !! (n_branches `div` 2)
+ -- See notes above re mid_tag
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_lit
+
+ cond = CmmMachOp (mkLtOp mid_lit)
+ [scrut, CmmLit (mkSimpleLit mid_lit)]
+
+-------------------------------------------------------------------------
+--
+-- Simultaneous assignment
+--
+-------------------------------------------------------------------------
+
+
+emitSimultaneously :: CmmStmts -> Code
+-- Emit code to perform the assignments in the
+-- input simultaneously, using temporary variables when necessary.
+--
+-- The Stmts must be:
+-- CmmNop, CmmComment, CmmAssign, CmmStore
+-- and nothing else
+
+
+-- We use the strongly-connected component algorithm, in which
+-- * the vertices are the statements
+-- * an edge goes from s1 to s2 iff
+-- s1 assigns to something s2 uses
+-- that is, if s1 should *follow* s2 in the final order
+
+type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
+ -- for fast comparison
+
+emitSimultaneously stmts
+ = codeOnly $
+ case filterOut isNopStmt (stmtList stmts) of
+ -- Remove no-ops
+ [] -> nopC
+ [stmt] -> stmtC stmt -- It's often just one stmt
+ stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
+
+doSimultaneously1 :: [CVertex] -> Code
+doSimultaneously1 vertices
+ = let
+ edges = [ (vertex, key1, edges_from stmt1)
+ | vertex@(key1, stmt1) <- vertices
+ ]
+ edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
+ stmt1 `mustFollow` stmt2
+ ]
+ components = stronglyConnComp edges
+
+ -- do_component deals with one strongly-connected component
+ -- Not cyclic, or singleton? Just do it
+ do_component (AcyclicSCC (n,stmt)) = stmtC stmt
+ do_component (CyclicSCC [(n,stmt)]) = stmtC stmt
+
+ -- Cyclic? Then go via temporaries.
Pick one to + -- break the loop and try again with the rest. + do_component (CyclicSCC ((n,first_stmt) : rest)) + = do { from_temp <- go_via_temp first_stmt + ; doSimultaneously1 rest + ; stmtC from_temp } + + go_via_temp (CmmAssign dest src) + = do { tmp <- newTemp (cmmRegRep dest) + ; stmtC (CmmAssign tmp src) + ; return (CmmAssign dest (CmmReg tmp)) } + go_via_temp (CmmStore dest src) + = do { tmp <- newTemp (cmmExprRep src) + ; stmtC (CmmAssign tmp src) + ; return (CmmStore dest (CmmReg tmp)) } + in + mapCs do_component components + +mustFollow :: CmmStmt -> CmmStmt -> Bool +CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmNop `mustFollow` stmt = False +CmmComment _ `mustFollow` stmt = False + + +anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool +-- True if the fn is true of any input of the stmt +anySrc p (CmmAssign _ e) = p e +anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side +anySrc p (CmmComment _) = False +anySrc p CmmNop = False +anySrc p other = True -- Conservative + +regUsedIn :: CmmReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLit _ = False +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg reg' = reg == reg' +reg `regUsedIn` CmmRegOff reg' _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es + +locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +-- (locUsedIn a r e) checks whether writing to r[a] could affect the value of +-- 'e'. Returns True if it's not sure. +locUsedIn loc rep (CmmLit _) = False +locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep +locUsedIn loc rep (CmmReg reg') = False +locUsedIn loc rep (CmmRegOff reg' _) = False +locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es + +possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +-- Assumes that distinct registers (eg Hp, Sp) do not +-- point to the same location, nor any offset thereof. 
+possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmReg r1) rep1 (CmmRegOff r2 0) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
+ = r1==r2 && end1 > start2 && end2 > start1
+ where
+ end1 = start1 + machRepByteWidth rep1
+ end2 = start2 + machRepByteWidth rep2
+
+possiblySameLoc l1 rep1 (CmmLit _) rep2 = False
+possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-5 b/compiler/codeGen/ClosureInfo.hi-boot-5
new file mode 100644
index 0000000000..2291f93cc6
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-5
@@ -0,0 +1,4 @@
+__interface ClosureInfo 1 0 where
+__export ClosureInfo ClosureInfo LambdaFormInfo;
+1 data LambdaFormInfo;
+1 data ClosureInfo;
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-6 b/compiler/codeGen/ClosureInfo.hi-boot-6
new file mode 100644
index 0000000000..d313ccde80
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-6
@@ -0,0 +1,4 @@
+module ClosureInfo where
+
+data LambdaFormInfo
+data ClosureInfo
diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs
new file mode 100644
index 0000000000..84d9dd95ef
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.lhs
@@ -0,0 +1,951 @@
+%
+% (c) The University of Glasgow 1992-2004
+%
+
+ Data structures which describe closures, and
+ operations over those data structures
+
+ Nothing monadic in here
+
+Much of the rationale for these things is in the ``details'' part of
+the STG paper.
+
+\begin{code}
+module ClosureInfo (
+ ClosureInfo, LambdaFormInfo, SMRep, -- all abstract
+ StandardFormInfo,
+
+ ArgDescr(..), Liveness(..),
+ C_SRT(..), needsSRT,
+
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
+ mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+
+ mkClosureInfo, mkConInfo,
+
+ closureSize, closureNonHdrSize,
+ closureGoodStuffSize, closurePtrsSize,
+ slopSize,
+
+ closureName, infoTableLabelFromCI,
+ closureLabelFromCI, closureSRT,
+ closureLFInfo, isLFThunk, closureSMRep, closureUpdReqd,
+ closureNeedsUpdSpace, closureIsThunk,
+ closureSingleEntry, closureReEntrant, isConstrClosure_maybe,
+ closureFunInfo, isStandardFormThunk, isKnownFun,
+
+ enterIdLabel, enterLocalIdLabel, enterReturnPtLabel,
+
+ nodeMustPointToIt,
+ CallMethod(..), getCallMethod,
+
+ blackHoleOnEntry,
+
+ staticClosureRequired,
+ getClosureType,
+
+ isToplevClosure,
+ closureValDescr, closureTypeDescr, -- profiling
+
+ isStaticClosure,
+ cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo,
+
+ staticClosureNeedsLink,
+ ) where
+
+#include "../includes/MachDeps.h"
+#include "HsVersions.h"
+
+import StgSyn
+import SMRep -- all of it
+
+import CLabel
+
+import Constants ( mIN_PAYLOAD_SIZE )
+import Packages ( isDllName, HomeModules )
+import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling,
+ opt_Parallel, opt_DoTickyProfiling )
+import Id ( Id, idType, idArity, idName )
+import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
+import Name ( Name, nameUnique, getOccName, getOccString )
+import OccName ( occNameString )
+import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
+import TcType ( tcSplitSigmaTy )
+import TyCon ( isFunTyCon, isAbstractTyCon )
+import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, ipNameName )
+import FastString
+import Outputable
+import Constants
+
+import TypeRep -- TEMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-datatypes]{Data types for closure information}
+%* *
+%************************************************************************
+
+Information about a closure, from the code generator's point of view.
+
+A ClosureInfo describes the info pointer of a closure. It has
+enough information
+ a) to construct the info table itself
+ b) to allocate a closure containing that info pointer (i.e.
+ it knows the info table label)
+
+We make a ClosureInfo for
+ - each let binding (both top level and not)
+ - each data constructor (for its shared static and
+ dynamic info tables)
+
+\begin{code}
+data ClosureInfo
+ = ClosureInfo {
+ closureName :: !Name, -- The thing bound to this closure
+ closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below)
+ closureSMRep :: !SMRep, -- representation used by storage mgr
+ closureSRT :: !C_SRT, -- What SRT applies to this closure
+ closureType :: !Type, -- Type of closure (ToDo: remove)
+ closureDescr :: !String -- closure description (for profiling)
+ }
+
+ -- Constructor closures don't have a unique info table label (they use
+ -- the constructor's info table), and they don't have an SRT.
+ | ConInfo {
+ closureCon :: !DataCon,
+ closureSMRep :: !SMRep,
+ closureDllCon :: !Bool -- is in a separate DLL
+ }
+
+-- C_SRT is what StgSyn.SRT gets translated to...
+-- we add a label for the table, and expect only the 'offset/length' form
+
+data C_SRT = NoC_SRT
+ | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-}
+
+needsSRT :: C_SRT -> Bool
+needsSRT NoC_SRT = False
+needsSRT (C_SRT _ _ _) = True
+\end{code}
+
+%************************************************************************
+%* *
+\subsubsection[LambdaFormInfo-datatype]{@LambdaFormInfo@: source-derivable info}
+%* *
+%************************************************************************
+
+Information about an identifier, from the code generator's point of
+view. Every identifier is bound to a LambdaFormInfo in the
+environment, which gives the code generator enough info to be able to
+tail call or return that identifier.
+
+Note that a closure is usually bound to an identifier, so a
+ClosureInfo contains a LambdaFormInfo.
+
+\begin{code}
+data LambdaFormInfo
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ !Int -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
+ ArgDescr -- Argument descriptor (should really be in ClosureInfo)
+
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
+
+ | LFThunk -- Thunk (zero arity)
+ TopLevelFlag
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
+ StandardFormInfo
+ !Bool -- True <=> *might* be a function type
+
+ | LFUnknown -- Used for function arguments and imported things.
+ -- We know nothing about this closure. Treat like
+ -- updatable "LFThunk"...
+ -- Imported things which we do know something about use
+ -- one of the other LF constructors (eg LFReEntrant for
+ -- known functions)
+ !Bool -- True <=> *might* be a function type
+
+ | LFLetNoEscape -- See LetNoEscape module for precise description of
+ -- these "lets".
+ !Int -- arity;
+
+ | LFBlackHole -- Used for the closures allocated to hold the result
+ -- of a CAF. We want the target of the update frame to
+ -- be in the heap, so we make a black hole to hold it.
+ CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info).
+
+
+-------------------------
+-- An ArgDescr describes the argument pattern of a function
+
+data ArgDescr
+ = ArgSpec -- Fits one of the standard patterns
+ !Int -- RTS type identifier ARG_P, ARG_N, ...
+
+ | ArgGen -- General case
+ Liveness -- Details about the arguments
+
+
+-------------------------
+-- We represent liveness bitmaps as a Bitmap (whose internal
+-- representation really is a bitmap). These are pinned onto case return
+-- vectors to indicate the state of the stack for the garbage collector.
+--
+-- In the compiled program, liveness bitmaps that fit inside a single
+-- word (StgWord) are stored as a single word, while larger bitmaps are
+-- stored as a pointer to an array of words.
+
+data Liveness
+ = SmallLiveness -- Liveness info that fits in one word
+ StgWord -- Here's the bitmap
+
+ | BigLiveness -- Liveness info with a multi-word bitmap
+ CLabel -- Label for the bitmap
+
+
+-------------------------
+-- StandardFormInfo tells whether this thunk has one of
+-- a small number of standard forms
+
+data StandardFormInfo
+ = NonStandardThunk
+ -- Not one of the standard forms
+
+ | SelectorThunk
+ -- A SelectorThunk is of form
+ -- case x of
+ -- con a1,..,an -> ak
+ -- and the constructor is from a single-constr type.
+ WordOff -- 0-origin offset of ak within the "goods" of
+ -- constructor (Recall that the a1,...,an may be laid
+ -- out in the heap in a non-obvious order.)
+
+ | ApThunk
+ -- An ApThunk is of form
+ -- x1 ... xn
+ -- The code for the thunk just pushes x2..xn on the stack and enters x1.
+ -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
+ -- in the RTS to save space.
+ Int -- Arity, n
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-construction]{Functions which build LFInfos}
+%* *
+%************************************************************************
+
+\begin{code}
+mkLFReEntrant :: TopLevelFlag -- True if top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> ArgDescr -- Argument descriptor
+ -> LambdaFormInfo
+
+mkLFReEntrant top fvs args arg_descr
+ = LFReEntrant top (length args) (null fvs) arg_descr
+
+mkLFThunk thunk_ty top fvs upd_flag
+ = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) )
+ LFThunk top (null fvs)
+ (isUpdatable upd_flag)
+ NonStandardThunk
+ (might_be_a_function thunk_ty)
+
+might_be_a_function :: Type -> Bool
+might_be_a_function ty
+ | Just (tc,_) <- splitTyConApp_maybe (repType ty),
+ not (isFunTyCon tc) && not (isAbstractTyCon tc) = False
+ -- don't forget to check for abstract types, which might
+ -- be functions too.
+ | otherwise = True
\end{code}
+
+@mkConLFInfo@ is similar, for constructors.
+
+\begin{code}
+mkConLFInfo :: DataCon -> LambdaFormInfo
+mkConLFInfo con = LFCon con
+
+mkSelectorLFInfo id offset updatable
+ = LFThunk NotTopLevel False updatable (SelectorThunk offset)
+ (might_be_a_function (idType id))
+
+mkApLFInfo id upd_flag arity
+ = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
+ (might_be_a_function (idType id))
+\end{code}
+
+Miscellaneous LF-infos.
+
+\begin{code}
+mkLFArgument id = LFUnknown (might_be_a_function (idType id))
+
+mkLFLetNoEscape = LFLetNoEscape
+
+mkLFImported :: Id -> LambdaFormInfo
+mkLFImported id
+ = case idArity id of
+ n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0
+ other -> mkLFArgument id -- Not sure of exact arity
+\end{code}
+
+\begin{code}
+isLFThunk :: LambdaFormInfo -> Bool
+isLFThunk (LFThunk _ _ _ _ _) = True
+isLFThunk (LFBlackHole _) = True
+ -- return True for a blackhole: this function is used to determine
+ -- whether to use the thunk header in SMP mode, and a blackhole
+ -- must have one.
+isLFThunk _ = False
+\end{code}
+
+%************************************************************************
+%* *
+ Building ClosureInfos
+%* *
+%************************************************************************
+
+\begin{code}
+mkClosureInfo :: Bool -- Is static
+ -> Id
+ -> LambdaFormInfo
+ -> Int -> Int -- Total and pointer words
+ -> C_SRT
+ -> String -- String descriptor
+ -> ClosureInfo
+mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr
+ = ClosureInfo { closureName = name,
+ closureLFInfo = lf_info,
+ closureSMRep = sm_rep,
+ closureSRT = srt_info,
+ closureType = idType id,
+ closureDescr = descr }
+ where
+ name = idName id
+ sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds
+
+mkConInfo :: HomeModules
+ -> Bool -- Is static
+ -> DataCon
+ -> Int -> Int -- Total and pointer words
+ -> ClosureInfo
+mkConInfo hmods is_static data_con tot_wds ptr_wds
+ = ConInfo { closureSMRep = sm_rep,
+ closureCon = data_con,
+ closureDllCon = isDllName hmods (dataConName data_con) }
+ where
+ sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-sizes]{Functions about closure {\em sizes}}
+%* *
+%************************************************************************
+
+\begin{code}
+closureSize :: ClosureInfo -> WordOff
+closureSize cl_info = hdr_size + closureNonHdrSize cl_info
+ where hdr_size | closureIsThunk cl_info = thunkHdrSize
+ | otherwise = fixedHdrSize
+ -- All thunks use thunkHdrSize, even if they are non-updatable.
+ -- this is because we don't have separate closure types for
+ -- updatable vs. non-updatable thunks, so the GC can't tell the
+ -- difference. If we ever have significant numbers of non-
+ -- updatable thunks, it might be worth fixing this.
+
+closureNonHdrSize :: ClosureInfo -> WordOff
+closureNonHdrSize cl_info
+ = tot_wds + computeSlopSize tot_wds cl_info
+ where
+ tot_wds = closureGoodStuffSize cl_info
+
+closureGoodStuffSize :: ClosureInfo -> WordOff
+closureGoodStuffSize cl_info
+ = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs + nonptrs
+
+closurePtrsSize :: ClosureInfo -> WordOff
+closurePtrsSize cl_info
+ = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs
+
+-- not exported:
+sizes_from_SMRep :: SMRep -> (WordOff,WordOff)
+sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs)
+sizes_from_SMRep BlackHoleRep = (0, 0)
+\end{code}
+
+Computing slop size. WARNING: this looks dodgy --- it has deep
+knowledge of what the storage manager does with the various
+representations...
+
+Slop Requirements: every thunk gets an extra padding word in the
+header, which takes the updated value.
+
+\begin{code}
+slopSize cl_info = computeSlopSize payload_size cl_info
+ where payload_size = closureGoodStuffSize cl_info
+
+computeSlopSize :: WordOff -> ClosureInfo -> WordOff
+computeSlopSize payload_size cl_info
+ = max 0 (minPayloadSize smrep updatable - payload_size)
+ where
+ smrep = closureSMRep cl_info
+ updatable = closureNeedsUpdSpace cl_info
+
+-- we leave space for an update if either (a) the closure is updatable
+-- or (b) it is a static thunk. This is because a static thunk needs
+-- a static link field in a predictable place (after the slop), regardless
+-- of whether it is updatable or not.
+closureNeedsUpdSpace (ClosureInfo { closureLFInfo =
+ LFThunk TopLevel _ _ _ _ }) = True
+closureNeedsUpdSpace cl_info = closureUpdReqd cl_info
+
+minPayloadSize :: SMRep -> Bool -> WordOff
+minPayloadSize smrep updatable
+ = case smrep of
+ BlackHoleRep -> min_upd_size
+ GenericRep _ _ _ _ | updatable -> min_upd_size
+ GenericRep True _ _ _ -> 0 -- static
+ GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE
+ -- ^^^^^___ dynamic
+ where
+ min_upd_size =
+ ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader)
+ 0 -- check that we already have enough
+ -- room for mIN_SIZE_NonUpdHeapObject,
+ -- due to the extra header word in SMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[SMreps]{Choosing SM reps}
+%* *
+%************************************************************************
+
+\begin{code}
+chooseSMRep
+ :: Bool -- True <=> static closure
+ -> LambdaFormInfo
+ -> WordOff -> WordOff -- Tot wds, ptr wds
+ -> SMRep
+
+chooseSMRep is_static lf_info tot_wds ptr_wds
+ = let
+ nonptr_wds = tot_wds - ptr_wds
+ closure_type = getClosureType is_static ptr_wds lf_info
+ in
+ GenericRep is_static ptr_wds nonptr_wds closure_type
+
+-- We *do* get non-updatable top-level thunks sometimes. eg. f = g
+-- gets compiled to a jump to g (if g has non-zero arity), instead of
+-- messing around with update frames and PAPs. We set the closure type
+-- to FUN_STATIC in this case.
+
+getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType
+getClosureType is_static ptr_wds lf_info
+ = case lf_info of
+ LFCon con | is_static && ptr_wds == 0 -> ConstrNoCaf
+ | otherwise -> Constr
+ LFReEntrant _ _ _ _ -> Fun
+ LFThunk _ _ _ (SelectorThunk _) _ -> ThunkSelector
+ LFThunk _ _ _ _ _ -> Thunk
+ _ -> panic "getClosureType"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-4-questions]{Four major questions about @ClosureInfo@}
+%* *
+%************************************************************************
+
+Be sure to see the stg-details notes about these...
+
+\begin{code}
+nodeMustPointToIt :: LambdaFormInfo -> Bool
+nodeMustPointToIt (LFReEntrant top _ no_fvs _)
+ = not no_fvs || -- Certainly if it has fvs we need to point to it
+ isNotTopLevel top
+ -- If it is not top level we will point to it
+ -- We can have a \r closure with no_fvs which
+ -- is not top level, as the special case in cgRhsClosure
+ -- has been disabled in favour of let floating
+
+ -- For lex_profiling we also access the cost centre for a
+ -- non-inherited function i.e. not top level
+ -- the not top case above ensures this is ok.
+
+nodeMustPointToIt (LFCon _) = True
+
+ -- Strictly speaking, the above two don't need Node to point
+ -- to it if the arity = 0. But this is a *really* unlikely
+ -- situation. If we know it's nil (say) and we are entering
+ -- it.
Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. + +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk _ no_fvs updatable some_standard_form_thunk _) + = True -- Node must point to any standard-form thunk + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt (LFLetNoEscape _) = False +\end{code} + +The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +\begin{tabular}{lllll} +Closure Characteristics & Parallel & Node Req'd & Argument Passing & Enter Via \\ +Unknown & no & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & no & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & no & yes & registers & fast entry (enough args) \\ +0 arg, no fvs @\r,\s@ & no & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & no & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & no & yes & n/a & direct entry \\ +0 arg, fvs @\u@ & no & yes & n/a & node \\ + +Unknown & yes & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & yes & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & yes & yes & registers & node \\ +0 arg, no fvs @\r,\s@ & yes & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & yes & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & yes & yes & n/a & node \\ +0 arg, fvs @\u@ & yes & yes & n/a & node\\ +\end{tabular} + +When black-holing, single-entry closures could also be entered via node +(rather than directly) to catch double-entry. + +\begin{code} +data CallMethod + = EnterIt -- no args, not a function + + | JumpToIt CLabel -- no args, not a function, but we + -- know what its entry code is + + | ReturnIt -- it's a function, but we have + -- zero args to apply to it, so just + -- return it. + + | ReturnCon DataCon -- It's a data constructor, just return it + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: HomeModules + -> Name -- Function being applied + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod hmods name lf_info n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. 
+ EnterIt + +getCallMethod hmods name (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel hmods name) arity + +getCallMethod hmods name (LFCon con) n_args + = ASSERT( n_args == 0 ) + ReturnCon con + +getCallMethod hmods name (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- Must always "call" a function-typed + = SlowCall -- thing, cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. --SDM + -} + = ASSERT( n_args == 0 ) EnterIt + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + JumpToIt (thunkEntryLabel hmods name std_form_info updatable) + +getCallMethod hmods name (LFUnknown True) n_args + = SlowCall -- might be a function + +getCallMethod hmods name (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod hmods name (LFBlackHole _) n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod hmods name (LFLetNoEscape 0) n_args + = JumpToIt (enterReturnPtLabel (nameUnique name)) + +getCallMethod hmods name (LFLetNoEscape arity) n_args + | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. + +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape _ -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + other -> panic "blackHoleOnEntry" -- Should never happen + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun (LFLetNoEscape _) = True +isKnownFun _ = False +\end{code} + +----------------------------------------------------------------------------- +SRT-related stuff + +\begin{code} +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. 
+staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True +\end{code} + +Avoiding generating entries and info tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] + + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. 
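+
+These conditions are perhaps easiest to see written down as predicates.
+The sketch below is illustrative only (the record and function names
+are made up, and not part of this module); it simply transcribes
+conditions (a)-(c) above for the slow-entry code and for the function
+closure:
+
+\begin{code}
+-- Illustrative only: when are slow-entry code and a function closure
+-- required? See the discussion above.
+data FunUsage = FunUsage
+ { fuUnsatCalls :: Bool -- (a) any un-saturated calls to the function?
+ , fuPassedAsArg :: Bool -- (b) function passed as an argument?
+ , fuFreeVars :: Bool -- (c) has free vars (ie not top level)?
+ }
+
+slowEntryNeeded :: Bool -> FunUsage -> Bool -- first arg: parallel world?
+slowEntryNeeded parallel u
+ = fuUnsatCalls u || fuPassedAsArg u || (parallel && fuFreeVars u)
+
+closureNeeded :: FunUsage -> Bool
+closureNeeded u
+ = fuUnsatCalls u || fuPassedAsArg u || fuFreeVars u
+\end{code}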
+ +\begin{code} +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +\end{code} + +%************************************************************************ +%* * +\subsection[ClosureInfo-misc-funs]{Misc functions about @ClosureInfo@, etc.} +%* * +%************************************************************************ + +\begin{code} + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. +lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry other_closure = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant other_closure = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) + = Just (arity, arg_desc) +closureFunInfo _ + = Nothing +\end{code} + +\begin{code} +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + other -> False +isToplevClosure _ = False +\end{code} + +Label generation. + +\begin{code} +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI (ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = rep }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + + other -> panic "infoTableLabelFromCI" + +infoTableLabelFromCI (ConInfo { closureCon = con, + closureSMRep = rep, + closureDllCon = dll }) + | isStaticRep rep = mkStaticInfoTableLabel name dll + | otherwise = mkConInfoTableLabel name dll + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm +closureLabelFromCI _ = panic "closureLabelFromCI" + +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. 
+
+thunkEntryLabel hmods thunk_id (ApThunk arity) is_updatable
+ = enterApLabel is_updatable arity
+thunkEntryLabel hmods thunk_id (SelectorThunk offset) upd_flag
+ = enterSelectorLabel upd_flag offset
+thunkEntryLabel hmods thunk_id _ is_updatable
+ = enterIdLabel hmods thunk_id
+
+enterApLabel is_updatable arity
+ | tablesNextToCode = mkApInfoTableLabel is_updatable arity
+ | otherwise = mkApEntryLabel is_updatable arity
+
+enterSelectorLabel upd_flag offset
+ | tablesNextToCode = mkSelectorInfoLabel upd_flag offset
+ | otherwise = mkSelectorEntryLabel upd_flag offset
+
+enterIdLabel hmods id
+ | tablesNextToCode = mkInfoTableLabel hmods id
+ | otherwise = mkEntryLabel hmods id
+
+enterLocalIdLabel id
+ | tablesNextToCode = mkLocalInfoTableLabel id
+ | otherwise = mkLocalEntryLabel id
+
+enterReturnPtLabel name
+ | tablesNextToCode = mkReturnInfoLabel name
+ | otherwise = mkReturnPtLabel name
+\end{code}
+
+
+We need a black-hole closure info to pass to @allocDynClosure@ when we
+want to allocate the black hole on entry to a CAF. These are the only
+ways to build an LFBlackHole, maintaining the invariant that it really
+is a black hole and not something else.
+
+\begin{code}
+cafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo"
+
+seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-Profiling-funs]{Misc functions for profiling info.}
+%* *
+%************************************************************************
+
+Profiling requires two pieces of information to be determined for
+each closure's info table --- description and type.
+
+The description is stored directly in the @CClosureInfoTable@ when the
+info table is built.
+
+The type is determined from the type information stored with the @Id@
+in the closure info using @closureTypeDescr@.
+ +\begin{code} +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + NoteTy (FTVNote _) ty -> getTyDescription ty + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription (ClassP cl tys) = getOccString cl +getPredTyDescription (IParam ip ty) = getOccString (ipNameName ip) +\end{code} + + diff --git a/compiler/codeGen/ClosureInfo.lhs-boot b/compiler/codeGen/ClosureInfo.lhs-boot new file mode 100644 index 0000000000..b069905d3e --- /dev/null +++ b/compiler/codeGen/ClosureInfo.lhs-boot @@ -0,0 +1,6 @@ +\begin{code} +module ClosureInfo where + +data LambdaFormInfo +data ClosureInfo +\end{code}
\ No newline at end of file diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs new file mode 100644 index 0000000000..e8d83a5a43 --- /dev/null +++ b/compiler/codeGen/CodeGen.lhs @@ -0,0 +1,343 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CodeGen]{@CodeGen@: main module of the code generator} + +This module says how things get going at the top level. + +@codeGen@ is the interface to the outside world. The \tr{cgTop*} +functions drive the mangling of top-level bindings. + +%************************************************************************ +%* * +\subsection[codeGen-outside-interface]{The code generator's offering to the world} +%* * +%************************************************************************ + +\begin{code} +module CodeGen ( codeGen ) where + +#include "HsVersions.h" + +-- Kludge (??) so that CgExpr is reached via at least one non-SOURCE +-- import. Before, that wasn't the case, and CM therefore didn't +-- bother to compile it. +import CgExpr ( {-NOTHING!-} ) -- DO NOT DELETE THIS IMPORT +import CgProf +import CgMonad +import CgBindery ( CgIdInfo, addBindC, addBindsC, getCgIdInfo, + cgIdInfoId ) +import CgClosure ( cgTopRhsClosure ) +import CgCon ( cgTopRhsCon, cgTyCon ) +import CgUtils ( cmmRegOffW, emitRODataLits, cmmNeWord ) + +import CLabel +import Cmm +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import PprCmm ( pprCmms ) +import MachOp ( wordRep, MachHint(..) ) + +import StgSyn +import PrelNames ( gHC_PRIM, rOOT_MAIN, mAIN, pREL_TOP_HANDLER ) +import Packages ( HomeModules ) +import DynFlags ( DynFlags(..), DynFlag(..), dopt ) +import StaticFlags ( opt_SccProfilingOn ) + +import HscTypes ( ForeignStubs(..), TypeEnv, typeEnvTyCons ) +import CostCentre ( CollectedCCs ) +import Id ( Id, idName, setIdName ) +import Name ( nameSrcLoc, nameOccName, nameUnique, isInternalName, mkExternalName ) +import OccName ( mkLocalOcc ) +import TyCon ( TyCon ) +import Module ( Module, mkModule ) +import ErrUtils ( dumpIfSet_dyn, showPass ) +import Panic ( assertPanic ) + +#ifdef DEBUG +import Outputable +#endif +\end{code} + +\begin{code} +codeGen :: DynFlags + -> HomeModules + -> Module + -> [TyCon] + -> ForeignStubs + -> [Module] -- directly-imported modules + -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> IO [Cmm] -- Output + +codeGen dflags hmods this_mod data_tycons foreign_stubs imported_mods + cost_centre_info stg_binds + = do + { showPass dflags "CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags hmods this_mod $ do + { cmm_binds <- mapM (getCmm . 
cgTopBinding dflags hmods) stg_binds
+ ; cmm_tycons <- mapM cgTyCon data_tycons
+ ; cmm_init <- getCmm (mkModuleInit dflags hmods way cost_centre_info
+ this_mod main_mod
+ foreign_stubs imported_mods)
+ ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init])
+ }
+ -- Put datatype_stuff after code_stuff, because the
+ -- datatype closure table (for enumeration types) refers to
+ -- (say) PrelBase_True_closure, which is defined in
+ -- code_stuff
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
+
+ ; return code_stuff }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-init]{Module initialisation code}
+%* *
+%************************************************************************
+
+/* -----------------------------------------------------------------------------
+ Module initialisation
+
+ The module initialisation code looks like this, roughly:
+
+ FN(__stginit_Foo) {
+ JMP_(__stginit_Foo_1_p)
+ }
+
+ FN(__stginit_Foo_1_p) {
+ ...
+ }
+
+ We have one version of the init code with a module version and the
+ 'way' attached to it. The version number helps to catch cases
+ where modules are not compiled in dependency order before being
+ linked: if a module has been compiled since any modules which depend on
+ it, then the latter modules will refer to a different version in their
+ init blocks and a link error will ensue.
+
+ The 'way' suffix helps to catch cases where modules compiled in different
+ ways are linked together (eg. profiled and non-profiled).
+
+ We provide a plain, unadorned, version of the module init code
+ which just jumps to the version with the label and way attached. The
+ reason for this is that when using foreign exports, the caller of
+ startupHaskell() must supply the name of the init function for the "top"
+ module in the program, and we don't want to require that this name
+ has the version and way info appended to it.
+ -------------------------------------------------------------------------- */
+
+We initialise the module tree by keeping a work-stack,
+ * pointed to by Sp
+ * that grows downward
+ * Sp points to the last occupied slot
+
+
+\begin{code}
+mkModuleInit
+ :: DynFlags
+ -> HomeModules
+ -> String -- the "way"
+ -> CollectedCCs -- cost centre info
+ -> Module
+ -> Module -- name of the Main module
+ -> ForeignStubs
+ -> [Module]
+ -> Code
+mkModuleInit dflags hmods way cost_centre_info this_mod main_mod foreign_stubs imported_mods
+ = do {
+ if opt_SccProfilingOn
+ then do { -- Allocate the static boolean that records if this
+ -- module has been registered already
+ emitData Data [CmmDataLabel moduleRegdLabel,
+ CmmStaticLit zeroCLit]
+
+ ; emitSimpleProc real_init_lbl $ do
+ { ret_blk <- forkLabelledCode ret_code
+
+ ; init_blk <- forkLabelledCode $ do
+ { mod_init_code; stmtC (CmmBranch ret_blk) }
+
+ ; stmtC (CmmCondBranch (cmmNeWord (CmmLit zeroCLit) mod_reg_val)
+ ret_blk)
+ ; stmtC (CmmBranch init_blk)
+ }
+ }
+ else emitSimpleProc real_init_lbl ret_code
+
+ -- Make the "plain" procedure jump to the "real" init procedure
+ ; emitSimpleProc plain_init_lbl jump_to_init
+
+ -- When compiling the module in which the 'main' function lives,
+ -- (that is, this_mod == main_mod)
+ -- we inject an extra stg_init procedure for stg_init_ZCMain, for the
+ -- RTS to invoke. We must consult the -main-is flag in case the
+ -- user specified a different function to Main.main
+ ; whenC (this_mod == main_mod)
+ (emitSimpleProc plain_main_init_lbl jump_to_init)
+ }
+ where
+ plain_init_lbl = mkPlainModuleInitLabel hmods this_mod
+ real_init_lbl = mkModuleInitLabel hmods this_mod way
+ plain_main_init_lbl = mkPlainModuleInitLabel hmods rOOT_MAIN
+
+ jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) [])
+
+ mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep
+
+ -- Main refers to GHC.TopHandler.runIO, so make sure we call the
+ -- init function for GHC.TopHandler.
+ extra_imported_mods
+ | this_mod == main_mod = [pREL_TOP_HANDLER]
+ | otherwise = []
+
+ mod_init_code = do
+ { -- Set mod_reg to 1 to record that we've been here
+ stmtC (CmmStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)))
+
+ -- Now do local stuff
+ ; initCostCentres cost_centre_info
+ ; mapCs (registerModuleImport hmods way)
+ (imported_mods++extra_imported_mods)
+ }
+
+ -- The return-code pops the work stack by
+ -- incrementing Sp, and then jumps to the popped item
+ ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1)
+ , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ]
+
+-----------------------
+registerModuleImport :: HomeModules -> String -> Module -> Code
+registerModuleImport hmods way mod
+ | mod == gHC_PRIM
+ = nopC
+ | otherwise -- Push the init procedure onto the work stack
+ = stmtsC [ CmmAssign spReg (cmmRegOffW spReg (-1))
+ , CmmStore (CmmReg spReg) (mkLblExpr (mkModuleInitLabel hmods mod way)) ]
+\end{code}
+
+
+
+Cost-centre profiling: Besides the usual stuff, we must produce
+declarations for the cost-centres defined in this module;
+
+(The local cost-centres involved in this are passed into the
+code-generator.)
+
+\begin{code}
+initCostCentres :: CollectedCCs -> Code
+-- Emit the declarations, and return code to register them
+initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
+ | not opt_SccProfilingOn = nopC
+ | otherwise
+ = do { mapM_ emitCostCentreDecl local_CCs
+ ; mapM_ emitCostCentreStackDecl singleton_CCSs
+ ; mapM_ emitRegisterCC local_CCs
+ ; mapM_ emitRegisterCCS singleton_CCSs
+ }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-top-bindings]{Converting top-level STG bindings}
+%* *
+%************************************************************************
+
+@cgTopBinding@ is only used for top-level bindings, since they need
+to be allocated statically (not in the heap) and need to be labelled.
+No unboxed bindings can happen at top level.
+
+In the code below, the static bindings are accumulated in the
+@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
+This is so that we can write the top level processing in a compositional
+style, with the increasing static environment being plumbed as a state
+variable.
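+
+The recursive case below relies on knot-tying: @fixC@ feeds the final
+environment to the compilation of each right-hand side before that
+environment has been fully built. As a minimal standalone sketch of
+the same pattern (illustrative only: it uses @mfix@ from base, and the
+names @Env@ and @compileGroup@ are made up, not part of GHC):
+
+	import Control.Monad.Fix ( mfix )
+	import qualified Data.Map as M
+
+	type Env = M.Map String Int
+
+	-- Each right-hand side may consult the *final* environment,
+	-- including its own binding group; laziness does the rest.
+	compileGroup :: [(String, Env -> Int)] -> IO Env
+	compileGroup pairs = mfix $ \ final_env ->
+	    return (M.fromList [ (n, rhs final_env) | (n, rhs) <- pairs ])
+
+For instance, @compileGroup [("x", const 1), ("y", \e -> e M.! "x" + 1)]@
+yields an environment mapping @x@ to 1 and @y@ to 2.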
+ +\begin{code} +cgTopBinding :: DynFlags -> HomeModules -> (StgBinding,[(Id,[Id])]) -> Code +cgTopBinding dflags hmods (StgNonRec id rhs, srts) + = do { id' <- maybeExternaliseId dflags id + ; mapM_ (mkSRT hmods [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags hmods (StgRec pairs, srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + ; mapM_ (mkSRT hmods bndrs') srts + ; _new_binds <- fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; nopC } + +mkSRT :: HomeModules -> [Id] -> (Id,[Id]) -> Code +mkSRT hmods these (id,[]) = nopC +mkSRT hmods these (id,ids) + = do { ids <- mapFCs remap ids + ; id <- remap id + ; emitRODataLits (mkSRTLabel (idName id)) + (map (CmmLabel . mkClosureLabel hmods . idName) ids) + } + where + -- Sigh, better map all the ids against the environment in + -- case they've been externalised (see maybeExternaliseId below). + remap id = case filter (==id) these of + (id':_) -> returnFC id' + [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... + -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi srt upd_flag args body) +\end{code} + + +%************************************************************************ +%* * +\subsection{Stuff to support splitting} +%* * +%************************************************************************ + +If we're splitting the object, we need to externalise all the top-level names +(and then make sure we only use the externalised one in any C label we use +which refers to this name). + +\begin{code} +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- moduleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ Nothing loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcLoc name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. +\end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs new file mode 100644 index 0000000000..c807703b13 --- /dev/null +++ b/compiler/codeGen/SMRep.lhs @@ -0,0 +1,361 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[SMRep]{Storage manager representations of closure} + +This is here, rather than in ClosureInfo, just to keep nhc happy. +Other modules should access this info through ClosureInfo. 
+
+\begin{code}
+module SMRep (
+ -- Words and bytes
+ StgWord, StgHalfWord,
+ hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
+ WordOff, ByteOff,
+
+ -- Argument/return representations
+ CgRep(..), nonVoidArg,
+ argMachRep, primRepToCgRep, primRepHint,
+ isFollowableArg, isVoidArg,
+ isFloatingArg, isNonPtrArg, is64BitArg,
+ separateByPtrFollowness,
+ cgRepSizeW, cgRepSizeB,
+ retAddrSizeW,
+
+ typeCgRep, idCgRep, tyConCgRep, typeHint,
+
+ -- Closure representation
+ SMRep(..), ClosureType(..),
+ isStaticRep,
+ fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
+ profHdrSize, thunkHdrSize,
+ tablesNextToCode,
+ smRepClosureType, smRepClosureTypeInt,
+
+ rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG
+ ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import Id ( Id, idType )
+import Type ( Type, typePrimRep, PrimRep(..) )
+import TyCon ( TyCon, tyConPrimRep )
+import MachOp -- ( MachRep(..), MachHint(..), wordRep )
+import StaticFlags ( opt_SccProfilingOn, opt_GranMacros,
+ opt_Unregisterised )
+import Constants
+import Outputable
+
+import DATA_WORD
+\end{code}
+
+
+%************************************************************************
+%* *
+ Words and bytes
+%* *
+%************************************************************************
+
+\begin{code}
+type WordOff = Int -- Word offset, or word count
+type ByteOff = Int -- Byte offset, or byte count
+\end{code}
+
+StgWord is a Haskell type representing an StgWord on the target platform.
+
+\begin{code}
+#if SIZEOF_HSWORD == 4
+type StgWord = Word32
+type StgHalfWord = Word16
+hALF_WORD_SIZE = 2 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 16 :: Int
+#elif SIZEOF_HSWORD == 8
+type StgWord = Word64
+type StgHalfWord = Word32
+hALF_WORD_SIZE = 4 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 32 :: Int
+#else
+#error unknown SIZEOF_HSWORD
+#endif
+\end{code}
+
+
+%************************************************************************
+%* *
+ CgRep
+%* *
+%************************************************************************
+
+A CgRep is an abstraction of a Type which tells the code generator
+all it needs to know about the calling convention for arguments (and
+results) of that type. In particular, the ArgReps of a function's
+arguments are used to decide which of the RTS's generic apply
+functions to call when applying an unknown function.
+
+It contains more information than the back-end data type MachRep,
+so one can easily convert from CgRep -> MachRep. (Except that
+there's no MachRep for a VoidRep.)
+
+It distinguishes
+ pointers from non-pointers (we sort the pointers together
+ when building closures)
+
+ void from other types: a void argument is different from no argument
+
+All 64-bit types map to the same CgRep, because they're passed in the
+same register, but a PtrArg is still different from a NonPtrArg
+because the function's entry convention has to take into account the
+pointer-hood of arguments for the purposes of describing the stack on
+entry to the garbage collector.
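+
+As an illustration of that first point, the sketch below (a hypothetical
+helper, not exported from this module and not GHC's actual selection
+code) builds the kind of suffix the RTS uses to name its generic apply
+functions, e.g. something like @stg_ap_pp@ for two pointer arguments.
+It uses the @CgRep@ type defined just below:
+
+\begin{code}
+-- Illustrative only: one letter per non-void argument representation.
+applyFunSuffix :: [CgRep] -> String
+applyFunSuffix reps = concatMap tag (filter nonVoidArg reps)
+ where
+ tag PtrArg = "p"
+ tag NonPtrArg = "n"
+ tag LongArg = "l"
+ tag FloatArg = "f"
+ tag DoubleArg = "d"
+ tag VoidArg = "" -- filtered out above
+\end{code}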
+
+\begin{code}
+data CgRep
+ = VoidArg -- Void
+ | PtrArg -- Word-sized Ptr
+ | NonPtrArg -- Word-sized non-pointer
+ | LongArg -- 64-bit non-pointer
+ | FloatArg -- 32-bit float
+ | DoubleArg -- 64-bit float
+ deriving Eq
+
+instance Outputable CgRep where
+ ppr VoidArg = ptext SLIT("V_")
+ ppr PtrArg = ptext SLIT("P_")
+ ppr NonPtrArg = ptext SLIT("I_")
+ ppr LongArg = ptext SLIT("L_")
+ ppr FloatArg = ptext SLIT("F_")
+ ppr DoubleArg = ptext SLIT("D_")
+
+argMachRep :: CgRep -> MachRep
+argMachRep PtrArg = wordRep
+argMachRep NonPtrArg = wordRep
+argMachRep LongArg = I64
+argMachRep FloatArg = F32
+argMachRep DoubleArg = F64
+argMachRep VoidArg = panic "argMachRep:VoidRep"
+
+primRepToCgRep :: PrimRep -> CgRep
+primRepToCgRep VoidRep = VoidArg
+primRepToCgRep PtrRep = PtrArg
+primRepToCgRep IntRep = NonPtrArg
+primRepToCgRep WordRep = NonPtrArg
+primRepToCgRep Int64Rep = LongArg
+primRepToCgRep Word64Rep = LongArg
+primRepToCgRep AddrRep = NonPtrArg
+primRepToCgRep FloatRep = FloatArg
+primRepToCgRep DoubleRep = DoubleArg
+
+primRepHint :: PrimRep -> MachHint
+primRepHint VoidRep = panic "primRepHint:VoidRep"
+primRepHint PtrRep = PtrHint
+primRepHint IntRep = SignedHint
+primRepHint WordRep = NoHint
+primRepHint Int64Rep = SignedHint
+primRepHint Word64Rep = NoHint
+primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg
+primRepHint FloatRep = FloatHint
+primRepHint DoubleRep = FloatHint
+
+idCgRep :: Id -> CgRep
+idCgRep = typeCgRep . idType
+
+tyConCgRep :: TyCon -> CgRep
+tyConCgRep = primRepToCgRep . tyConPrimRep
+
+typeCgRep :: Type -> CgRep
+typeCgRep = primRepToCgRep . typePrimRep
+
+typeHint :: Type -> MachHint
+typeHint = primRepHint . typePrimRep
+\end{code}
+
+Whether or not the thing is a pointer that the garbage-collector
+should follow. Or, to put it another (less confusing) way, whether
+the object in question is a heap object.
+
+Depending on the outcome, this predicate determines what stack
+the pointer/object possibly will have to be saved onto, and the
+computation of GC liveness info.
+
+\begin{code}
+isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object
+isFollowableArg PtrArg = True
+isFollowableArg other = False
+
+isVoidArg :: CgRep -> Bool
+isVoidArg VoidArg = True
+isVoidArg other = False
+
+nonVoidArg :: CgRep -> Bool
+nonVoidArg VoidArg = False
+nonVoidArg other = True
+
+-- isFloatingArg is used to distinguish @Double@ and @Float@ which
+-- cause inadvertent numeric conversions if you aren't jolly careful.
+-- See codeGen/CgCon:cgTopRhsCon.
+
+isFloatingArg :: CgRep -> Bool
+isFloatingArg DoubleArg = True
+isFloatingArg FloatArg = True
+isFloatingArg _ = False
+
+isNonPtrArg :: CgRep -> Bool
+-- Identify anything which is one word large and not a pointer.
+isNonPtrArg NonPtrArg = True
+isNonPtrArg other = False
+
+is64BitArg :: CgRep -> Bool
+is64BitArg LongArg = True
+is64BitArg _ = False
+\end{code}
+
+\begin{code}
+separateByPtrFollowness :: [(CgRep,a)] -> ([(CgRep,a)], [(CgRep,a)])
+-- Returns (ptrs, non-ptrs)
+separateByPtrFollowness things
+ = sep_things things [] []
+ -- accumulating params for follow-able and don't-follow things...
+  where
+    sep_things []              bs us = (reverse bs, reverse us)
+    sep_things ((PtrArg,a):ts) bs us = sep_things ts ((PtrArg,a):bs) us
+    sep_things (t:ts)          bs us = sep_things ts bs (t:us)
+\end{code}
+
+\begin{code}
+cgRepSizeB :: CgRep -> ByteOff
+cgRepSizeB DoubleArg = dOUBLE_SIZE
+cgRepSizeB LongArg   = wORD64_SIZE
+cgRepSizeB VoidArg   = 0
+cgRepSizeB _         = wORD_SIZE
+
+cgRepSizeW :: CgRep -> WordOff	-- NB: WordOff, not ByteOff; this is a word count
+cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE
+cgRepSizeW LongArg   = wORD64_SIZE `quot` wORD_SIZE
+cgRepSizeW VoidArg   = 0
+cgRepSizeW _         = 1
+
+retAddrSizeW :: WordOff
+retAddrSizeW = 1	-- One word
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+data SMRep
+     -- Static closures have an extra static link field at the end.
+  = GenericRep		-- GC routines consult sizes in info tbl
+	Bool		-- True <=> This is a static closure.  Affects how
+			-- we garbage-collect it
+	!Int		-- # ptr words
+	!Int		-- # non-ptr words
+	ClosureType	-- closure type
+
+  | BlackHoleRep
+
+data ClosureType	-- Corresponds 1-1 with the varieties of closures
+			-- implemented by the RTS.  Compare with
+			-- ghc/includes/ClosureTypes.h.
+    = Constr
+    | ConstrNoCaf
+    | Fun
+    | Thunk
+    | ThunkSelector
+\end{code}
+
+Size of a closure header.
+
+\begin{code}
+fixedHdrSize :: WordOff
+fixedHdrSize = sTD_HDR_SIZE + profHdrSize + granHdrSize
+
+profHdrSize :: WordOff
+profHdrSize | opt_SccProfilingOn = pROF_HDR_SIZE
+	    | otherwise          = 0
+
+granHdrSize :: WordOff
+granHdrSize | opt_GranMacros = gRAN_HDR_SIZE
+	    | otherwise      = 0
+
+arrWordsHdrSize :: ByteOff
+arrWordsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgArrWords_NoHdr
+
+arrPtrsHdrSize :: ByteOff
+arrPtrsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgMutArrPtrs_NoHdr
+
+-- Thunks have an extra header word on SMP, so the update doesn't
+-- splat the payload.
+thunkHdrSize :: WordOff
+thunkHdrSize = fixedHdrSize + smp_hdr
+  where smp_hdr = sIZEOF_StgSMPThunkHeader `quot` wORD_SIZE
+\end{code}
+
+\begin{code}
+-- The IA64 and PowerPC64 manglers don't place tables next to code
+tablesNextToCode :: Bool
+#if defined(ia64_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
+tablesNextToCode = False
+#else
+tablesNextToCode = not opt_Unregisterised
+#endif
+\end{code}
+
+\begin{code}
+isStaticRep :: SMRep -> Bool
+isStaticRep (GenericRep is_static _ _ _) = is_static
+isStaticRep BlackHoleRep                 = False
+\end{code}
+
+\begin{code}
+#include "../includes/ClosureTypes.h"
+-- Defines CONSTR, CONSTR_1_0 etc
+
+smRepClosureType :: SMRep -> ClosureType
+smRepClosureType (GenericRep _ _ _ ty) = ty
+smRepClosureType BlackHoleRep          = panic "smRepClosureType: black hole"
+
+smRepClosureTypeInt :: SMRep -> Int
+smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2
+smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR
+
+smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2
+smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN
+
+smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2
+smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK
+
+smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR
+
+smRepClosureTypeInt (GenericRep True _ _ Constr)      = CONSTR_STATIC
+smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Fun)         = FUN_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Thunk)       = THUNK_STATIC
+
+smRepClosureTypeInt BlackHoleRep = BLACKHOLE
+
+smRepClosureTypeInt _ = panic "smRepClosureTypeInt"
+
+-- We export these ones
+rET_SMALL     = (RET_SMALL     :: Int)
+rET_VEC_SMALL = (RET_VEC_SMALL :: Int)
+rET_BIG       = (RET_BIG       :: Int)
+rET_VEC_BIG   = (RET_VEC_BIG   :: Int)
+\end{code}
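+
+A usage sketch (illustrative only; conRep is a hypothetical binding,
+not part of this module): a dynamic constructor closure with one
+pointer field and no non-pointer fields is described by
+
+	conRep :: SMRep
+	conRep = GenericRep False 1 0 Constr	-- dynamic: 1 ptr word, 0 non-ptr words
+
+so smRepClosureTypeInt conRep yields CONSTR_1_0 from ClosureTypes.h,
+and the closure occupies fixedHdrSize + 1 words in the heap.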