6 files changed, 2160 insertions, 553 deletions
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
new file mode 100644
index 0000000000..d921c12e7a
--- /dev/null
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -0,0 +1,1545 @@
+{-# OPTIONS -w #-}
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+module SPARC.CodeGen ( 
+	cmmTopCodeGen, 
+	InstrBlock 
+) 
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import SPARC.Instr
+import SPARC.Cond
+import SPARC.Regs
+import SPARC.RegInfo
+import Instruction
+import Size
+import Reg
+import PIC
+import NCGMonad
+
+-- Our intermediate code:
+import BlockId
+import Cmm
+import CLabel
+
+-- The rest:
+import BasicTypes
+import StaticFlags	( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import FastString
+
+import Control.Monad	( mapAndUnzipM )
+import Data.Int
+import DynFlags
+
+-- | Top level code generation
+cmmTopCodeGen 
+	:: DynFlags
+	-> RawCmmTop 
+	-> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen _
+	(CmmProc info lab params (ListGraph blocks)) 
+ = do	
+ 	(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+
+--	picBaseMb 	<- getPicBaseMaybeNat
+	let proc 	= CmmProc info lab params (ListGraph $ concat nat_blocks)
+	let tops 	= proc : concat statics
+
+--	case picBaseMb of
+--	 Just picBase -> initializePicBase picBase tops
+--	 Nothing -> return tops
+  
+  	return tops
+  
+  
+cmmTopCodeGen _ (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+
+
+basicBlockCodeGen 
+	:: CmmBasicBlock
+	-> NatM ( [NatBasicBlock Instr]
+		, [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+	(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+	
+	mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
+	  = ([], BasicBlock id instrs : blocks, statics)
+	mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
+	  = (instrs, blocks, CmmData sec dat:statics)
+	mkBlocks instr (instrs,blocks,statics)
+	  = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop	   -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty	-> assignReg_FltCode size reg src
+      | isWord64 ty	-> assignReg_I64Code      reg src
+      | otherwise	-> assignReg_IntCode size reg src
+	where ty = cmmRegType reg
+	      size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty	-> assignMem_FltCode size addr src
+      | isWord64 ty	-> assignMem_I64Code      addr src
+      | otherwise	-> assignMem_IntCode size addr src
+	where ty = cmmExprType src
+	      size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch	id		-> genBranch id
+    CmmCondBranch arg id	-> genCondJump id arg
+    CmmSwitch	arg ids		-> genSwitch arg ids
+    CmmJump	arg _		-> genJump arg
+
+    CmmReturn	_		
+     -> panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- 	They are really trees of insns to facilitate fast appending, where a
+-- 	left-to-right traversal yields the insns in the correct	order.
+--
+type InstrBlock 
+	= OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode 	
+	= CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+--	Reg is the lower 32-bit temporary which contains the result. 
+--	Use getHiVRegFromLo to find the other VRegUnique.  
+--
+--	Rules of this simplified insn selection game are therefore that
+--	the returned Reg may be modified
+--
+data ChildCode64 	
+   = ChildCode64 
+        InstrBlock
+        Reg	 	
+
+
+-- | Register's passed up the tree.  If the stix code forces the register
+-- 	to live in a pre-decided machine register, it comes out as @Fixed@;
+-- 	otherwise, it comes out as @Any@, and the parent can decide which
+-- 	register to put it in.
+--
+data Register
+	= Fixed	Size Reg InstrBlock
+	| Any	Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode 
+	= Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+--	A CmmInt is intended to be truncated to the appropriate 
+-- 	number of bits, so here we truncate it to Int64.  This is
+-- 	important because e.g. -1 as a CmmInt might be either
+-- 	-1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+  where i64 = fromIntegral i :: Int64
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+    where blockLabel = mkAsmTempLabel id
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+  where width = typeWidth (cmmRegType reg)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+     Amode _ addr_code 		<- getAmode addrTree
+     ChildCode64 vcode rlo 	<- iselExpr64 valueTree  
+
+     (src, code) <- getSomeReg addrTree
+     let 
+         rhi = getHiVRegFromLo rlo
+         -- Big-endian store
+         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
+         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
+
+     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+
+assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
+     let 
+         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
+         r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = mkMOV r_src_lo r_dst_lo
+         mov_hi = mkMOV r_src_hi r_dst_hi
+         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do	Amode amode addr_code	<- getAmode addrTree
+ 	let result
+
+		| AddrRegReg r1 r2	<- amode
+		= do	rlo 	<- getNewRegNat II32
+			tmp	<- getNewRegNat II32
+			let rhi = getHiVRegFromLo rlo
+
+			return	$ ChildCode64 
+				(	 addr_code 
+				`appOL`	 toOL
+					 [ ADD False False r1 (RIReg r2) tmp
+					 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+					 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+				rlo
+
+		| AddrRegImm r1 (ImmInt i) <- amode
+		= do	rlo	<- getNewRegNat II32
+			let rhi = getHiVRegFromLo rlo
+			
+			return	$ ChildCode64 
+				(	 addr_code 
+				`appOL`	 toOL
+					 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+					 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+				rlo
+		
+	result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do	ChildCode64 code1 r1_lo <- iselExpr64 e1
+ 	let r1_hi	= getHiVRegFromLo r1_lo
+	
+	r_dst_lo	<- getNewRegNat II32
+	let r_dst_hi	=  getHiVRegFromLo r_dst_lo 
+	
+	return	$ ChildCode64
+			( toOL
+			[ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+			, ADD True  False r1_hi (RIReg g0)	   r_dst_hi ])
+			r_dst_lo
+
+
+-- Addition of II64
+iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
+ = do	ChildCode64 code1 r1_lo	<- iselExpr64 e1
+	let r1_hi	= getHiVRegFromLo r1_lo
+
+ 	ChildCode64 code2 r2_lo	<- iselExpr64 e2
+	let r2_hi	= getHiVRegFromLo r2_lo
+	
+	r_dst_lo	<- getNewRegNat II32
+	let r_dst_hi	= getHiVRegFromLo r_dst_lo
+	
+	let code =	code1
+		`appOL`	code2
+		`appOL`	toOL
+			[ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
+			, ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]
+	
+	return	$ ChildCode64 code r_dst_lo
+
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
+     r_dst_lo <-  getNewRegNat II32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_lo = mkVReg uq II32
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = mkMOV r_src_lo r_dst_lo
+         mov_hi = mkMOV r_src_hi r_dst_hi
+         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+     return (
+            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+         )
+
+-- Convert something into II64
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) 
+ = do
+	r_dst_lo 	<- getNewRegNat II32
+	let r_dst_hi	= getHiVRegFromLo r_dst_lo
+
+	-- compute expr and load it into r_dst_lo
+	(a_reg, a_code)	<- getSomeReg expr
+
+	let code	= a_code
+		`appOL`	toOL
+			[ mkRegRegMoveInstr g0    r_dst_hi 	-- clear high 32 bits
+			, mkRegRegMoveInstr a_reg r_dst_lo ]
+			
+	return	$ ChildCode64 code r_dst_lo
+
+
+iselExpr64 expr
+   = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- 	we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+	tmp <- getNewRegNat rep
+	return (tmp, code tmp)
+    Fixed _ reg code -> 
+	return (reg, code)
+
+
+-- 
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+		  (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+
+
+-- Load a literal float into a float register.
+--	The actual literal is stored in a new data area, and we load it 
+--	at runtime.
+getRegister (CmmLit (CmmFloat f W32)) = do
+
+    -- a label for the new data area
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+
+    let code dst = toOL [
+            -- the data area         
+	    LDATA ReadOnlyData
+	                [CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat f W32)],
+
+            -- load the literal
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+
+    return (Any FF32 code)
+
+getRegister (CmmLit (CmmFloat d W64)) = do
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+    let code dst = toOL [
+	    LDATA ReadOnlyData
+	                [CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat d W64)],
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+    return (Any FF64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+      MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
+      MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
+
+      MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
+      MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
+
+      MO_FF_Conv W64 W32-> coerceDbl2Flt x
+      MO_FF_Conv W32 W64-> coerceFlt2Dbl x
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      -- Conversions which are a nop on sparc
+      MO_UU_Conv from to
+	| from == to    -> conversionNop (intSize to)  x
+      MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
+      MO_UU_Conv W32 to -> conversionNop (intSize to) x
+      MO_SS_Conv W32 to -> conversionNop (intSize to) x
+
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
+
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
+
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
+    where
+
+	-- | sign extend and widen
+	integerExtend 
+		:: Width 		-- ^ width of source expression
+		-> Width 		-- ^ width of result
+		-> CmmExpr 		-- ^ source expression
+		-> NatM Register	
+
+	integerExtend from to expr
+	 = do	-- load the expr into some register
+	 	(reg, e_code) 	<- getSomeReg expr
+		tmp		<- getNewRegNat II32
+		let bitCount
+			= case (from, to) of
+				(W8,  W32)	-> 24
+				(W16, W32)	-> 16
+				(W8,  W16)	-> 24
+	 	let code dst
+			= e_code 	
+
+			-- local shift word left to load the sign bit
+			`snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+			
+			-- arithmetic shift right to sign extend
+			`snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+			
+		return (Any (intSize to) code)
+				
+
+        conversionNop new_rep expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_Eq rep -> condIntReg EQQ x y
+      MO_Ne rep -> condIntReg NE x y
+
+      MO_S_Gt rep -> condIntReg GTT x y
+      MO_S_Ge rep -> condIntReg GE x y
+      MO_S_Lt rep -> condIntReg LTT x y
+      MO_S_Le rep -> condIntReg LE x y
+	      
+      MO_U_Gt W32  -> condIntReg GTT x y
+      MO_U_Ge W32  -> condIntReg GE x y
+      MO_U_Lt W32  -> condIntReg LTT x y
+      MO_U_Le W32  -> condIntReg LE x y
+
+      MO_U_Gt W16 -> condIntReg GU  x y
+      MO_U_Ge W16 -> condIntReg GEU x y
+      MO_U_Lt W16 -> condIntReg LU  x y
+      MO_U_Le W16 -> condIntReg LEU x y
+
+      MO_Add W32 -> trivialCode W32 (ADD False False) x y
+      MO_Sub W32 -> trivialCode W32 (SUB False False) x y
+
+      MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+      MO_S_Quot W32 	-> idiv True  False x y
+      MO_U_Quot W32 	-> idiv False False x y
+       
+      MO_S_Rem  W32	-> irem True  x y
+      MO_U_Rem	W32	-> irem False x y
+       
+      MO_F_Eq w -> condFltReg EQQ x y
+      MO_F_Ne w -> condFltReg NE x y
+
+      MO_F_Gt w -> condFltReg GTT x y
+      MO_F_Ge w -> condFltReg GE x y 
+      MO_F_Lt w -> condFltReg LTT x y
+      MO_F_Le w -> condFltReg LE x y
+
+      MO_F_Add  w  -> trivialFCode w FADD x y
+      MO_F_Sub  w  -> trivialFCode w FSUB x y
+      MO_F_Mul  w  -> trivialFCode w FMUL x y
+      MO_F_Quot w  -> trivialFCode w FDIV x y
+
+      MO_And rep   -> trivialCode rep (AND False) x y
+      MO_Or rep    -> trivialCode rep (OR  False) x y
+      MO_Xor rep   -> trivialCode rep (XOR False) x y
+
+      MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+      MO_Shl rep   -> trivialCode rep SLL  x y
+      MO_U_Shr rep   -> trivialCode rep SRL x y
+      MO_S_Shr rep   -> trivialCode rep SRA x y
+
+{-
+      MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
+                                         [promote x, promote y])
+		       where promote x = CmmMachOp MO_F32_to_Dbl [x]
+      MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
+                                        [x, y])
+-}
+      other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+  where
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+	
+    -- For unsigned division with a 32 bit numerator, 
+    --		we can just clear the Y register.
+    idiv False cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ WRY  g0 g0
+			, UDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+    	
+
+    -- For _signed_ division with a 32 bit numerator,
+    --		we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	tmp			<- getNewRegNat II32
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ SRA  a_reg (RIImm (ImmInt 16)) tmp		-- sign extend
+			, SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+			, WRY  tmp g0				
+			, SDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --	 NOTE:	The SPARC v8 architecture manual says that integer division
+    --		instructions _may_ generate a remainder, depending on the implementation.
+    --		If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --		The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --		It's probably best not to worry about it, and just generate our own
+    --		remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --		Division is between a 64 bit numerator and a 32 bit denominator, 
+    --		so we still have to clear the Y register.
+    irem False x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+
+	tmp_reg		<- getNewRegNat II32
+
+	let code dst
+		= 	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ WRY	g0 g0
+			, UDIV  False         a_reg (RIReg b_reg) tmp_reg
+			, UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+			, SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+    	return	(Any II32 code)
+
+    
+    -- For signed operands:
+    --		Make sure to sign extend into the Y register, or the remainder
+    --		will have the wrong sign when the numerator is negative.
+    --
+    --	TODO:	When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --		not the full 32. Not sure why this is, something to do with overflow?
+    --		If anyone cares enough about the speed of signed remainder they
+    --		can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+	
+	tmp1_reg	<- getNewRegNat II32
+	tmp2_reg	<- getNewRegNat II32
+		
+	let code dst
+		=	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ SRA	a_reg      (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, SRA	tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, WRY	tmp1_reg g0
+
+			, SDIV  False          a_reg (RIReg b_reg)    tmp2_reg	
+			, SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+			, SUB   False False    a_reg (RIReg tmp2_reg) dst]
+			
+	return (Any II32 code)
+   
+
+    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    imulMayOflo rep a b = do
+         (a_reg, a_code) <- getSomeReg a
+	 (b_reg, b_code) <- getSomeReg b
+	 res_lo <- getNewRegNat II32
+	 res_hi <- getNewRegNat II32
+	 let
+	    shift_amt  = case rep of
+			  W32 -> 31
+			  W64 -> 63
+			  _ -> panic "shift_amt"
+	    code dst = a_code `appOL` b_code `appOL`
+                       toOL [
+                           SMUL False a_reg (RIReg b_reg) res_lo,
+                           RDY res_hi,
+                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+                           SUB False False res_lo (RIReg res_hi) dst
+                        ]
+         return (Any II32 code)
+
+getRegister (CmmLoad mem pk) = do
+    Amode src code <- getAmode mem
+    let
+	code__2 dst 	= code `snocOL` LD (cmmTypeSize pk) src dst
+    return (Any (cmmTypeSize pk) code__2)
+
+getRegister (CmmLit (CmmInt i _))
+  | fits13Bits i
+  = let
+    	src = ImmInt (fromInteger i)
+    	code dst = unitOL (OR False g0 (RIImm src) dst)
+    in
+    	return (Any II32 code)
+
+getRegister (CmmLit lit)
+  = let rep = cmmLitType lit
+	imm = litToImm lit
+    	code dst = toOL [
+    	    SETHI (HI imm) dst,
+    	    OR False dst (RIImm (LO imm)) dst]
+    in return (Any II32 code)
+
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+  | fits13Bits (-i)
+  = do
+       (reg, code) <- getSomeReg x
+       let
+         off  = ImmInt (-(fromInteger i))
+       return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+  | fits13Bits i
+  = do
+       (reg, code) <- getSomeReg x
+       let
+     	 off  = ImmInt (fromInteger i)
+       return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+  = do
+    (regX, codeX) <- getSomeReg x
+    (regY, codeY) <- getSomeReg y
+    let
+    	code = codeX `appOL` codeY
+    return (Amode (AddrRegReg regX regY) code)
+
+getAmode (CmmLit lit)
+  = do
+	let imm__2	= litToImm lit
+	tmp1 	<- getNewRegNat II32
+	tmp2	<- getNewRegNat II32
+
+	let code = toOL	[ SETHI (HI imm__2) tmp1
+			, OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+		
+	return (Amode (AddrRegReg tmp2 g0) code)
+
+getAmode other
+  = do
+       (reg, code) <- getSomeReg other
+       let
+    	    off  = ImmInt 0
+       return (Amode (AddrRegImm reg off) code)
+
+
+getCondCode :: CmmExpr -> NatM CondCode
+getCondCode (CmmMachOp mop [x, y])
+  = 
+    case mop of
+      MO_F_Eq W32 -> condFltCode EQQ x y
+      MO_F_Ne W32 -> condFltCode NE  x y
+      MO_F_Gt W32 -> condFltCode GTT x y
+      MO_F_Ge W32 -> condFltCode GE  x y
+      MO_F_Lt W32 -> condFltCode LTT x y
+      MO_F_Le W32 -> condFltCode LE  x y
+
+      MO_F_Eq W64 -> condFltCode EQQ x y
+      MO_F_Ne W64 -> condFltCode NE  x y
+      MO_F_Gt W64 -> condFltCode GTT x y
+      MO_F_Ge W64 -> condFltCode GE  x y
+      MO_F_Lt W64 -> condFltCode LTT x y
+      MO_F_Le W64 -> condFltCode LE  x y
+
+      MO_Eq rep -> condIntCode EQQ  x y
+      MO_Ne rep -> condIntCode NE   x y
+
+      MO_S_Gt rep -> condIntCode GTT  x y
+      MO_S_Ge rep -> condIntCode GE   x y
+      MO_S_Lt rep -> condIntCode LTT  x y
+      MO_S_Le rep -> condIntCode LE   x y
+
+      MO_U_Gt rep -> condIntCode GU   x y
+      MO_U_Ge rep -> condIntCode GEU  x y
+      MO_U_Lt rep -> condIntCode LU   x y
+      MO_U_Le rep -> condIntCode LEU  x y
+
+      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condIntCode cond x (CmmLit (CmmInt y rep))
+  | fits13Bits y
+  = do
+       (src1, code) <- getSomeReg x
+       let
+           src2 = ImmInt (fromInteger y)
+           code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+       return (CondCode False cond code')
+
+condIntCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+	code__2 = code1 `appOL` code2 `snocOL`
+    	    	  SUB False True src1 (RIReg src2) g0
+    return (CondCode False cond code__2)
+
+
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condFltCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp <- getNewRegNat FF64
+    let
+   	promote x = FxTOy FF32 FF64 x tmp
+
+    	pk1   = cmmExprType x
+    	pk2   = cmmExprType y
+
+    	code__2 =
+		if pk1 `cmmEqType` pk2 then
+    	            code1 `appOL` code2 `snocOL`
+    	    	    FCMP True (cmmTypeSize pk1) src1 src2
+    	    	else if typeWidth pk1 == W32 then
+    	    	    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+    	    	    FCMP True FF64 tmp src2
+    	    	else
+    	    	    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+    	    	    FCMP True FF64 src1 tmp
+    return (CondCode True cond code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_IntCode pk addr src = do
+    (srcReg, code) <- getSomeReg src
+    Amode dstAddr addr_code <- getAmode addr
+    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode pk reg src = do
+    r <- getRegister src
+    return $ case r of
+	Any _ code         -> code dst
+	Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+    where
+      dst = getRegisterReg reg
+
+
+
+-- Floating point assignment to memory
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_FltCode pk addr src = do
+    Amode dst__2 code1 <- getAmode addr
+    (src__2, code2) <- getSomeReg src
+    tmp1 <- getNewRegNat pk
+    let
+    	pk__2   = cmmExprType src
+    	code__2 = code1 `appOL` code2 `appOL`
+	    if   sizeToWidth pk == typeWidth pk__2 
+            then unitOL (ST pk src__2 dst__2)
+	    else toOL 	[ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
+	    		, ST    pk tmp1 dst__2]
+    return code__2
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+    srcRegister <- getRegister srcCmmExpr
+    let dstReg	= getRegisterReg dstCmmReg
+
+    return $ case srcRegister of
+        Any _ code         	    -> code dstReg
+	Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
+
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+  = return (toOL [CALL (Left target) 0 True, NOP])
+  where
+    target = ImmCLbl lbl
+
+genJump tree
+  = do
+        (target, code) <- getSomeReg tree
+	return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
+
+-- -----------------------------------------------------------------------------
+--  Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+--  Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation.  We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@.  We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+    :: BlockId	    -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+
+
+genCondJump bid bool = do
+  CondCode is_float cond code <- getCondCode bool
+  return (
+       code `appOL` 
+       toOL (
+         if   is_float
+         then [NOP, BF cond False bid, NOP]
+         else [BI cond False bid, NOP]
+       )
+    )
+
+
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget		-- function to call
+    -> HintedCmmFormals		-- where to put the result
+    -> HintedCmmActuals		-- arguments (of mixed type)
+    -> NatM InstrBlock
+
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+{- 
+   The SPARC calling convention is an absolute
+   nightmare.  The first 6x32 bits of arguments are mapped into
+   %o0 through %o5, and the remaining arguments are dumped to the
+   stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
+
+   If we have to put args on the stack, move %o6==%sp down by
+   the number of words to go on the stack, to ensure there's enough space.
+
+   According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+   16 words above the stack pointer is a word for the address of
+   a structure return value.  I use this as a temporary location
+   for moving values from float to int regs.  Certainly it isn't
+   safe to put anything in the 16 words starting at %sp, since
+   this area can get trashed at any time due to window overflows
+   caused by signal handlers.
+
+   A final complication (if the above isn't enough) is that 
+   we can't blithely calculate the arguments one by one into
+   %o0 .. %o5.  Consider the following nested calls:
+
+       fff a (fff b c)
+
+   Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
+   the inner call will itself use %o0, which trashes the value put there
+   in preparation for the outer call.  Upshot: we need to calculate the
+   args into temporary regs, and move those to arg regs or onto the
+   stack only immediately prior to the call proper.  Sigh.
+
+genCCall
+    :: CmmCallTarget		-- function to call
+    -> HintedCmmFormals		-- where to put the result
+    -> HintedCmmActuals		-- arguments (of mixed type)
+    -> NatM InstrBlock
+
+-}
+
+
+-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
+-- are guaranteed to take place before writes afterwards (unlike on PowerPC). 
+-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
+--
+-- In the SPARC case we don't need a barrier.
+--
+genCCall (CmmPrim (MO_WriteBarrier)) _ _
+ = do	return nilOL
+
+genCCall target dest_regs argsAndHints 
+ = do	 	
+	-- strip hints from the arg regs
+	let args :: [CmmExpr]
+	    args  = map hintlessCmm argsAndHints
+
+
+	-- work out the arguments, and assign them to integer regs
+	argcode_and_vregs	<- mapM arg_to_int_vregs args
+	let (argcodes, vregss)	= unzip argcode_and_vregs
+	let vregs		= concat vregss
+
+	let n_argRegs		= length allArgRegs
+	let n_argRegs_used 	= min (length vregs) n_argRegs
+
+
+	-- deal with static vs dynamic call targets
+	callinsns <- case target of
+		CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
+			return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+		CmmCallee expr conv 
+		 -> do	(dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+			return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+		CmmPrim mop 
+		 -> do	res	<- outOfLineFloatOp mop
+			lblOrMopExpr <- case res of
+				Left lbl -> do
+					return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+		       		Right mopExpr -> do
+					(dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+					return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+			return lblOrMopExpr
+
+	let argcode = concatOL argcodes
+
+	let (move_sp_down, move_sp_up)
+	           = let diff = length vregs - n_argRegs
+	                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+	             in  if   nn <= 0
+	                 then (nilOL, nilOL)
+	                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+        let transfer_code
+           	= toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+				
+	return 
+	 $ 	argcode			`appOL`
+		move_sp_down		`appOL`
+		transfer_code		`appOL`
+		callinsns		`appOL`
+		unitOL NOP		`appOL`
+		move_sp_up		`appOL`
+		assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+-- 	or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+	-- If the expr produces a 64 bit int, then we can just use iselExpr64
+	| isWord64 (cmmExprType arg)
+	= do	(ChildCode64 code r_lo) <- iselExpr64 arg
+	  	let r_hi 		= getHiVRegFromLo r_lo
+		return (code, [r_hi, r_lo])
+
+	| otherwise
+	= do	(src, code) 	<- getSomeReg arg
+		tmp 		<- getNewRegNat (cmmTypeSize $ cmmExprType arg)
+		let pk   	= cmmExprType arg
+
+		case cmmTypeSize pk of
+
+		 -- Load a 64 bit float return value into two integer regs.
+		 FF64 -> do
+			v1 <- getNewRegNat II32
+			v2 <- getNewRegNat II32
+
+			let Just f0_high = fPair f0
+			
+			let code2 = 
+				code				`snocOL`
+		                FMOV FF64 src f0		`snocOL`
+		                ST   FF32  f0 (spRel 16)	`snocOL`
+		                LD   II32  (spRel 16) v1	`snocOL`
+		                ST   FF32  f0_high (spRel 16)	`snocOL`
+		                LD   II32  (spRel 16) v2
+
+			return	(code2, [v1,v2])
+
+		 -- Load a 32 bit float return value into an integer reg
+		 FF32 -> do
+			v1 <- getNewRegNat II32
+			
+			let code2 =
+				code                    	`snocOL`
+				ST   FF32  src (spRel 16)  	`snocOL`
+				LD   II32  (spRel 16) v1
+				
+			return (code2, [v1])
+
+		 -- Move an integer return value into its destination reg.
+		 other -> do
+			v1 <- getNewRegNat II32
+	                
+			let code2 = 
+				code				`snocOL`
+				OR False g0 (RIReg src) v1
+			
+			return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been 
+-- 	marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset          
+	= []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset     
+	= ST II32 v (spRel offset)
+	: move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset 
+	= OR False g0 (RIReg v) a
+	: move_final vs az offset
+
+
+-- | Assign results returned from the call into their 
+--	desination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code []	= nilOL
+
+assign_code [CmmHinted dest _hint]	
+ = let	rep	= localRegType dest
+	width	= typeWidth rep
+	r_dest 	= getRegisterReg (CmmLocal dest)
+
+	result
+		| isFloatType rep 
+		, W32	<- width
+		= unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+		| isFloatType rep
+		, W64	<- width
+		= unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+		| not $ isFloatType rep
+		, W32	<- width
+		= unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+		| not $ isFloatType rep
+		, W64		<- width
+		, r_dest_hi	<- getHiVRegFromLo r_dest
+		= toOL 	[ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+			, mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in	result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp 
+	:: CallishMachOp 
+	-> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop 
+ = do	let functionName
+ 		= outOfLineFloatOp_table mop
+	
+ 	dflags	<- getDynFlagsNat
+	mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
+		$  mkForeignLabel functionName Nothing True IsFunction
+
+	let mopLabelOrExpr 
+		= case mopExpr of
+			CmmLit (CmmLabel lbl) 	-> Left lbl
+                        _ 			-> Right mopExpr
+
+	return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table 
+	:: CallishMachOp
+	-> FastString
+	
+outOfLineFloatOp_table mop
+ = case mop of
+	MO_F32_Exp    -> fsLit "expf"
+	MO_F32_Log    -> fsLit "logf"
+	MO_F32_Sqrt   -> fsLit "sqrtf"
+	MO_F32_Pwr    -> fsLit "powf"
+
+	MO_F32_Sin    -> fsLit "sinf"
+	MO_F32_Cos    -> fsLit "cosf"
+	MO_F32_Tan    -> fsLit "tanf"
+
+	MO_F32_Asin   -> fsLit "asinf"
+	MO_F32_Acos   -> fsLit "acosf"
+	MO_F32_Atan   -> fsLit "atanf"
+
+	MO_F32_Sinh   -> fsLit "sinhf"
+	MO_F32_Cosh   -> fsLit "coshf"
+	MO_F32_Tanh   -> fsLit "tanhf"
+
+	MO_F64_Exp    -> fsLit "exp"
+	MO_F64_Log    -> fsLit "log"
+	MO_F64_Sqrt   -> fsLit "sqrt"
+	MO_F64_Pwr    -> fsLit "pow"
+
+	MO_F64_Sin    -> fsLit "sin"
+	MO_F64_Cos    -> fsLit "cos"
+	MO_F64_Tan    -> fsLit "tan"
+
+	MO_F64_Asin   -> fsLit "asin"
+	MO_F64_Acos   -> fsLit "acos"
+	MO_F64_Atan   -> fsLit "atan"
+
+	MO_F64_Sinh   -> fsLit "sinh"
+	MO_F64_Cosh   -> fsLit "cosh"
+	MO_F64_Tanh   -> fsLit "tanh"
+
+	other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+              		(pprCallishMachOp mop)
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+	| opt_PIC
+	= error "MachCodeGen: sparc genSwitch PIC not finished\n"
+  
+	| otherwise
+	= do	(e_reg, e_code) <- getSomeReg expr
+
+		base_reg	<- getNewRegNat II32
+		offset_reg	<- getNewRegNat II32
+		dst		<- getNewRegNat II32
+
+		label 		<- getNewLabelNat
+		let jumpTable	= map jumpTableEntry ids
+
+		return $ e_code `appOL`
+		 toOL	
+		 	-- the jump table
+			[ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+			-- load base of jump table
+			, SETHI (HI (ImmCLbl label)) base_reg
+			, OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+			
+			-- the addrs in the table are 32 bits wide..
+			, SLL   e_reg (RIImm $ ImmInt 2) offset_reg
+
+			-- load and jump to the destination
+			, LD 	  II32 (AddrRegReg base_reg offset_reg) dst
+			, JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
+			, NOP ]
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat II32
+    let
+	code__2 dst = code `appOL` toOL [
+    	    SUB False True g0 (RIReg src) g0,
+    	    SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg EQQ x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+    	code__2 dst = code1 `appOL` code2 `appOL` toOL [
+    	    XOR False src1 (RIReg src2) dst,
+    	    SUB False True g0 (RIReg dst) g0,
+    	    SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat II32
+    let
+    	code__2 dst = code `appOL` toOL [
+    	    SUB False True g0 (RIReg src) g0,
+    	    ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+	code__2 dst = code1 `appOL` code2 `appOL` toOL [
+    	    XOR False src1 (RIReg src2) dst,
+    	    SUB False True g0 (RIReg dst) g0,
+    	    ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg cond x y = do
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
+    CondCode _ cond cond_code <- condIntCode cond x y
+    let
+	code__2 dst = cond_code `appOL` toOL [
+	    BI cond False bid1, NOP,
+	    OR False g0 (RIImm (ImmInt 0)) dst,
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
+	    OR False g0 (RIImm (ImmInt 1)) dst,
+	    NEWBLOCK bid2]
+    return (Any II32 code__2)
+
+condFltReg cond x y = do
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
+    CondCode _ cond cond_code <- condFltCode cond x y
+    let
+    	code__2 dst = cond_code `appOL` toOL [ 
+    	    NOP,
+	    BF cond False bid1, NOP,
+	    OR False g0 (RIImm (ImmInt 0)) dst,
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
+	    OR False g0 (RIImm (ImmInt 1)) dst,
+	    NEWBLOCK bid2]
+    return (Any II32 code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+  | fits13Bits y
+  = do
+      (src1, code) <- getSomeReg x
+      tmp <- getNewRegNat II32
+      let
+    	src2 = ImmInt (fromInteger y)
+    	code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+      return (Any II32 code__2)
+
+trivialCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+    	code__2 dst = code1 `appOL` code2 `snocOL`
+    	    	      instr src1 (RIReg src2) dst
+    return (Any II32 code__2)
+
+------------
+trivialFCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
+    tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
+    tmp <- getNewRegNat FF64
+    let
+    	promote x = FxTOy FF32 FF64 x tmp
+
+    	pk1   = cmmExprType x
+    	pk2   = cmmExprType y
+
+    	code__2 dst =
+    	    	if pk1 `cmmEqType` pk2 then
+    	            code1 `appOL` code2 `snocOL`
+    	    	    instr (floatSize pk) src1 src2 dst
+    	    	else if typeWidth pk1 == W32 then
+    	    	    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+    	    	    instr FF64 tmp src2 dst
+    	    	else
+    	    	    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+    	    	    instr FF64 src1 tmp dst
+    return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) 
+    		code__2)
+
+------------
+trivialUCode size instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat size
+    let
+    	code__2 dst = code `snocOL` instr (RIReg src) dst
+    return (Any size code__2)
+
+-------------
+trivialUFCode pk instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat pk
+    let
+    	code__2 dst = code `snocOL` instr src dst
+    return (Any pk code__2)
+
+
+
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+
+
+coerceInt2FP width1 width2 x = do
+    (src, code) <- getSomeReg x
+    let
+    	code__2 dst = code `appOL` toOL [
+    	    ST (intSize width1) src (spRel (-2)),
+    	    LD (intSize width1) (spRel (-2)) dst,
+    	    FxTOy (intSize width1) (floatSize width2) dst dst]
+    return (Any (floatSize $ width2) code__2)
+
+
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--	   FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int width1 width2 x 
+ = do	let fsize1	= floatSize width1
+	    fsize2	= floatSize width2
+	
+            isize2	= intSize   width2
+
+	(fsrc, code)	<- getSomeReg x
+	fdst		<- getNewRegNat fsize2
+    
+	let code2 dst	
+		= 	code
+		`appOL` toOL
+			-- convert float to int format, leaving it in a float reg.
+			[ FxTOy fsize1 isize2 fsrc fdst
+
+			-- store the int into mem, then load it back to move
+			--	it into an actual int reg.
+			, ST    fsize2 fdst (spRel (-2))
+			, LD	isize2 (spRel (-2)) dst]
+
+	return (Any isize2 code2)
+
+------------
+coerceDbl2Flt x = do
+    (src, code) <- getSomeReg x
+    return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) 
+
+------------
+coerceFlt2Dbl x = do
+    (src, code) <- getSomeReg x
+    return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
+
+
+
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from Instrs (SDM):
+
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+	= 23
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs
new file mode 100644
index 0000000000..d0f12efcf5
--- /dev/null
+++ b/compiler/nativeGen/SPARC/Cond.hs
@@ -0,0 +1,53 @@
+
+module SPARC.Cond (
+	Cond(..),
+	condUnsigned,
+	condToSigned,
+	condToUnsigned
+)
+
+where
+
+-- | Branch condition codes.
+data Cond
+	= ALWAYS
+	| EQQ
+	| GE
+	| GEU
+	| GTT
+	| GU
+	| LE
+	| LEU
+	| LTT
+	| LU
+	| NE
+	| NEG
+	| NEVER
+	| POS
+	| VC
+	| VS
+	deriving Eq
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU  = True
+condUnsigned LU  = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _   = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU  = GTT
+condToSigned LU  = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x   = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE  = GEU
+condToUnsigned LE  = LEU
+condToUnsigned x   = x
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9c332317ea..6dc6477f9c 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -10,56 +10,55 @@
 #include "nativeGen/NCG.h"
 
 module SPARC.Instr (
-	Cond(..),
 	RI(..),
 	Instr(..),
-	riZero,
-	fpRelEA,
-	moveSp,
-	fPair,
+	maxSpillSlots
 )
 
 where
 
-import BlockId
-import RegsBase
 import SPARC.Regs
+import SPARC.Cond
+import Instruction
+import RegClass
+import Reg
+import Size
+
+import BlockId
 import Cmm
 import Outputable
-import Constants	( wORD_SIZE )
+import Constants	(rESERVED_C_STACK_BYTES )
 import FastString
+import FastBool
 
 import GHC.Exts
 
 
--- | Branch condition codes.
-data Cond
-	= ALWAYS
-	| EQQ
-	| GE
-	| GEU
-	| GTT
-	| GU
-	| LE
-	| LEU
-	| LTT
-	| LU
-	| NE
-	| NEG
-	| NEVER
-	| POS
-	| VC
-	| VS
-	deriving Eq
-
-
 -- | Register or immediate
 data RI 
 	= RIReg Reg
 	| RIImm Imm
 
 
--- | SPARC isntruction set.
+-- | instance for sparc instruction set
+instance Instruction Instr where
+	regUsageOfInstr		= sparc_regUsageOfInstr
+	patchRegsOfInstr	= sparc_patchRegsOfInstr
+	isJumpishInstr		= sparc_isJumpishInstr
+	jumpDestsOfInstr	= sparc_jumpDestsOfInstr
+	patchJumpInstr		= sparc_patchJumpInstr
+	mkSpillInstr		= sparc_mkSpillInstr
+	mkLoadInstr		= sparc_mkLoadInstr
+	takeDeltaInstr		= sparc_takeDeltaInstr
+	isMetaInstr		= sparc_isMetaInstr
+	mkRegRegMoveInstr	= sparc_mkRegRegMoveInstr
+	takeRegRegMoveInstr	= sparc_takeRegRegMoveInstr
+	mkJumpInstr		= sparc_mkJumpInstr
+
+
+-- | SPARC instruction set.
+--	Not complete. This is only the ones we need.
+--
 data Instr
 
 	-- meta ops --------------------------------------------------
@@ -78,12 +77,6 @@ data Instr
 	-- specify current stack offset for benefit of subsequent passes.
 	| DELTA   Int
 
-	-- | spill this reg to a stack slot
-	| SPILL   Reg Int
-
-	-- | reload this reg from a stack slot
-	| RELOAD  Int Reg
-
 	-- real instrs -----------------------------------------------
 	-- Loads and stores.
 	| LD		Size AddrMode Reg 		-- size, src, dst
@@ -157,39 +150,290 @@ data Instr
 	| CALL		(Either Imm Reg) Int Bool 	-- target, args, terminal
 
 
--- | Check if a RI represents a zero value.
---  	- a literal zero
---	- register %g0, which is always zero.
+-- | regUsage returns the sets of src and destination registers used
+-- 	by a particular instruction.  Machine registers that are
+-- 	pre-allocated to stgRegs are filtered out, because they are
+-- 	uninteresting from a register allocation standpoint.  (We wouldn't
+-- 	want them to end up on the free list!)  As far as we are concerned,
+-- 	the fixed registers simply don't exist (for allocation purposes,
+-- 	anyway).
+
+-- 	regUsage doesn't need to do any trickery for jumps and such.  Just
+-- 	state precisely the regs read and written by that insn.  The
+-- 	consequences of control flow transfers, as far as register
+-- 	allocation goes, are taken care of by the register allocator.
+--
+sparc_regUsageOfInstr :: Instr -> RegUsage
+sparc_regUsageOfInstr instr 
+ = case instr of
+    LD    _ addr reg  		-> usage (regAddr addr, 	[reg])
+    ST    _ reg addr  		-> usage (reg : regAddr addr, 	[])
+    ADD   _ _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    SUB   _ _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    UMUL    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    SMUL    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    UDIV    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    SDIV    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
+    RDY       rd           	-> usage ([], 		 	[rd])
+    WRY       r1 r2        	-> usage ([r1, r2], 	 	[])
+    AND     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
+    ANDN    _ r1 ar r2 		-> usage (r1 : regRI ar, 	[r2])
+    OR      _ r1 ar r2   	-> usage (r1 : regRI ar, 	[r2])
+    ORN     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
+    XOR     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
+    XNOR    _ r1 ar r2 		-> usage (r1 : regRI ar, 	[r2])
+    SLL       r1 ar r2    	-> usage (r1 : regRI ar, 	[r2])
+    SRL       r1 ar r2    	-> usage (r1 : regRI ar,	[r2])
+    SRA       r1 ar r2    	-> usage (r1 : regRI ar, 	[r2])
+    SETHI   _ reg   		-> usage ([], 			[reg])
+    FABS    _ r1 r2    		-> usage ([r1], 		[r2])
+    FADD    _ r1 r2 r3 		-> usage ([r1, r2],		[r3])
+    FCMP    _ _  r1 r2  	-> usage ([r1, r2], 		[])
+    FDIV    _ r1 r2 r3 		-> usage ([r1, r2], 		[r3])
+    FMOV    _ r1 r2    		-> usage ([r1],			[r2])
+    FMUL    _ r1 r2 r3 		-> usage ([r1, r2],	 	[r3])
+    FNEG    _ r1 r2    		-> usage ([r1], 		[r2])
+    FSQRT   _ r1 r2   		-> usage ([r1], 		[r2])
+    FSUB    _ r1 r2 r3 		-> usage ([r1, r2], 		[r3])
+    FxTOy   _ _  r1 r2 		-> usage ([r1], 		[r2])
+
+    JMP     addr 		-> usage (regAddr addr, [])
+    JMP_TBL addr _      	-> usage (regAddr addr, [])
+
+    CALL  (Left _  )  _ True  	-> noUsage
+    CALL  (Left _  )  n False 	-> usage (argRegs n, callClobberedRegs)
+    CALL  (Right reg) _ True  	-> usage ([reg], [])
+    CALL  (Right reg) n False 	-> usage (reg : (argRegs n), callClobberedRegs)
+    _ 	    	    		-> noUsage
+
+  where
+    usage (src, dst) 
+     = RU (filter interesting src) (filter interesting dst)
+
+    regAddr (AddrRegReg r1 r2)	= [r1, r2]
+    regAddr (AddrRegImm r1 _)	= [r1]
+
+    regRI (RIReg r)		= [r]
+    regRI  _			= []
+
+
+-- | Interesting regs are virtuals, or ones that are allocatable 
+--	by the register allocator.
+interesting :: Reg -> Bool
+interesting reg
+ = case reg of
+ 	VirtualRegI  _	-> True
+	VirtualRegHi _	-> True
+	VirtualRegF  _	-> True
+	VirtualRegD  _	-> True
+	RealReg i	-> isFastTrue (freeReg i)
+
+
+
+-- | Apply a given mapping to tall the register references in this instruction.
+sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+sparc_patchRegsOfInstr instr env = case instr of
+    LD    sz addr reg   	-> LD sz (fixAddr addr) (env reg)
+    ST    sz reg addr   	-> ST sz (env reg) (fixAddr addr)
+
+    ADD   x cc r1 ar r2 	-> ADD   x cc  (env r1) (fixRI ar) (env r2)
+    SUB   x cc r1 ar r2	 	-> SUB   x cc  (env r1) (fixRI ar) (env r2)
+    UMUL    cc r1 ar r2		-> UMUL    cc  (env r1) (fixRI ar) (env r2)
+    SMUL    cc r1 ar r2		-> SMUL    cc  (env r1) (fixRI ar) (env r2)
+    UDIV    cc r1 ar r2		-> UDIV    cc  (env r1) (fixRI ar) (env r2)
+    SDIV    cc r1 ar r2		-> SDIV    cc  (env r1) (fixRI ar) (env r2)
+    RDY   rd            	-> RDY         (env rd)
+    WRY   r1 r2			-> WRY         (env r1) (env r2)
+    AND   b r1 ar r2   	 	-> AND   b     (env r1) (fixRI ar) (env r2)
+    ANDN  b r1 ar r2    	-> ANDN  b     (env r1) (fixRI ar) (env r2)
+    OR    b r1 ar r2   		-> OR    b     (env r1) (fixRI ar) (env r2)
+    ORN   b r1 ar r2    	-> ORN   b     (env r1) (fixRI ar) (env r2)
+    XOR   b r1 ar r2    	-> XOR   b     (env r1) (fixRI ar) (env r2)
+    XNOR  b r1 ar r2    	-> XNOR  b     (env r1) (fixRI ar) (env r2)
+    SLL   r1 ar r2      	-> SLL         (env r1) (fixRI ar) (env r2)
+    SRL   r1 ar r2      	-> SRL         (env r1) (fixRI ar) (env r2)
+    SRA   r1 ar r2      	-> SRA         (env r1) (fixRI ar) (env r2)
+
+    SETHI imm reg       	-> SETHI imm (env reg)
+
+    FABS  s r1 r2       	-> FABS    s   (env r1) (env r2)
+    FADD  s r1 r2 r3    	-> FADD    s   (env r1) (env r2) (env r3)
+    FCMP  e s r1 r2     	-> FCMP e  s   (env r1) (env r2)
+    FDIV  s r1 r2 r3    	-> FDIV    s   (env r1) (env r2) (env r3)
+    FMOV  s r1 r2       	-> FMOV    s   (env r1) (env r2)
+    FMUL  s r1 r2 r3    	-> FMUL    s   (env r1) (env r2) (env r3)
+    FNEG  s r1 r2       	-> FNEG    s   (env r1) (env r2)
+    FSQRT s r1 r2       	-> FSQRT   s   (env r1) (env r2)
+    FSUB  s r1 r2 r3   	 	-> FSUB    s   (env r1) (env r2) (env r3)
+    FxTOy s1 s2 r1 r2   	-> FxTOy s1 s2 (env r1) (env r2)
+
+    JMP     addr        	-> JMP     (fixAddr addr)
+    JMP_TBL addr ids    	-> JMP_TBL (fixAddr addr) ids
+
+    CALL  (Left i) n t  	-> CALL (Left i) n t
+    CALL  (Right r) n t 	-> CALL (Right (env r)) n t
+    _ 				-> instr
+
+  where
+    fixAddr (AddrRegReg r1 r2) 	= AddrRegReg   (env r1) (env r2)
+    fixAddr (AddrRegImm r1 i)  	= AddrRegImm   (env r1) i
+
+    fixRI (RIReg r) 		= RIReg (env r)
+    fixRI other			= other
+
+
+--------------------------------------------------------------------------------
+sparc_isJumpishInstr :: Instr -> Bool
+sparc_isJumpishInstr instr
+ = case instr of
+	BI{}		-> True
+	BF{}		-> True
+	JMP{}		-> True
+	JMP_TBL{}	-> True
+	CALL{}		-> True
+	_		-> False
+
+sparc_jumpDestsOfInstr :: Instr -> [BlockId]
+sparc_jumpDestsOfInstr insn
+  = case insn of
+	BI   _ _ id	-> [id]
+	BF   _ _ id	-> [id]
+	JMP_TBL _ ids	-> ids
+	_		-> []
+
+
+sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+sparc_patchJumpInstr insn patchF
+  = case insn of
+	BI cc annul id	-> BI cc annul (patchF id)
+	BF cc annul id	-> BF cc annul (patchF id)
+	_		-> insn
+
+
+--------------------------------------------------------------------------------
+-- | Make a spill instruction.
+-- 	On SPARC we spill below frame pointer leaving 2 words/spill
+sparc_mkSpillInstr
+	:: Reg		-- ^ register to spill
+	-> Int		-- ^ current stack delta
+	-> Int		-- ^ spill slot to use
+	-> Instr
+
+sparc_mkSpillInstr reg _ slot
+ = let	off     = spillSlotToOffset slot
+        off_w	= 1 + (off `div` 4)
+        sz 	= case regClass reg of
+			RcInteger -> II32
+			RcFloat   -> FF32
+			RcDouble  -> FF64
+		
+    in ST sz reg (fpRel (negate off_w))
+
+
+-- | Make a spill reload instruction.
+sparc_mkLoadInstr
+	:: Reg		-- ^ register to load
+	-> Int		-- ^ current stack delta
+	-> Int		-- ^ spill slot to use
+	-> Instr
+
+sparc_mkLoadInstr reg _ slot
+  = let off     = spillSlotToOffset slot
+	off_w	= 1 + (off `div` 4)
+        sz	= case regClass reg of
+			RcInteger -> II32
+			RcFloat   -> FF32
+			RcDouble  -> FF64
+
+        in LD sz (fpRel (- off_w)) reg
+
+-- | Convert a spill slot number to a *byte* offset, with no sign.
 --
-riZero :: RI -> Bool
-riZero (RIImm (ImmInt 0))	    = True
-riZero (RIImm (ImmInteger 0))	    = True
-riZero (RIReg (RealReg 0))          = True
-riZero _			    = False
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+	| slot >= 0 && slot < maxSpillSlots
+	= 64 + spillSlotSize * slot
+
+	| otherwise
+	= pprPanic "spillSlotToOffset:" 
+	              (   text "invalid spill location: " <> int slot
+		      $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+
+
+-- | We need 8 bytes because our largest registers are 64 bit.
+spillSlotSize :: Int
+spillSlotSize = 8
 
 
--- | Calculate the effective address which would be used by the
--- 	corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
--- 	alas -- can't have fpRelEA here because of module dependencies.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
-   = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+-- | The maximum number of spill slots available on the C stack.
+--	If we use up all of the slots, then we're screwed.
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 
 
--- | Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
-   = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+sparc_takeDeltaInstr
+	:: Instr
+	-> Maybe Int
+	
+sparc_takeDeltaInstr instr
+ = case instr of
+ 	DELTA i		-> Just i
+	_		-> Nothing
+
+
+sparc_isMetaInstr
+	:: Instr
+	-> Bool
+	
+sparc_isMetaInstr instr
+ = case instr of
+ 	COMMENT{}	-> True
+	LDATA{}		-> True
+	NEWBLOCK{}	-> True
+	DELTA{}		-> True
+	_		-> False
+	
+
+-- | Make a reg-reg move instruction.
+--	On SPARC v8 there are no instructions to move directly between
+--	floating point and integer regs. If we need to do that then we
+--	have to go via memory.
+--
+sparc_mkRegRegMoveInstr
+	:: Reg
+	-> Reg
+	-> Instr
+
+sparc_mkRegRegMoveInstr src dst
+ = case regClass src of
+	RcInteger -> ADD  False False src (RIReg g0) dst
+	RcDouble  -> FMOV FF64 src dst
+	RcFloat   -> FMOV FF32 src dst
+
+
+-- | Check whether an instruction represents a reg-reg move.
+-- 	The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- 	by assigning the src and dest temporaries to the same real register.
+--
+sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+sparc_takeRegRegMoveInstr instr
+ = case instr of
+ 	ADD False False src (RIReg src2) dst
+	 | g0 == src2		-> Just (src, dst)
+
+	FMOV FF64 src dst	-> Just (src, dst)
+	FMOV FF32  src dst	-> Just (src, dst)
+	_			-> Nothing
 
 
--- | Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n) 
-	| n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+-- | Make an unconditional branch instruction.
+sparc_mkJumpInstr
+	:: BlockId
+	-> [Instr]
 
-fPair (VirtualRegD u)
-	= Just (VirtualRegHi u)
+sparc_mkJumpInstr id 
+ = 	 [BI ALWAYS False id
+	, NOP]			-- fill the branch delay slot.
 
-fPair _
-	= trace ("MachInstrs.fPair: can't get high half of supposed double reg ") 
-		Nothing
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 7d64df1b15..a0d5fffce1 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -7,12 +7,15 @@
 -----------------------------------------------------------------------------
 
 module SPARC.Ppr (
+	pprNatCmmTop,
+	pprBasicBlock,
+	pprSectionHeader,
+	pprData,
+	pprInstr,
 	pprUserReg,
 	pprSize,
 	pprImm,
-	pprSectionHeader,
-	pprDataItem,
-	pprInstr
+	pprDataItem
 )
 
 where
@@ -20,20 +23,119 @@ where
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
 
-import PprBase
-import RegsBase
 import SPARC.Regs
+import SPARC.RegInfo
 import SPARC.Instr
+import SPARC.Cond
+import Instruction
+import Reg
+import Size
+import PprBase
 
 import BlockId
 import Cmm
-
 import CLabel
 
-import Panic		( panic )
 import Unique		( pprUnique )
+import qualified Outputable
+import Outputable	(Outputable, panic)
 import Pretty
 import FastString
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) = 
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = 
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+  $$ if not (null info)
+		    then text "\t.long "
+		      <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+		      <+> char '-'
+		      <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+		    else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+  pprLabel (mkAsmTempLabel id) $$
+  vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes)         = pprAlign bytes
+pprData (CmmDataLabel lbl)       = pprLabel lbl
+pprData (CmmString str)          = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit)       = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext IF_ARCH_sparc((sLit ".global "), 
+				    (sLit ".globl ")) <>
+		pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext (sLit ".type ") <>
+		pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+  = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+  = vcat (map do1 str) $$ do1 0
+    where
+       do1 :: Word8 -> Doc
+       do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+	ptext (sLit ".align ") <> int bytes
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+    ppr	 instr	= Outputable.docToSDoc $ pprInstr instr
 
 
 -- | Pretty print a register.
@@ -101,12 +203,13 @@ pprSize :: Size -> Doc
 pprSize x 
  = ptext 
     (case x of
-	II8   -> sLit "ub"
-	II16  -> sLit "uh"
-	II32  -> sLit ""
-	II64  -> sLit "d"
-	FF32  -> sLit ""
-	FF64  -> sLit "d")
+	II8	-> sLit "ub"
+	II16	-> sLit "uh"
+	II32	-> sLit ""
+	II64	-> sLit "d"
+	FF32	-> sLit ""
+	FF64	-> sLit "d"
+	_	-> panic "SPARC.Ppr.pprSize: no match")
 
 
 -- | Pretty print a size for an instruction suffix.
@@ -120,7 +223,8 @@ pprStSize x
 	II32  -> sLit ""
 	II64  -> sLit "x"
 	FF32  -> sLit ""
-	FF64  -> sLit "d")
+	FF64  -> sLit "d"
+	_	-> panic "SPARC.Ppr.pprSize: no match")
 
 		
 -- | Pretty print a condition code.
@@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _)
 pprInstr (LDATA _ _)
 	= panic "PprMach.pprInstr: LDATA"
 
+{-
 pprInstr (SPILL reg slot)
  = hcat [
  	ptext (sLit "\tSPILL"),
@@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg)
 	ptext (sLit "SLOT") <> parens (int slot),
 	comma,
 	pprReg reg]
-
+-}
 
 -- a clumsy hack for now, to handle possible double alignment problems
 -- even clumsier, to allow for RegReg regs that show when doing indexed
diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs
index 8f8a977ac7..025e302556 100644
--- a/compiler/nativeGen/SPARC/RegInfo.hs
+++ b/compiler/nativeGen/SPARC/RegInfo.hs
@@ -8,241 +8,115 @@
 -----------------------------------------------------------------------------
 
 module SPARC.RegInfo (
-	-- machine specific 
-	RegUsage(..),
-	noUsage,
-	regUsage,
-	patchRegs,
-	jumpDests,
-	isJumpish,
-	patchJump,
-	isRegRegMove,
+	mkVReg,
+
+	riZero,
+	fpRelEA,
+	moveSp,
+	fPair,
+
+	shortcutStatic,
+	regDotColor,
 
         JumpDest(..), 
 	canShortcut, 
-	shortcutJump, 
-
-	mkSpillInstr,
-	mkLoadInstr,
-	mkRegRegMoveInstr,
-	mkBranchInstr,
-	
-	spillSlotSize,
-	maxSpillSlots,
-	spillSlotToOffset		
+	shortcutJump,
 )
 
 where
 
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
 import SPARC.Instr
 import SPARC.Regs
-import RegsBase
+import RegClass
+import Reg
+import Size
 
+import Constants	(wORD_SIZE)
+import Cmm
+import CLabel
 import BlockId
 import Outputable
-import Constants	( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- | Represents what regs are read and written to in an instruction.
---	
-data RegUsage 
-	= RU 	[Reg] 	-- regs read from
-		[Reg]	-- regs written to
-
-
--- | No regs read or written to.
-noUsage :: RegUsage
-noUsage  = RU [] []
-
-
--- | regUsage returns the sets of src and destination registers used
--- 	by a particular instruction.  Machine registers that are
--- 	pre-allocated to stgRegs are filtered out, because they are
--- 	uninteresting from a register allocation standpoint.  (We wouldn't
--- 	want them to end up on the free list!)  As far as we are concerned,
--- 	the fixed registers simply don't exist (for allocation purposes,
--- 	anyway).
-
--- 	regUsage doesn't need to do any trickery for jumps and such.  Just
--- 	state precisely the regs read and written by that insn.  The
--- 	consequences of control flow transfers, as far as register
--- 	allocation goes, are taken care of by the register allocator.
---
-regUsage :: Instr -> RegUsage
-regUsage instr 
- = case instr of
-    SPILL  reg _		-> usage ([reg], [])
-    RELOAD _   reg		-> usage ([], [reg])
-
-    LD    _ addr reg  		-> usage (regAddr addr, 	[reg])
-    ST    _ reg addr  		-> usage (reg : regAddr addr, 	[])
-    ADD   _ _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    SUB   _ _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    UMUL    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    SMUL    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    UDIV    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    SDIV    _ r1 ar r2		-> usage (r1 : regRI ar, 	[r2])
-    RDY       rd           	-> usage ([], 		 	[rd])
-    WRY       r1 r2        	-> usage ([r1, r2], 	 	[])
-    AND     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
-    ANDN    _ r1 ar r2 		-> usage (r1 : regRI ar, 	[r2])
-    OR      _ r1 ar r2   	-> usage (r1 : regRI ar, 	[r2])
-    ORN     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
-    XOR     _ r1 ar r2  	-> usage (r1 : regRI ar, 	[r2])
-    XNOR    _ r1 ar r2 		-> usage (r1 : regRI ar, 	[r2])
-    SLL       r1 ar r2    	-> usage (r1 : regRI ar, 	[r2])
-    SRL       r1 ar r2    	-> usage (r1 : regRI ar,	[r2])
-    SRA       r1 ar r2    	-> usage (r1 : regRI ar, 	[r2])
-    SETHI   _ reg   		-> usage ([], 			[reg])
-    FABS    _ r1 r2    		-> usage ([r1], 		[r2])
-    FADD    _ r1 r2 r3 		-> usage ([r1, r2],		[r3])
-    FCMP    _ _  r1 r2  	-> usage ([r1, r2], 		[])
-    FDIV    _ r1 r2 r3 		-> usage ([r1, r2], 		[r3])
-    FMOV    _ r1 r2    		-> usage ([r1],			[r2])
-    FMUL    _ r1 r2 r3 		-> usage ([r1, r2],	 	[r3])
-    FNEG    _ r1 r2    		-> usage ([r1], 		[r2])
-    FSQRT   _ r1 r2   		-> usage ([r1], 		[r2])
-    FSUB    _ r1 r2 r3 		-> usage ([r1, r2], 		[r3])
-    FxTOy   _ _  r1 r2 		-> usage ([r1], 		[r2])
-
-    JMP     addr 		-> usage (regAddr addr, [])
-    JMP_TBL addr _      	-> usage (regAddr addr, [])
-
-    CALL  (Left _  )  _ True  	-> noUsage
-    CALL  (Left _  )  n False 	-> usage (argRegs n, callClobberedRegs)
-    CALL  (Right reg) _ True  	-> usage ([reg], [])
-    CALL  (Right reg) n False 	-> usage (reg : (argRegs n), callClobberedRegs)
-    _ 	    	    		-> noUsage
-
-  where
-    usage (src, dst) 
-     = RU (filter interesting src) (filter interesting dst)
-
-    regAddr (AddrRegReg r1 r2)	= [r1, r2]
-    regAddr (AddrRegImm r1 _)	= [r1]
+import Unique
 
-    regRI (RIReg r)		= [r]
-    regRI  _			= []
 
+-- | Make a virtual reg with this size.
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+	| not (isFloatSize size) 
+	= VirtualRegI u
 
--- | Interesting regs are virtuals, or ones that are allocatable 
---	by the register allocator.
-interesting :: Reg -> Bool
-interesting reg
- = case reg of
- 	VirtualRegI  _	-> True
-	VirtualRegHi _	-> True
-	VirtualRegF  _	-> True
-	VirtualRegD  _	-> True
-	RealReg i	-> isFastTrue (freeReg i)
-
-
-
--- | Apply a given mapping to tall the register references in this instruction.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
-    SPILL reg slot		-> SPILL  (env reg) slot
-    RELOAD slot reg		-> RELOAD slot (env reg)
-
-    LD    sz addr reg   	-> LD sz (fixAddr addr) (env reg)
-    ST    sz reg addr   	-> ST sz (env reg) (fixAddr addr)
-
-    ADD   x cc r1 ar r2 	-> ADD   x cc  (env r1) (fixRI ar) (env r2)
-    SUB   x cc r1 ar r2	 	-> SUB   x cc  (env r1) (fixRI ar) (env r2)
-    UMUL    cc r1 ar r2		-> UMUL    cc  (env r1) (fixRI ar) (env r2)
-    SMUL    cc r1 ar r2		-> SMUL    cc  (env r1) (fixRI ar) (env r2)
-    UDIV    cc r1 ar r2		-> UDIV    cc  (env r1) (fixRI ar) (env r2)
-    SDIV    cc r1 ar r2		-> SDIV    cc  (env r1) (fixRI ar) (env r2)
-    RDY   rd            	-> RDY         (env rd)
-    WRY   r1 r2			-> WRY         (env r1) (env r2)
-    AND   b r1 ar r2   	 	-> AND   b     (env r1) (fixRI ar) (env r2)
-    ANDN  b r1 ar r2    	-> ANDN  b     (env r1) (fixRI ar) (env r2)
-    OR    b r1 ar r2   		-> OR    b     (env r1) (fixRI ar) (env r2)
-    ORN   b r1 ar r2    	-> ORN   b     (env r1) (fixRI ar) (env r2)
-    XOR   b r1 ar r2    	-> XOR   b     (env r1) (fixRI ar) (env r2)
-    XNOR  b r1 ar r2    	-> XNOR  b     (env r1) (fixRI ar) (env r2)
-    SLL   r1 ar r2      	-> SLL         (env r1) (fixRI ar) (env r2)
-    SRL   r1 ar r2      	-> SRL         (env r1) (fixRI ar) (env r2)
-    SRA   r1 ar r2      	-> SRA         (env r1) (fixRI ar) (env r2)
-
-    SETHI imm reg       	-> SETHI imm (env reg)
-
-    FABS  s r1 r2       	-> FABS    s   (env r1) (env r2)
-    FADD  s r1 r2 r3    	-> FADD    s   (env r1) (env r2) (env r3)
-    FCMP  e s r1 r2     	-> FCMP e  s   (env r1) (env r2)
-    FDIV  s r1 r2 r3    	-> FDIV    s   (env r1) (env r2) (env r3)
-    FMOV  s r1 r2       	-> FMOV    s   (env r1) (env r2)
-    FMUL  s r1 r2 r3    	-> FMUL    s   (env r1) (env r2) (env r3)
-    FNEG  s r1 r2       	-> FNEG    s   (env r1) (env r2)
-    FSQRT s r1 r2       	-> FSQRT   s   (env r1) (env r2)
-    FSUB  s r1 r2 r3   	 	-> FSUB    s   (env r1) (env r2) (env r3)
-    FxTOy s1 s2 r1 r2   	-> FxTOy s1 s2 (env r1) (env r2)
-
-    JMP     addr        	-> JMP     (fixAddr addr)
-    JMP_TBL addr ids    	-> JMP_TBL (fixAddr addr) ids
-
-    CALL  (Left i) n t  	-> CALL (Left i) n t
-    CALL  (Right r) n t 	-> CALL (Right (env r)) n t
-    _ 				-> instr
-
-  where
-    fixAddr (AddrRegReg r1 r2) 	= AddrRegReg   (env r1) (env r2)
-    fixAddr (AddrRegImm r1 i)  	= AddrRegImm   (env r1) i
-
-    fixRI (RIReg r) 		= RIReg (env r)
-    fixRI other			= other
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
-  = case insn of
-	BI   _ _ id	-> id : acc
-	BF   _ _ id	-> id : acc
-	JMP_TBL _ ids	-> ids ++ acc
-	_other		-> acc
+	| otherwise
+	= case size of
+		FF32    -> VirtualRegF u
+		FF64	-> VirtualRegD u
+		_ 	-> panic "mkVReg"
 
 
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
---	We can't just use jumpDests above because the jump might take its arg,
---	so the instr won't contain a blockid.
+-- | Check if a RI represents a zero value.
+--  	- a literal zero
+--	- register %g0, which is always zero.
 --
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
-	BI{}		-> True
-	BF{}		-> True
-	JMP{}		-> True
-	JMP_TBL{}	-> True
-	CALL{}		-> True
-	_		-> False
-
-
--- | Change the destination of this jump instruction
---	Used in joinToTargets in the linear allocator, when emitting fixup code
---	for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
-  = case insn of
-	BI cc annul id
-	 | id == old	-> BI cc annul new
-	 
-	BF cc annul id
-	 | id == old	-> BF cc annul new
-
-	_other		-> insn
-
+riZero :: RI -> Bool
+riZero (RIImm (ImmInt 0))	    = True
+riZero (RIImm (ImmInteger 0))	    = True
+riZero (RIReg (RealReg 0))          = True
+riZero _			    = False
+
+
+-- | Calculate the effective address which would be used by the
+-- 	corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
+-- 	alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+   = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+   = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n) 
+	| n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+	= Just (VirtualRegHi u)
+
+fPair _
+	= trace ("MachInstrs.fPair: can't get high half of supposed double reg ") 
+		Nothing
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+  | Just uq <- maybeAsmTemp lab 
+  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+  | Just uq <- maybeAsmTemp lbl1
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+        -- slightly dodgy, we're ignoring the second label, but this
+        -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+        = other_static
+
+shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+   case fn blockid of
+      Nothing -> mkAsmTempLabel uq
+      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
+      Just (DestImm (ImmCLbl lbl)) -> lbl
+      _other -> panic "shortBlockId"
+
+
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+ 	RcInteger	-> text "blue"
+	RcFloat		-> text "red"
+	RcDouble	-> text "green"
 
 
 
@@ -253,108 +127,3 @@ canShortcut _ = Nothing
 
 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 shortcutJump _ other = other
-
-
-
--- | Make a spill instruction.
--- 	On SPARC we spill below frame pointer leaving 2 words/spill
-mkSpillInstr
-	:: Reg		-- ^ register to spill
-	-> Int		-- ^ current stack delta
-	-> Int		-- ^ spill slot to use
-	-> Instr
-
-mkSpillInstr reg _ slot
- = let	off     = spillSlotToOffset slot
-        off_w	= 1 + (off `div` 4)
-        sz 	= case regClass reg of
-			RcInteger -> II32
-			RcFloat   -> FF32
-			RcDouble  -> FF64
-		
-    in ST sz reg (fpRel (negate off_w))
-
-
--- | Make a spill reload instruction.
-mkLoadInstr
-	:: Reg		-- ^ register to load
-	-> Int		-- ^ current stack delta
-	-> Int		-- ^ spill slot to use
-	-> Instr
-
-mkLoadInstr reg _ slot
-  = let off     = spillSlotToOffset slot
-	off_w	= 1 + (off `div` 4)
-        sz	= case regClass reg of
-			RcInteger -> II32
-			RcFloat   -> FF32
-			RcDouble  -> FF64
-
-        in LD sz (fpRel (- off_w)) reg
-
-
--- | Make a reg-reg move instruction.
---	On SPARC v8 there are no instructions to move directly between
---	floating point and integer regs. If we need to do that then we
---	have to go via memory.
---
-mkRegRegMoveInstr
-	:: Reg
-	-> Reg
-	-> Instr
-
-mkRegRegMoveInstr src dst
- = case regClass src of
-	RcInteger -> ADD  False False src (RIReg g0) dst
-	RcDouble  -> FMOV FF64 src dst
-	RcFloat   -> FMOV FF32 src dst
-
-
--- | Check whether an instruction represents a reg-reg move.
--- 	The register allocator attempts to eliminate reg->reg moves whenever it can,
--- 	by assigning the src and dest temporaries to the same real register.
---
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove instr
- = case instr of
- 	ADD False False src (RIReg src2) dst
-	 | g0 == src2		-> Just (src, dst)
-
-	FMOV FF64 src dst	-> Just (src, dst)
-	FMOV FF32  src dst	-> Just (src, dst)
-	_			-> Nothing
-
-
--- | Make an unconditional branch instruction.
-mkBranchInstr
-	:: BlockId
-	-> [Instr]
-
-mkBranchInstr id 
- = 	 [BI ALWAYS False id
-	, NOP]			-- fill the branch delay slot.
-
-
--- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02
-spillSlotSize :: Int
-spillSlotSize = 8
-
-
--- | The maximum number of spill slots available on the C stack.
---	If we use up all of the slots, then we're screwed.
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
---
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
-	| slot >= 0 && slot < maxSpillSlots
-	= 64 + spillSlotSize * slot
-
-	| otherwise
-	= pprPanic "spillSlotToOffset:" 
-	              (   text "invalid spill location: " <> int slot
-		      $$  text "maxSpillSlots:          " <> int maxSpillSlots)
-
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index 987fc2da14..1fb6a01b87 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -5,17 +5,6 @@
 -- -----------------------------------------------------------------------------
 
 module SPARC.Regs (
-
-	-- sizes
-	Size(..),
-	intSize, 
-	floatSize, 
-	isFloatSize, 
-	wordSize,
-	cmmTypeSize,
-	sizeToWidth,
-	mkVReg,
-
 	-- immediate values
 	Imm(..),
 	strImmLit,
@@ -39,113 +28,33 @@ module SPARC.Regs (
 	fits13Bits, 
 	largeOffsetError,
 	gReg, iReg, lReg, oReg, fReg,
-	fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
+	fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27,
 	nCG_FirstFloatReg,
 
-	-- horror show
+	-- allocatable
 	freeReg,
-	globalRegMaybe
+	allocatableRegs,
+	globalRegMaybe,
+
+	get_GlobalReg_reg_or_addr
 )
 
 where
 
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-#include "../includes/MachRegs.h"
 
-import RegsBase
+import Reg
+import RegClass
 
+import CgUtils          ( get_GlobalReg_addr )
 import BlockId
 import Cmm
 import CLabel           ( CLabel )
 import Pretty
-import Outputable	( Outputable(..), pprPanic, panic )
+import Outputable	( panic )
 import qualified Outputable
-import Unique
 import Constants
 import FastBool
 
--- sizes -----------------------------------------------------------------------
-
--- | A 'Size' also includes format information, such as whether
---	the word is signed or unsigned.
---
-data Size
-	= II8     -- byte (signed)
-	| II16    -- halfword (signed, 2 bytes)
-	| II32    -- word (4 bytes)
-	| II64    -- word (8 bytes)
-	| FF32    -- IEEE single-precision floating pt
-	| FF64    -- IEEE single-precision floating pt
-	deriving Eq
-
-
--- | Get the integer size of this width.
-intSize :: Width -> Size
-intSize width
- = case width of
- 	W8 	-> II8
-	W16	-> II16
-	W32	-> II32
-	W64 	-> II64
-	other	-> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Get the float size of this width.
-floatSize :: Width -> Size
-floatSize width
- = case width of
- 	W32	-> FF32
-	W64	-> FF64
-	other 	-> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Check if a size represents a floating point value.
-isFloatSize :: Size -> Bool
-isFloatSize size
- = case size of
- 	FF32	-> True
-	FF64	-> True
-	_	-> False
-
-
--- | Size of a machine word. 
---	This is big enough to hold a pointer.
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
--- | Convert a Cmm type to a Size.
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty 
-	| isFloatType ty	= floatSize (typeWidth ty)
-	| otherwise		= intSize (typeWidth ty)
-
-
--- | Get the Width of a Size.
-sizeToWidth :: Size -> Width
-sizeToWidth size
- = case size of
- 	II8		-> W8
-	II16		-> W16
-	II32		-> W32
-	II64		-> W64
-	FF32		-> W32
-	FF64		-> W64
-
-
--- | Make a virtual reg with this size.
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
-	| not (isFloatSize size) 
-	= VirtualRegI u
-
-	| otherwise
-	= case size of
-		FF32    -> VirtualRegF u
-		FF64	-> VirtualRegD u
-		_ 	-> panic "mkVReg"
-
 
 -- immediates ------------------------------------------------------------------
 
@@ -390,48 +299,13 @@ o1  = RealReg (oReg 1)
 f0  = RealReg (fReg 0)
 
 
+-- | We use he first few float regs as double precision. 
+--	This is the RegNo of the first float regs we use as single precision.
+--
 nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
-#else
-nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg f22
-#endif
-
-
--- horror show -----------------------------------------------------------------
-#if sparc_TARGET_ARCH
-#define g0 0
-#define g1 1
-#define g2 2
-#define g3 3
-#define g4 4
-#define g5 5
-#define g6 6
-#define g7 7
-#define o0 8
-#define o1 9
-#define o2 10
-#define o3 11
-#define o4 12
-#define o5 13
-#define o6 14
-#define o7 15
-#define l0 16
-#define l1 17
-#define l2 18
-#define l3 19
-#define l4 20
-#define l5 21
-#define l6 22
-#define l7 23
-#define i0 24
-#define i1 25
-#define i2 26
-#define i3 27
-#define i4 28
-#define i5 29
-#define i6 30
-#define i7 31
+nCG_FirstFloatReg = 54
+
+
 
 -- | Check whether a machine register is free for allocation.
 --	This needs to match the info in includes/MachRegs.h otherwise modules
@@ -445,7 +319,11 @@ freeReg regno
 
  	-- %g1(r1) - %g4(r4) are allocable -----------------
 
-freeReg :: RegNo -> FastBool
+	-- %g5(r5) - %g7(r7) 
+	--	are reserved for the OS
+	5	-> fastBool False
+	6	-> fastBool False
+	7	-> fastBool False
 
 	-- %o0(r8) - %o5(r13) are allocable ----------------
 
@@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool
 
 	-- regs not matched above are allocable.
 	_	-> fastBool True
-	
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+   = let isFree i = isFastTrue (freeReg i)
+     in  filter isFree allMachRegNos
 
 
 -- | Returns Just the real register that a global register is stored in.
@@ -539,15 +425,20 @@ globalRegMaybe gg
 	Hp		-> Just (RealReg 27)	-- %i3
 	HpLim		-> Just (RealReg 28)	-- %i4
 
-globalRegMaybe :: GlobalReg -> Maybe Reg
-
-
-
-#else
-
-freeReg	_	= 0#
-globalRegMaybe	= panic "SPARC.Regs.globalRegMaybe: not defined"
+	BaseReg		-> Just (RealReg 25)	-- %i1
+		
+	_		-> Nothing 	
 
-#endif
 
+-- We map STG registers onto appropriate CmmExprs.  Either they map
+-- to real machine registers or stored as offsets from BaseReg.  Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
 
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+   = case globalRegMaybe mid of
+        Just rr -> Left rr
+        Nothing -> Right (get_GlobalReg_addr mid)