path: root/compiler
author    Ben.Lippmeier@anu.edu.au <unknown>    2009-02-15 05:51:58 +0000
committer Ben.Lippmeier@anu.edu.au <unknown>    2009-02-15 05:51:58 +0000
commit    b04a210e26ca57242fd052f2aa91011a80b76299 (patch)
tree      6f26993cc3ef37f4555087bd80da4195edcda4ed /compiler
parent    77ed23d51b968505b3ad8541c075657ae94f0ea3 (diff)
download  haskell-b04a210e26ca57242fd052f2aa91011a80b76299.tar.gz
NCG: Split up the native code generator into arch specific modules
- nativeGen/Instruction defines a type class for a generic instruction set. Each of the instruction sets we have (X86, PPC and SPARC) is an instance of it.
- The register allocators use this type class when they need info about a certain register or instruction, such as regUsage, mkSpillInstr, mkJumpInstr and patchRegs.
- nativeGen/Platform defines some data types enumerating the architectures and operating systems supported by the native code generator.
- DynFlags now keeps track of the current build platform, and the PositionIndependentCode module uses this to decide what to do instead of relying on #ifdefs.
- It's not totally retargetable yet. Some info about the build target is still hardwired, but I've tried to contain most of it to a single module, TargetReg.
- Moved the SPILL and RELOAD instructions into LiveInstr.
- Reg and RegClass now have their own modules, and are shared across all architectures.
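The new interface is easiest to see in miniature. Below is a minimal sketch of the pattern this patch introduces, written for illustration only: the Reg, RegUsage and FakeInstr types are hypothetical stand-ins, while the real Instruction class (with many more methods) appears in nativeGen/Instruction.hs further down in this diff.

module InstructionSketch where

-- A toy register type; GHC's real Reg lives in nativeGen/Reg.hs.
newtype Reg = Reg Int
  deriving (Eq, Show)

-- Registers read and written by an instruction, mirroring the RegUsage
-- type defined in nativeGen/Instruction.hs.
data RegUsage = RU [Reg] [Reg]
  deriving Show

-- The shared interface: the register allocators program against this.
class Instruction instr where
  regUsageOfInstr  :: instr -> RegUsage
  patchRegsOfInstr :: instr -> (Reg -> Reg) -> instr

-- A made-up two-instruction architecture, standing in for X86/PPC/SPARC.
data FakeInstr
  = Move Reg Reg        -- dst <- src
  | Add  Reg Reg Reg    -- dst <- a + b
  deriving Show

instance Instruction FakeInstr where
  regUsageOfInstr (Move dst src) = RU [src]  [dst]
  regUsageOfInstr (Add dst a b)  = RU [a, b] [dst]

  patchRegsOfInstr (Move dst src) f = Move (f dst) (f src)
  patchRegsOfInstr (Add dst a b)  f = Add (f dst) (f a) (f b)

-- Generic code, like the allocators, now works for any architecture:
allUsage :: Instruction instr => [instr] -> [RegUsage]
allUsage = map regUsageOfInstr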
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/ghc.cabal.in                                |   24
-rw-r--r--  compiler/main/DynFlags.hs                            |    3
-rw-r--r--  compiler/nativeGen/Alpha/CodeGen.hs                  |  789
-rw-r--r--  compiler/nativeGen/Alpha/Instr.hs                    |    8
-rw-r--r--  compiler/nativeGen/ArchReg.hs                        |   14
-rw-r--r--  compiler/nativeGen/AsmCodeGen.lhs                    |  139
-rw-r--r--  compiler/nativeGen/Instrs.hs                         |   97
-rw-r--r--  compiler/nativeGen/Instruction.hs                    |  159
-rw-r--r--  compiler/nativeGen/MachCodeGen.hs                    | 5199
-rw-r--r--  compiler/nativeGen/NCGMonad.hs                       |  119
-rw-r--r--  compiler/nativeGen/PIC.hs (renamed from compiler/nativeGen/PositionIndependentCode.hs) | 700
-rw-r--r--  compiler/nativeGen/PPC/CodeGen.hs                    | 1364
-rw-r--r--  compiler/nativeGen/PPC/Cond.hs                       |   62
-rw-r--r--  compiler/nativeGen/PPC/Instr.hs                      |  363
-rw-r--r--  compiler/nativeGen/PPC/Ppr.hs                        |  128
-rw-r--r--  compiler/nativeGen/PPC/RegInfo.hs                    |  315
-rw-r--r--  compiler/nativeGen/PPC/Regs.hs                       |  102
-rw-r--r--  compiler/nativeGen/Platform.hs                       |   92
-rw-r--r--  compiler/nativeGen/PprMach.hs                        |  183
-rw-r--r--  compiler/nativeGen/Reg.hs                            |  113
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Coalesce.hs        |   23
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Main.hs            |   56
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Spill.hs           |   46
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/SpillClean.hs      |   95
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/SpillCost.hs       |   48
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Stats.hs           |  149
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs (renamed from compiler/nativeGen/Regs.hs) | 172
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Base.hs           |    2
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs       |    6
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs  |   47
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Main.hs           |  117
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs   |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/StackMap.hs       |    2
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/State.hs          |   16
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Stats.hs          |   15
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs   |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Liveness.hs              |  268
-rw-r--r--  compiler/nativeGen/RegAllocInfo.hs                   |   94
-rw-r--r--  compiler/nativeGen/RegClass.hs                       |   31
-rw-r--r--  compiler/nativeGen/RegsBase.hs                       |  105
-rw-r--r--  compiler/nativeGen/SPARC/CodeGen.hs                  | 1545
-rw-r--r--  compiler/nativeGen/SPARC/Cond.hs                     |   53
-rw-r--r--  compiler/nativeGen/SPARC/Instr.hs                    |  370
-rw-r--r--  compiler/nativeGen/SPARC/Ppr.hs                      |  135
-rw-r--r--  compiler/nativeGen/SPARC/RegInfo.hs                  |  413
-rw-r--r--  compiler/nativeGen/SPARC/Regs.hs                     |  197
-rw-r--r--  compiler/nativeGen/Size.hs                           |  103
-rw-r--r--  compiler/nativeGen/TargetReg.hs                      |  101
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs                    | 2313
-rw-r--r--  compiler/nativeGen/X86/Cond.hs                       |   52
-rw-r--r--  compiler/nativeGen/X86/Instr.hs                      |  492
-rw-r--r--  compiler/nativeGen/X86/Ppr.hs                        |  165
-rw-r--r--  compiler/nativeGen/X86/RegInfo.hs                    |  531
-rw-r--r--  compiler/nativeGen/X86/Regs.hs                       |  117
55 files changed, 9751 insertions, 8113 deletions
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 2328eca637..b276943f60 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -454,31 +454,38 @@ Library
Exposed-Modules:
AsmCodeGen
- MachCodeGen
- Regs
- RegsBase
- Instrs
- RegAllocInfo
- PprMach
+ TargetReg
+ NCGMonad
+ Instruction
+ Size
+ Reg
+ RegClass
PprBase
+ PIC
+ Platform
Alpha.Regs
Alpha.RegInfo
Alpha.Instr
Alpha.Ppr
+ Alpha.CodeGen
X86.Regs
X86.RegInfo
X86.Instr
+ X86.Cond
X86.Ppr
+ X86.CodeGen
PPC.Regs
PPC.RegInfo
PPC.Instr
+ PPC.Cond
PPC.Ppr
+ PPC.CodeGen
SPARC.Regs
SPARC.RegInfo
SPARC.Instr
+ SPARC.Cond
SPARC.Ppr
- NCGMonad
- PositionIndependentCode
+ SPARC.CodeGen
RegAlloc.Liveness
RegAlloc.Graph.Main
RegAlloc.Graph.Stats
@@ -488,6 +495,7 @@ Library
RegAlloc.Graph.Spill
RegAlloc.Graph.SpillClean
RegAlloc.Graph.SpillCost
+ RegAlloc.Graph.TrivColorable
RegAlloc.Linear.Main
RegAlloc.Linear.JoinToTargets
RegAlloc.Linear.State
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 44bd124032..eb9a182997 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -64,6 +64,7 @@ module DynFlags (
#include "HsVersions.h"
+import Platform
import Module
import PackageConfig
import PrelNames ( mAIN, main_RDR_Unqual )
@@ -339,6 +340,7 @@ data DynFlags = DynFlags {
specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function
liberateCaseThreshold :: Maybe Int, -- ^ Threshold for LiberateCase
+ targetPlatform :: Platform, -- ^ The platform we're compiling for. Used by the NCG.
stolen_x86_regs :: Int,
cmdlineHcIncludes :: [String], -- ^ @\-\#includes@
importPaths :: [FilePath],
@@ -584,6 +586,7 @@ defaultDynFlags =
specConstrThreshold = Just 200,
specConstrCount = Just 3,
liberateCaseThreshold = Just 200,
+ targetPlatform = defaultTargetPlatform,
stolen_x86_regs = 4,
cmdlineHcIncludes = [],
importPaths = ["."],
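The Platform module itself is not shown in this excerpt. A plausible minimal shape, reconstructed only from the pieces this diff does witness (the platformArch and platformOS accessors used later in AsmCodeGen.lhs, and the defaultTargetPlatform value above), might be:

module Platform where

-- Hypothetical reconstruction: only platformArch, platformOS and
-- defaultTargetPlatform are witnessed by this diff; the constructor
-- names here are guesses.
data Platform
        = Platform
        { platformArch  :: Arch
        , platformOS    :: OS }
        deriving (Eq, Show)

data Arch
        = ArchAlpha | ArchX86 | ArchX86_64 | ArchPPC | ArchSPARC
        deriving (Eq, Show)

data OS
        = OSLinux | OSDarwin | OSSolaris | OSMinGW32 | OSUnknown
        deriving (Eq, Show)

-- The platform the compiler was built to target. In the real module this
-- is presumably chosen at build time; here it is just a fixed example.
defaultTargetPlatform :: Platform
defaultTargetPlatform = Platform ArchX86 OSLinux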
diff --git a/compiler/nativeGen/Alpha/CodeGen.hs b/compiler/nativeGen/Alpha/CodeGen.hs
new file mode 100644
index 0000000000..4ce774f14f
--- /dev/null
+++ b/compiler/nativeGen/Alpha/CodeGen.hs
@@ -0,0 +1,789 @@
+module Alpha.CodeGen ()
+
+where
+
+{-
+
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+ -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+ -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordSize
+ return (Fixed wordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+
+getRegister (StDouble d)
+ = getBlockIdNat `thenNat` \ lbl ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let code dst = mkSeqInstrs [
+ LDATA RoDataSegment lbl [
+ DATA TF [ImmLab (rational d)]
+ ],
+ LDA tmp (AddrImm (ImmCLbl lbl)),
+ LD TF dst (AddrReg tmp)]
+ in
+ return (Any FF64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+ = case primop of
+ IntNegOp -> trivialUCode (NEG Q False) x
+
+ NotOp -> trivialUCode NOT x
+
+ FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
+ DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
+
+ OrdOp -> coerceIntCode IntRep x
+ ChrOp -> chrCode x
+
+ Float2IntOp -> coerceFP2Int x
+ Int2FloatOp -> coerceInt2FP pr x
+ Double2IntOp -> coerceFP2Int x
+ Int2DoubleOp -> coerceInt2FP pr x
+
+ Double2FloatOp -> coerceFltCode x
+ Float2DoubleOp -> coerceFltCode x
+
+ other_op -> getRegister (StCall fn CCallConv FF64 [x])
+ where
+ fn = case other_op of
+ FloatExpOp -> fsLit "exp"
+ FloatLogOp -> fsLit "log"
+ FloatSqrtOp -> fsLit "sqrt"
+ FloatSinOp -> fsLit "sin"
+ FloatCosOp -> fsLit "cos"
+ FloatTanOp -> fsLit "tan"
+ FloatAsinOp -> fsLit "asin"
+ FloatAcosOp -> fsLit "acos"
+ FloatAtanOp -> fsLit "atan"
+ FloatSinhOp -> fsLit "sinh"
+ FloatCoshOp -> fsLit "cosh"
+ FloatTanhOp -> fsLit "tanh"
+ DoubleExpOp -> fsLit "exp"
+ DoubleLogOp -> fsLit "log"
+ DoubleSqrtOp -> fsLit "sqrt"
+ DoubleSinOp -> fsLit "sin"
+ DoubleCosOp -> fsLit "cos"
+ DoubleTanOp -> fsLit "tan"
+ DoubleAsinOp -> fsLit "asin"
+ DoubleAcosOp -> fsLit "acos"
+ DoubleAtanOp -> fsLit "atan"
+ DoubleSinhOp -> fsLit "sinh"
+ DoubleCoshOp -> fsLit "cosh"
+ DoubleTanhOp -> fsLit "tanh"
+ where
+ pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+ = case primop of
+ CharGtOp -> trivialCode (CMP LTT) y x
+ CharGeOp -> trivialCode (CMP LE) y x
+ CharEqOp -> trivialCode (CMP EQQ) x y
+ CharNeOp -> int_NE_code x y
+ CharLtOp -> trivialCode (CMP LTT) x y
+ CharLeOp -> trivialCode (CMP LE) x y
+
+ IntGtOp -> trivialCode (CMP LTT) y x
+ IntGeOp -> trivialCode (CMP LE) y x
+ IntEqOp -> trivialCode (CMP EQQ) x y
+ IntNeOp -> int_NE_code x y
+ IntLtOp -> trivialCode (CMP LTT) x y
+ IntLeOp -> trivialCode (CMP LE) x y
+
+ WordGtOp -> trivialCode (CMP ULT) y x
+ WordGeOp -> trivialCode (CMP ULE) x y
+ WordEqOp -> trivialCode (CMP EQQ) x y
+ WordNeOp -> int_NE_code x y
+ WordLtOp -> trivialCode (CMP ULT) x y
+ WordLeOp -> trivialCode (CMP ULE) x y
+
+ AddrGtOp -> trivialCode (CMP ULT) y x
+ AddrGeOp -> trivialCode (CMP ULE) y x
+ AddrEqOp -> trivialCode (CMP EQQ) x y
+ AddrNeOp -> int_NE_code x y
+ AddrLtOp -> trivialCode (CMP ULT) x y
+ AddrLeOp -> trivialCode (CMP ULE) x y
+
+ FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ IntAddOp -> trivialCode (ADD Q False) x y
+ IntSubOp -> trivialCode (SUB Q False) x y
+ IntMulOp -> trivialCode (MUL Q False) x y
+ IntQuotOp -> trivialCode (DIV Q False) x y
+ IntRemOp -> trivialCode (REM Q False) x y
+
+ WordAddOp -> trivialCode (ADD Q False) x y
+ WordSubOp -> trivialCode (SUB Q False) x y
+ WordMulOp -> trivialCode (MUL Q False) x y
+ WordQuotOp -> trivialCode (DIV Q True) x y
+ WordRemOp -> trivialCode (REM Q True) x y
+
+ FloatAddOp -> trivialFCode W32 (FADD TF) x y
+ FloatSubOp -> trivialFCode W32 (FSUB TF) x y
+ FloatMulOp -> trivialFCode W32 (FMUL TF) x y
+ FloatDivOp -> trivialFCode W32 (FDIV TF) x y
+
+ DoubleAddOp -> trivialFCode W64 (FADD TF) x y
+ DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
+ DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
+ DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
+
+ AddrAddOp -> trivialCode (ADD Q False) x y
+ AddrSubOp -> trivialCode (SUB Q False) x y
+ AddrRemOp -> trivialCode (REM Q True) x y
+
+ AndOp -> trivialCode AND x y
+ OrOp -> trivialCode OR x y
+ XorOp -> trivialCode XOR x y
+ SllOp -> trivialCode SLL x y
+ SrlOp -> trivialCode SRL x y
+
+ ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+ ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+ ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+ FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+ DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+ where
+ {- ------------------------------------------------------------
+ Some bizarre special code for getting condition codes into
+ registers. Integer non-equality is a test for equality
+ followed by an XOR with 1. (Integer comparisons always set
+ the result register to 0 or 1.) Floating point comparisons of
+ any kind leave the result in a floating point register, so we
+ need to wrangle an integer register out of things.
+ -}
+ int_NE_code :: StixTree -> StixTree -> NatM Register
+
+ int_NE_code x y
+ = trivialCode (CMP EQQ) x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+ in
+ return (Any IntRep code__2)
+
+ {- ------------------------------------------------------------
+ Comments for int_NE_code also apply to cmpF_code
+ -}
+ cmpF_code
+ :: (Reg -> Reg -> Reg -> Instr)
+ -> Cond
+ -> StixTree -> StixTree
+ -> NatM Register
+
+ cmpF_code instr cond x y
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ getBlockIdNat `thenNat` \ lbl ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ OR zeroh (RIImm (ImmInt 1)) dst,
+ BF cond result (ImmCLbl lbl),
+ OR zeroh (RIReg zeroh) dst,
+ NEWBLOCK lbl]
+ in
+ return (Any IntRep code__2)
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+ ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+ = getAmode mem `thenNat` \ amode ->
+ let
+ code = amodeCode amode
+ src = amodeAddr amode
+ size = primRepToSize pk
+ code__2 dst = code . mkSeqInstr (LD size dst src)
+ in
+ return (Any pk code__2)
+
+getRegister (StInt i)
+ | fits8Bits i
+ = let
+ code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+ in
+ return (Any IntRep code)
+ | otherwise
+ = let
+ code dst = mkSeqInstr (LDI Q dst src)
+ in
+ return (Any IntRep code)
+ where
+ src = ImmInt (fromInteger i)
+
+getRegister leaf
+ | isJust imm
+ = let
+ code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+ in
+ return (Any PtrRep code)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (-(fromInteger i))
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (fromInteger i)
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+ | isJust imm
+ = return (Amode (AddrImm imm__2) id)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+getAmode other
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister other `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ in
+ return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+
+assignIntCode pk (CmmLoad dst _) src
+ = getNewRegNat IntRep `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignIntCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+ else code
+ in
+ return code__2
+
+assignFltCode pk (CmmLoad dst _) src
+ = getNewRegNat pk `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignFltCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (FMOV src__2 dst__2)
+ else code
+ in
+ return code__2
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+genJump (CmmLabel lbl)
+ | isAsmTemp lbl = returnInstr (BR target)
+ | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = getRegister tree `thenNat` \ register ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let
+ dst = registerName register pv
+ code = registerCode register pv
+ target = registerName register pv
+ in
+ if isFixed register then
+ returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+ else
+ return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+genCondJump id (StPrim op [x, StInt 0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ returnSeq code [BI (cmpOp op) value target]
+ where
+ cmpOp CharGtOp = GTT
+ cmpOp CharGeOp = GE
+ cmpOp CharEqOp = EQQ
+ cmpOp CharNeOp = NE
+ cmpOp CharLtOp = LTT
+ cmpOp CharLeOp = LE
+ cmpOp IntGtOp = GTT
+ cmpOp IntGeOp = GE
+ cmpOp IntEqOp = EQQ
+ cmpOp IntNeOp = NE
+ cmpOp IntLtOp = LTT
+ cmpOp IntLeOp = LE
+ cmpOp WordGtOp = NE
+ cmpOp WordGeOp = ALWAYS
+ cmpOp WordEqOp = EQQ
+ cmpOp WordNeOp = NE
+ cmpOp WordLtOp = NEVER
+ cmpOp WordLeOp = EQQ
+ cmpOp AddrGtOp = NE
+ cmpOp AddrGeOp = ALWAYS
+ cmpOp AddrEqOp = EQQ
+ cmpOp AddrNeOp = NE
+ cmpOp AddrLtOp = NEVER
+ cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF (cmpOp op) value target))
+ where
+ cmpOp FloatGtOp = GTT
+ cmpOp FloatGeOp = GE
+ cmpOp FloatEqOp = EQQ
+ cmpOp FloatNeOp = NE
+ cmpOp FloatLtOp = LTT
+ cmpOp FloatLeOp = LE
+ cmpOp DoubleGtOp = GTT
+ cmpOp DoubleGeOp = GE
+ cmpOp DoubleEqOp = EQQ
+ cmpOp DoubleNeOp = NE
+ cmpOp DoubleLtOp = LTT
+ cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+ | fltCmpOp op
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF cond result target))
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+ fltCmpOp op = case op of
+ FloatGtOp -> True
+ FloatGeOp -> True
+ FloatEqOp -> True
+ FloatNeOp -> True
+ FloatLtOp -> True
+ FloatLeOp -> True
+ DoubleGtOp -> True
+ DoubleGeOp -> True
+ DoubleEqOp -> True
+ DoubleNeOp -> True
+ DoubleLtOp -> True
+ DoubleLeOp -> True
+ _ -> False
+ (instr, cond) = case op of
+ FloatGtOp -> (FCMP TF LE, EQQ)
+ FloatGeOp -> (FCMP TF LTT, EQQ)
+ FloatEqOp -> (FCMP TF EQQ, NE)
+ FloatNeOp -> (FCMP TF EQQ, EQQ)
+ FloatLtOp -> (FCMP TF LTT, NE)
+ FloatLeOp -> (FCMP TF LE, NE)
+ DoubleGtOp -> (FCMP TF LE, EQQ)
+ DoubleGeOp -> (FCMP TF LTT, EQQ)
+ DoubleEqOp -> (FCMP TF EQQ, NE)
+ DoubleNeOp -> (FCMP TF EQQ, EQQ)
+ DoubleLtOp -> (FCMP TF LTT, NE)
+ DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+ = trivialCode instr x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BI cond result target))
+ where
+ (instr, cond) = case op of
+ CharGtOp -> (CMP LE, EQQ)
+ CharGeOp -> (CMP LTT, EQQ)
+ CharEqOp -> (CMP EQQ, NE)
+ CharNeOp -> (CMP EQQ, EQQ)
+ CharLtOp -> (CMP LTT, NE)
+ CharLeOp -> (CMP LE, NE)
+ IntGtOp -> (CMP LE, EQQ)
+ IntGeOp -> (CMP LTT, EQQ)
+ IntEqOp -> (CMP EQQ, NE)
+ IntNeOp -> (CMP EQQ, EQQ)
+ IntLtOp -> (CMP LTT, NE)
+ IntLeOp -> (CMP LE, NE)
+ WordGtOp -> (CMP ULE, EQQ)
+ WordGeOp -> (CMP ULT, EQQ)
+ WordEqOp -> (CMP EQQ, NE)
+ WordNeOp -> (CMP EQQ, EQQ)
+ WordLtOp -> (CMP ULT, NE)
+ WordLeOp -> (CMP ULE, NE)
+ AddrGtOp -> (CMP ULE, EQQ)
+ AddrGeOp -> (CMP ULT, EQQ)
+ AddrEqOp -> (CMP EQQ, NE)
+ AddrNeOp -> (CMP EQQ, EQQ)
+ AddrLtOp -> (CMP ULT, NE)
+ AddrLeOp -> (CMP ULE, NE)
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ccallResultRegs =
+
+genCCall fn cconv result_regs args
+ = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+ `thenNat` \ ((unused,_), argCode) ->
+ let
+ nRegs = length allArgRegs - length unused
+ code = asmSeqThen (map ($ []) argCode)
+ in
+ returnSeq code [
+ LDA pv (AddrImm (ImmLab (ptext fn))),
+ JSR ra (AddrReg pv) nRegs,
+ LDGP gp (AddrReg ra)]
+ where
+ ------------------------
+ {- Try to get a value into a specific register (or registers) for
+ a call. The first 6 arguments go into the appropriate
+ argument register (separate registers for integer and floating
+ point arguments, but used in lock-step), and the remaining
+ arguments are dumped to the stack, beginning at 0(sp). Our
+ first argument is a pair of the list of remaining argument
+ registers to be assigned for this call and the next stack
+ offset to use for overflowing arguments. This way,
+ @get_arg@ can be applied to all of a call's arguments using
+ @mapAccumLNat@.
+ -}
+ get_arg
+ :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
+ -> StixTree -- Current argument
+ -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+ -- We have to use up all of our argument registers first...
+
+ get_arg ((iDst,fDst):dsts, offset) arg
+ = getRegister arg `thenNat` \ register ->
+ let
+ reg = if isFloatType pk then fDst else iDst
+ code = registerCode register reg
+ src = registerName register reg
+ pk = registerRep register
+ in
+ return (
+ if isFloatType pk then
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (FMOV src fDst)
+ else code)
+ else
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (OR src (RIReg src) iDst)
+ else code))
+
+ -- Once we have run out of argument registers, we move to the
+ -- stack...
+
+ get_arg ([], offset) arg
+ = getRegister arg `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ pk = registerRep register
+ sz = primRepToSize pk
+ in
+ return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+trivialCode instr x (StInt y)
+ | fits8Bits y
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src1 = registerName register tmp
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+ in
+ return (Any IntRep code__2)
+
+trivialCode instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat IntRep `thenNat` \ tmp1 ->
+ getNewRegNat IntRep `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1 []
+ src1 = registerName register1 tmp1
+ code2 = registerCode register2 tmp2 []
+ src2 = registerName register2 tmp2
+ code__2 dst = asmSeqThen [code1, code2] .
+ mkSeqInstr (instr src1 (RIReg src2) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat FF64 `thenNat` \ tmp1 ->
+ getNewRegNat FF64 `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1
+ src1 = registerName register1 tmp1
+
+ code2 = registerCode register2 tmp2
+ src2 = registerName register2 tmp2
+
+ code__2 dst = asmSeqThen [code1 [], code2 []] .
+ mkSeqInstr (instr src1 src2 dst)
+ in
+ return (Any FF64 code__2)
+
+trivialUFCode _ instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr src dst)
+ in
+ return (Any FF64 code__2)
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ reg ->
+ let
+ code = registerCode register reg
+ src = registerName register reg
+
+ code__2 dst = code . mkSeqInstrs [
+ ST Q src (spRel 0),
+ LD TF dst (spRel 0),
+ CVTxy Q TF dst dst]
+ in
+ return (Any FF64 code__2)
+
+-------------
+coerceFP2Int x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ CVTxy TF Q src tmp,
+ ST TF tmp (spRel 0),
+ LD Q dst (spRel 0)]
+ in
+ return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-}
+
+
+
+
+
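Much of the commented-out code above traffics in the Fixed/Any Register protocol described in the deleted MachCodeGen.hs at the end of this diff: a child expression either pins its result to a particular register (Fixed) or returns a function that will compute the result into whichever register the parent chooses (Any). A toy model of that protocol, using illustrative stand-in types:

module RegisterSketch where

newtype Reg = Reg Int
  deriving (Eq, Show)

-- Stand-in for the OrdList of instructions the real code uses.
type InstrBlock = [String]

data Register
  = Fixed Reg InstrBlock        -- result is pinned to this register
  | Any (Reg -> InstrBlock)     -- parent picks the destination register

-- Materialise a Register's value into a required destination register,
-- paying for a move only in the Fixed case where the registers differ.
intoReg :: Reg -> Register -> InstrBlock
intoReg dst (Any codefn) = codefn dst
intoReg dst (Fixed src code)
  | src == dst = code
  | otherwise  = code ++ ["mov " ++ show src ++ ", " ++ show dst]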
diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs
index e2d66d3c02..990ea8bc1a 100644
--- a/compiler/nativeGen/Alpha/Instr.hs
+++ b/compiler/nativeGen/Alpha/Instr.hs
@@ -10,13 +10,14 @@
#include "nativeGen/NCG.h"
module Alpha.Instr (
- Cond(..),
- Instr(..),
- RI(..)
+-- Cond(..),
+-- Instr(..),
+-- RI(..)
)
where
+{-
import BlockId
import Regs
import Cmm
@@ -138,3 +139,4 @@ data Instr
| FUNEND CLabel
+-}
diff --git a/compiler/nativeGen/ArchReg.hs b/compiler/nativeGen/ArchReg.hs
new file mode 100644
index 0000000000..7170228e4c
--- /dev/null
+++ b/compiler/nativeGen/ArchReg.hs
@@ -0,0 +1,14 @@
+
+
+module ArchReg (
+
+)
+
+where
+
+
+class ArchReg reg format where
+ classOfReg :: reg -> RegClass
+ mkVReg :: format -> VirtReg reg
+
+
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index ce411ed6fe..8613a8ed1f 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -19,21 +19,56 @@ module AsmCodeGen ( nativeCodeGen ) where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import Instrs
-import Regs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
-import NCGMonad
-import PositionIndependentCode
-import RegAlloc.Liveness
-import qualified RegAlloc.Linear.Main as Linear
+#if alpha_TARGET_ARCH
+import Alpha.CodeGen
+import Alpha.Regs
+import Alpha.RegInfo
+import Alpha.Instr
+import Alpha.Ppr
+
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.CodeGen
+import X86.Regs
+import X86.RegInfo
+import X86.Instr
+import X86.Ppr
+
+#elif sparc_TARGET_ARCH
+import SPARC.CodeGen
+import SPARC.Regs
+import SPARC.RegInfo
+import SPARC.Instr
+import SPARC.Ppr
+
+#elif powerpc_TARGET_ARCH
+import PPC.CodeGen
+import PPC.Regs
+import PPC.RegInfo
+import PPC.Instr
+import PPC.Ppr
+
+#else
+#error "AsmCodeGen: unknown architecture"
+
+#endif
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
import qualified GraphColor as Color
import qualified RegAlloc.Graph.Main as Color
import qualified RegAlloc.Graph.Stats as Color
import qualified RegAlloc.Graph.Coalesce as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import qualified TargetReg as Target
+
+import Platform
+import Instruction
+import PIC
+import Reg
+import NCGMonad
import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
@@ -160,7 +195,7 @@ nativeCodeGen dflags h us cmms
dumpIfSet_dyn dflags
Opt_D_dump_asm_conflicts "Register conflict graph"
- $ Color.dotGraph Color.regDotColor trivColorable
+ $ Color.dotGraph Target.targetRegDotColor (Color.trivColorable Target.targetRegClass)
$ graphGlobal)
@@ -172,7 +207,7 @@ nativeCodeGen dflags h us cmms
-- write out the imports
Pretty.printDoc Pretty.LeftMode h
- $ makeImportsDoc (concat imports)
+ $ makeImportsDoc dflags (concat imports)
return ()
@@ -225,13 +260,13 @@ cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
cmmNativeGen
:: DynFlags
-> UniqSupply
- -> RawCmmTop -- ^ the cmm to generate code for
- -> Int -- ^ sequence number of this top thing
+ -> RawCmmTop -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
-> IO ( UniqSupply
- , [NatCmmTop] -- native code
- , [CLabel] -- things imported by this cmm
- , Maybe [Color.RegAllocStats] -- stats for the coloring register allocator
- , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
+ , [NatCmmTop Instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
cmmNativeGen dflags us cmm count
= do
@@ -375,8 +410,8 @@ x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
-- | Build a doc for all the imports.
--
-makeImportsDoc :: [CLabel] -> Pretty.Doc
-makeImportsDoc imports
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
= dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
@@ -410,13 +445,16 @@ makeImportsDoc imports
{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
map head $ group $ sort imps-}
+ arch = platformArch $ targetPlatform dflags
+ os = platformOS $ targetPlatform dflags
+
-- (Hack) sometimes two Labels pretty-print the same, but have
-- different uniques; so we compare their text versions...
dyld_stubs imps
- | needImportedSymbols
+ | needImportedSymbols arch os
= Pretty.vcat $
- (pprGotDeclaration :) $
- map (pprImportedSymbol . fst . head) $
+ (pprGotDeclaration arch os :) $
+ map ( pprImportedSymbol arch os . fst . head) $
groupBy (\(_,a) (_,b) -> a == b) $
sortBy (\(_,a) (_,b) -> compare a b) $
map doPpr $
@@ -437,7 +475,11 @@ makeImportsDoc imports
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
-sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop
+ :: Instruction instr
+ => NatCmmTop instr
+ -> NatCmmTop instr
+
sequenceTop top@(CmmData _ _) = top
sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
@@ -452,21 +494,36 @@ sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
-- FYI, the classic layout for basic blocks uses postorder DFS; this
-- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+sequenceBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
+
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
-- the first block is the entry point ==> it must remain at the start.
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
+
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC ( NatBasicBlock instr
+ , Unique
+ , [Unique])]
+
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
- [one] -> [getUnique one]
- _many -> []
- -- we're only interested in the last instruction of
- -- the block, and only if it has a single destination.
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges
+ :: Instruction instr
+ => [instr] -> [Unique]
+
+getOutEdges instrs
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [getUnique one]
+ _many -> []
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
@@ -494,7 +551,10 @@ reorder id accum (b@(block,id',out) : rest)
-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
+makeFarBranches
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
#if powerpc_TARGET_ARCH
makeFarBranches blocks
@@ -530,7 +590,11 @@ makeFarBranches = id
-- -----------------------------------------------------------------------------
-- Shortcut branches
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
+shortcutBranches
+ :: DynFlags
+ -> [NatCmmTop Instr]
+ -> [NatCmmTop Instr]
+
shortcutBranches dflags tops
| optLevel dflags < 1 = tops -- only with -O or higher
| otherwise = map (apply_mapping mapping) tops'
@@ -589,12 +653,17 @@ apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction. Is that bad?
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode
+ :: DynFlags
+ -> RawCmmTop
+ -> UniqSM
+ ( [NatCmmTop Instr]
+ , [CLabel])
genMachCode dflags cmm_top
= do { initial_us <- getUs
; let initial_st = mkNatM_State initial_us 0 dflags
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
final_delta = natm_delta final_st
final_imports = natm_imports final_st
; if final_delta == 0
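getOutEdges above shows the generalisation at work: block sequencing no longer pattern-matches on a concrete, architecture-specific Instr, it asks the Instruction class for jump destinations. The same idea in a self-contained form (BlockId and the class here are simplified stand-ins):

module SequenceSketch where

newtype BlockId = BlockId Int
  deriving (Eq, Show)

class Instruction instr where
  jumpDestsOfInstr :: instr -> [BlockId]

-- Mirrors getOutEdges in the diff: only the last instruction of a block
-- matters, and only when it has a single destination, so the block can
-- be laid out to fall through to its successor.
outEdges :: Instruction instr => [instr] -> [BlockId]
outEdges [] = []
outEdges instrs
  = case jumpDestsOfInstr (last instrs) of
      [one] -> [one]
      _many -> []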
diff --git a/compiler/nativeGen/Instrs.hs b/compiler/nativeGen/Instrs.hs
deleted file mode 100644
index 3f38a361d5..0000000000
--- a/compiler/nativeGen/Instrs.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-
-module Instrs (
- NatCmm,
- NatCmmTop,
- NatBasicBlock,
- condUnsigned,
- condToSigned,
- condToUnsigned,
-
-#if alpha_TARGET_ARCH
- module Alpha.Instr
-#elif powerpc_TARGET_ARCH
- module PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
- module X86.Instr
-#elif sparc_TARGET_ARCH
- module SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-)
-
-where
-
-#include "HsVersions.h"
-
-import BlockId
-import Regs
-import Cmm
-import CLabel ( CLabel, pprCLabel )
-import Panic ( panic )
-import Outputable
-import FastString
-import Constants ( wORD_SIZE )
-
-import GHC.Exts
-
-#if alpha_TARGET_ARCH
-import Alpha.Instr
-#elif powerpc_TARGET_ARCH
-import PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Instr
-#elif sparc_TARGET_ARCH
-import SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-
-
--- Our flavours of the Cmm types
--- Type synonyms for Cmm populated with native code
-
-type NatCmm = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
-type NatCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
-type NatBasicBlock = GenBasicBlock Instr
-
-
--- Condition utils
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
-
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE = GEU
-condToUnsigned LE = LEU
-condToUnsigned x = x
-
-
-
-
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
new file mode 100644
index 0000000000..22c37a5b12
--- /dev/null
+++ b/compiler/nativeGen/Instruction.hs
@@ -0,0 +1,159 @@
+
+module Instruction (
+ RegUsage(..),
+ noUsage,
+ NatCmm,
+ NatCmmTop,
+ NatBasicBlock,
+ Instruction(..)
+)
+
+where
+
+import Reg
+
+import BlockId
+import Cmm
+
+-- | Holds a list of source and destination registers used by a
+-- particular instruction.
+--
+-- Machine registers that are pre-allocated to stgRegs are filtered
+-- out, because they are uninteresting from a register allocation
+-- standpoint. (We wouldn't want them to end up on the free list!)
+--
+-- As far as we are concerned, the fixed registers simply don't exist
+-- (for allocation purposes, anyway).
+--
+data RegUsage
+ = RU [Reg] [Reg]
+
+-- | No regs read or written to.
+noUsage :: RegUsage
+noUsage = RU [] []
+
+
+-- Our flavours of the Cmm types
+-- Type synonyms for Cmm populated with native code
+type NatCmm instr
+ = GenCmm
+ CmmStatic
+ [CmmStatic]
+ (ListGraph instr)
+
+type NatCmmTop instr
+ = GenCmmTop
+ CmmStatic
+ [CmmStatic]
+ (ListGraph instr)
+
+
+type NatBasicBlock instr
+ = GenBasicBlock instr
+
+
+
+
+-- | Common things that we can do with instructions, on all architectures.
+-- These are used by the shared parts of the native code generator,
+-- specifically the register allocators.
+--
+class Instruction instr where
+
+ -- | Get the registers that are being used by this instruction.
+ -- regUsage doesn't need to do any trickery for jumps and such.
+ -- Just state precisely the regs read and written by that insn.
+ -- The consequences of control flow transfers, as far as register
+ -- allocation goes, are taken care of by the register allocator.
+ --
+ regUsageOfInstr
+ :: instr
+ -> RegUsage
+
+
+ -- | Apply a given mapping to all the register references in this
+ -- instruction.
+ patchRegsOfInstr
+ :: instr
+ -> (Reg -> Reg)
+ -> instr
+
+
+ -- | Checks whether this instruction is a jump/branch instruction.
+ -- One that can change the flow of control in a way that the
+ -- register allocator needs to worry about.
+ isJumpishInstr
+ :: instr -> Bool
+
+
+ -- | Give the possible destinations of this jump instruction.
+ -- Must be defined for all jumpish instructions.
+ jumpDestsOfInstr
+ :: instr -> [BlockId]
+
+
+ -- | Change the destination of this jump instruction.
+ -- Used in the linear allocator when adding fixup blocks for join
+ -- points.
+ patchJumpInstr
+ :: instr
+ -> (BlockId -> BlockId)
+ -> instr
+
+
+ -- | An instruction to spill a register into a spill slot.
+ mkSpillInstr
+ :: Reg -- ^ the reg to spill
+ -> Int -- ^ the current stack delta
+ -> Int -- ^ spill slot to use
+ -> instr
+
+
+ -- | An instruction to reload a register from a spill slot.
+ mkLoadInstr
+ :: Reg -- ^ the reg to reload.
+ -> Int -- ^ the current stack delta
+ -> Int -- ^ the spill slot to use
+ -> instr
+
+ -- | See if this instruction is telling us the current C stack delta
+ takeDeltaInstr
+ :: instr
+ -> Maybe Int
+
+ -- | Check whether this instruction is some meta thing inserted into
+ -- the instruction stream for other purposes.
+ --
+ -- Not something that has to be treated as a real machine instruction
+ -- and have its registers allocated.
+ --
+ -- e.g. comments, delta, ldata, etc.
+ isMetaInstr
+ :: instr
+ -> Bool
+
+
+
+ -- | Copy the value in a register to another one.
+ -- Must work for all register classes.
+ mkRegRegMoveInstr
+ :: Reg -- ^ source register
+ -> Reg -- ^ destination register
+ -> instr
+
+ -- | Take the source and destination from this reg -> reg move instruction
+ -- or Nothing if it's not one
+ takeRegRegMoveInstr
+ :: instr
+ -> Maybe (Reg, Reg)
+
+ -- | Make an unconditional jump instruction.
+ -- For architectures with branch delay slots, it's ok to put
+ -- a NOP after the jump. Don't fill the delay slot with an
+ -- instruction that references regs or you'll confuse the
+ -- linear allocator.
+ mkJumpInstr
+ :: BlockId
+ -> [instr]
+
+
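To make the class concrete, here is one way the shared register allocators can consume it, sketched under assumptions: after allocation, a reg-to-reg move whose source and destination landed in the same real register is a no-op and can be dropped. Only takeRegRegMoveInstr comes from the class above; the rest is illustrative.

module AllocSketch where

newtype Reg = Reg Int
  deriving (Eq, Show)

class Instruction instr where
  takeRegRegMoveInstr :: instr -> Maybe (Reg, Reg)

-- Drop reg->reg moves that became no-ops once virtual registers were
-- mapped onto real ones (the kind of cleanup a coalescing allocator does).
removeDeadMoves :: Instruction instr => [instr] -> [instr]
removeDeadMoves = filter (not . isDeadMove)
  where
    isDeadMove i
      = case takeRegRegMoveInstr i of
          Just (src, dst) -> src == dst
          Nothing         -> False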
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
deleted file mode 100644
index d94a906bbd..0000000000
--- a/compiler/nativeGen/MachCodeGen.hs
+++ /dev/null
@@ -1,5199 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
--- This is a big module, but, if you pay attention to
--- (a) the sectioning, (b) the type signatures, and
--- (c) the #if blah_TARGET_ARCH} things, the
--- structure should not be too overwhelming.
-
-module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
-
-#include "HsVersions.h"
-#include "nativeGen/NCG.h"
-#include "MachDeps.h"
-
--- NCG stuff:
-import Instrs
-import Regs
-import NCGMonad
-import PositionIndependentCode
-import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
-import PprMach
-
--- Our intermediate code:
-import BlockId
-import PprCmm ( pprExpr )
-import Cmm
-import CLabel
-import ClosureInfo ( C_SRT(..) )
-
--- The rest:
-import BasicTypes
-import StaticFlags ( opt_PIC )
-import ForeignCall ( CCallConv(..) )
-import OrdList
-import Pretty
-import qualified Outputable as O
-import Outputable
-import FastString
-import FastBool ( isFastTrue )
-import Constants ( wORD_SIZE )
-
-import Debug.Trace ( trace )
-
-import Control.Monad ( mapAndUnzipM )
-import Data.Maybe ( fromJust )
-import Data.Bits
-import Data.Word
-import Data.Int
-
-
--- -----------------------------------------------------------------------------
--- Top-level of the instruction selector
-
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal (pre-order?) yields the insns in the correct
--- order.
-
-type InstrBlock = OrdList Instr
-
-cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
-cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
- (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
- picBaseMb <- getPicBaseMaybeNat
- let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
- tops = proc : concat statics
- case picBaseMb of
- Just picBase -> initializePicBase picBase tops
- Nothing -> return tops
-
-cmmTopCodeGen (CmmData sec dat) = do
- return [CmmData sec dat] -- no translation, we just use CmmStatic
-
-basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-basicBlockCodeGen (BasicBlock id stmts) = do
- instrs <- stmtsToInstrs stmts
- -- code generation may introduce new basic block boundaries, which
- -- are indicated by the NEWBLOCK instruction. We must split up the
- -- instruction stream into basic blocks again. Also, we extract
- -- LDATAs here too.
- let
- (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
- -- in
- return (BasicBlock id top : other_blocks, statics)
-
-stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-stmtsToInstrs stmts
- = do instrss <- mapM stmtToInstrs stmts
- return (concatOL instrss)
-
-stmtToInstrs :: CmmStmt -> NatM InstrBlock
-stmtToInstrs stmt = case stmt of
- CmmNop -> return nilOL
- CmmComment s -> return (unitOL (COMMENT s))
-
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode size reg src
-#if WORD_SIZE_IN_BITS==32
- | isWord64 ty -> assignReg_I64Code reg src
-#endif
- | otherwise -> assignReg_IntCode size reg src
- where ty = cmmRegType reg
- size = cmmTypeSize ty
-
- CmmStore addr src
- | isFloatType ty -> assignMem_FltCode size addr src
-#if WORD_SIZE_IN_BITS==32
- | isWord64 ty -> assignMem_I64Code addr src
-#endif
- | otherwise -> assignMem_IntCode size addr src
- where ty = cmmExprType src
- size = cmmTypeSize ty
-
- CmmCall target result_regs args _ _
- -> genCCall target result_regs args
-
- CmmBranch id -> genBranch id
- CmmCondBranch arg id -> genCondJump id arg
- CmmSwitch arg ids -> genSwitch arg ids
- CmmJump arg params -> genJump arg
- CmmReturn params ->
- panic "stmtToInstrs: return statement should have been cps'd away"
-
--- -----------------------------------------------------------------------------
--- General things for putting together code sequences
-
--- Expand CmmRegOff. ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: CmmExpr -> CmmExpr
-mangleIndexTree (CmmRegOff reg off)
- = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
- where width = typeWidth (cmmRegType reg)
-
--- -----------------------------------------------------------------------------
--- Code gen for 64-bit arithmetic on 32-bit platforms
-
-{-
-Simple support for generating 64-bit code (ie, 64 bit values and 64
-bit assignments) on 32-bit platforms. Unlike the main code generator
-we merely shoot for generating working code as simply as possible, and
-pay little attention to code quality. Specifically, there is no
-attempt to deal cleverly with the fixed-vs-floating register
-distinction; all values are generated into (pairs of) floating
-registers, even if this would mean some redundant reg-reg moves as a
-result. Only one of the VRegUniques is returned, since it will be
-of the VRegUniqueLo form, and the upper-half VReg can be determined
-by applying getHiVRegFromLo to it.
--}
-
-data ChildCode64 -- a.k.a "Register64"
- = ChildCode64
- InstrBlock -- code
- Reg -- the lower 32-bit temporary which contains the
- -- result; use getHiVRegFromLo to find the other
- -- VRegUnique. Rules of this simplified insn
- -- selection game are therefore that the returned
- -- Reg may be modified
-
-#if WORD_SIZE_IN_BITS==32
-assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
-#endif
-
-#ifndef x86_64_TARGET_ARCH
-iselExpr64 :: CmmExpr -> NatM ChildCode64
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
- Amode addr addr_code <- getAmode addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Little-endian store
- mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
- mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
- -- in
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
- mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
- -- in
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(i386): invalid lvalue"
-
-------------
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
- code = toOL [
- MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
- ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- Amode addr addr_code <- getAmode addrTree
- (rlo,rhi) <- getNewRegPairNat II32
- let
- mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
- mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
- -- in
- return (
- ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
- )
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (mkVReg vu II32))
-
--- we handle addition, but rather badly
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
- r1hi = getHiVRegFromLo r1lo
- code = code1 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpReg r2lo) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpReg r2hi) (OpReg rhi) ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
- fn <- getAnyReg expr
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- code = fn r_dst_lo
- return (
- ChildCode64 (code `snocOL`
- MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
- r_dst_lo
- )
-
-iselExpr64 expr
- = pprPanic "iselExpr64(i386)" (ppr expr)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
- Amode addr addr_code <- getAmode addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- (src, code) <- getSomeReg addrTree
- let
- rhi = getHiVRegFromLo rlo
- -- Big-endian store
- mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
- mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
- return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
- return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(sparc): invalid lvalue"
-
-
--- Load a 64 bit word
-iselExpr64 (CmmLoad addrTree ty)
- | isWord64 ty
- = do Amode amode addr_code <- getAmode addrTree
- let result
-
- | AddrRegReg r1 r2 <- amode
- = do rlo <- getNewRegNat II32
- tmp <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ ADD False False r1 (RIReg r2) tmp
- , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
- , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
- rlo
-
- | AddrRegImm r1 (ImmInt i) <- amode
- = do rlo <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
- , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
- rlo
-
- result
-
-
--- Add a literal to a 64 bit integer
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- return $ ChildCode64
- ( toOL
- [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
- , ADD True False r1_hi (RIReg g0) r_dst_hi ])
- r_dst_lo
-
-
--- Addition of II64
-iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- ChildCode64 code2 r2_lo <- iselExpr64 e2
- let r2_hi = getHiVRegFromLo r2_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- let code = code1
- `appOL` code2
- `appOL` toOL
- [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
- , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_lo = mkVReg uq II32
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
- return (
- ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
- )
-
--- Convert something into II64
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
- = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- -- compute expr and load it into r_dst_lo
- (a_reg, a_code) <- getSomeReg expr
-
- let code = a_code
- `appOL` toOL
- [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
- , mkRegRegMoveInstr a_reg r_dst_lo ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 expr
- = pprPanic "iselExpr64(sparc)" (ppr expr)
-
-#endif /* sparc_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if powerpc_TARGET_ARCH
-
-getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
-getI64Amodes addrTree = do
- Amode hi_addr addr_code <- getAmode addrTree
- case addrOffset hi_addr 4 of
- Just lo_addr -> return (hi_addr, lo_addr, addr_code)
- Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
- return (AddrRegImm hi_ptr (ImmInt 0),
- AddrRegImm hi_ptr (ImmInt 4),
- code)
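-
--- On 32-bit big-endian PowerPC the more significant word of a 64-bit
--- value sits at the lower address, so hi_addr is the base amode and
--- lo_addr lies 4 bytes beyond it; the store and load code below
--- relies on this layout.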
-
-assignMem_I64Code addrTree valueTree = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Big-endian store
- mov_hi = ST II32 rhi hi_addr
- mov_lo = ST II32 rlo lo_addr
- -- in
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MR r_dst_lo r_src_lo
- mov_hi = MR r_dst_hi r_src_hi
- -- in
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(powerpc): invalid lvalue"
-
-
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr
--- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--- = panic "iselExpr64(???)"
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LD II32 rhi hi_addr
- mov_lo = LD II32 rlo lo_addr
- return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (mkVReg vu II32))
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
-        half0 = fromIntegral (fromIntegral i :: Word16)
-        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
-        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
-        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
-
-        code = toOL [
-                LIS rlo (ImmInt half1),
-                OR rlo rlo (RIImm $ ImmInt half0),
-                LIS rhi (ImmInt half3),
-                OR rhi rhi (RIImm $ ImmInt half2)
-                ]
- -- in
- return (ChildCode64 code rlo)
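-
--- Worked example: for the literal 0x0123456789ABCDEF the halves are
--- half3 = 0x0123, half2 = 0x4567, half1 = 0x89AB, half0 = 0xCDEF.
--- LIS puts its operand into bits 16-31 of the target and clears bits
--- 0-15, so the OR with half0 (resp. half2) then fills in the low
--- halfword of rlo (resp. rhi).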
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ ADDC rlo r1lo r2lo,
- ADDE rhi r1hi r2hi ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
- (expr_reg,expr_code) <- getSomeReg expr
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LI rhi (ImmInt 0)
- mov_lo = MR rlo expr_reg
- return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-iselExpr64 expr
- = pprPanic "iselExpr64(powerpc)" (ppr expr)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- The 'Register' type
-
--- 'Register's passed up the tree.  If the Cmm code forces the register
--- to live in a pre-decided machine register, it comes out as @Fixed@;
--- otherwise, it comes out as @Any@, and the parent can decide which
--- register to put it in.
-
-data Register
- = Fixed Size Reg InstrBlock
- | Any Size (Reg -> InstrBlock)
-
-swizzleRegisterRep :: Register -> Size -> Register
--- Change the width; it's a no-op
-swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
-swizzleRegisterRep (Any _ codefn) size = Any size codefn
-
-
--- -----------------------------------------------------------------------------
--- Utils based on getRegister, below
-
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
--- -----------------------------------------------------------------------------
--- Grab the Reg for a CmmReg
-
-getRegisterReg :: CmmReg -> Reg
-
-getRegisterReg (CmmLocal (LocalReg u pk))
- = mkVReg u (cmmTypeSize pk)
-
-getRegisterReg (CmmGlobal mid)
- = case get_GlobalReg_reg_or_addr mid of
- Left (RealReg rrno) -> RealReg rrno
- _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
- -- By this stage, the only MagicIds remaining should be the
- -- ones which map to a real machine register on this
- -- platform. Hence ...
-
-
--- -----------------------------------------------------------------------------
--- Generate code to get a subtree into a Register
-
--- Don't delete this -- it's very handy for debugging.
---getRegister expr
--- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--- = panic "getRegister(???)"
-
-getRegister :: CmmExpr -> NatM Register
-
-#if !x86_64_TARGET_ARCH
- -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
- -- register, it can only be used for rip-relative addressing.
-getRegister (CmmReg (CmmGlobal PicBaseReg))
- = do
- reg <- getPicBaseNat wordSize
- return (Fixed wordSize reg nilOL)
-#endif
-
-getRegister (CmmReg reg)
- = return (Fixed (cmmTypeSize (cmmRegType reg))
- (getRegisterReg reg) nilOL)
-
-getRegister tree@(CmmRegOff _ _)
- = getRegister (mangleIndexTree tree)
-
-
-#if WORD_SIZE_IN_BITS==32
-	-- for 32-bit architectures, support some 64 -> 32 bit conversions:
- -- TO_W_(x), TO_W_(x >> 32)
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-#endif
-
--- end of machine-"independent" bit; here we go on the rest...
-
-#if alpha_TARGET_ARCH
-
-getRegister (StDouble d)
- = getBlockIdNat `thenNat` \ lbl ->
- getNewRegNat PtrRep `thenNat` \ tmp ->
- let code dst = mkSeqInstrs [
- LDATA RoDataSegment lbl [
- DATA TF [ImmLab (rational d)]
- ],
- LDA tmp (AddrImm (ImmCLbl lbl)),
- LD TF dst (AddrReg tmp)]
- in
- return (Any FF64 code)
-
-getRegister (StPrim primop [x]) -- unary PrimOps
- = case primop of
- IntNegOp -> trivialUCode (NEG Q False) x
-
- NotOp -> trivialUCode NOT x
-
- FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
- DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
-
- OrdOp -> coerceIntCode IntRep x
- ChrOp -> chrCode x
-
- Float2IntOp -> coerceFP2Int x
- Int2FloatOp -> coerceInt2FP pr x
- Double2IntOp -> coerceFP2Int x
- Int2DoubleOp -> coerceInt2FP pr x
-
- Double2FloatOp -> coerceFltCode x
- Float2DoubleOp -> coerceFltCode x
-
- other_op -> getRegister (StCall fn CCallConv FF64 [x])
- where
- fn = case other_op of
- FloatExpOp -> fsLit "exp"
- FloatLogOp -> fsLit "log"
- FloatSqrtOp -> fsLit "sqrt"
- FloatSinOp -> fsLit "sin"
- FloatCosOp -> fsLit "cos"
- FloatTanOp -> fsLit "tan"
- FloatAsinOp -> fsLit "asin"
- FloatAcosOp -> fsLit "acos"
- FloatAtanOp -> fsLit "atan"
- FloatSinhOp -> fsLit "sinh"
- FloatCoshOp -> fsLit "cosh"
- FloatTanhOp -> fsLit "tanh"
- DoubleExpOp -> fsLit "exp"
- DoubleLogOp -> fsLit "log"
- DoubleSqrtOp -> fsLit "sqrt"
- DoubleSinOp -> fsLit "sin"
- DoubleCosOp -> fsLit "cos"
- DoubleTanOp -> fsLit "tan"
- DoubleAsinOp -> fsLit "asin"
- DoubleAcosOp -> fsLit "acos"
- DoubleAtanOp -> fsLit "atan"
- DoubleSinhOp -> fsLit "sinh"
- DoubleCoshOp -> fsLit "cosh"
- DoubleTanhOp -> fsLit "tanh"
- where
- pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-
-getRegister (StPrim primop [x, y]) -- dyadic PrimOps
- = case primop of
- CharGtOp -> trivialCode (CMP LTT) y x
- CharGeOp -> trivialCode (CMP LE) y x
- CharEqOp -> trivialCode (CMP EQQ) x y
- CharNeOp -> int_NE_code x y
- CharLtOp -> trivialCode (CMP LTT) x y
- CharLeOp -> trivialCode (CMP LE) x y
-
- IntGtOp -> trivialCode (CMP LTT) y x
- IntGeOp -> trivialCode (CMP LE) y x
- IntEqOp -> trivialCode (CMP EQQ) x y
- IntNeOp -> int_NE_code x y
- IntLtOp -> trivialCode (CMP LTT) x y
- IntLeOp -> trivialCode (CMP LE) x y
-
- WordGtOp -> trivialCode (CMP ULT) y x
-	WordGeOp  -> trivialCode (CMP ULE) y x
- WordEqOp -> trivialCode (CMP EQQ) x y
- WordNeOp -> int_NE_code x y
- WordLtOp -> trivialCode (CMP ULT) x y
- WordLeOp -> trivialCode (CMP ULE) x y
-
- AddrGtOp -> trivialCode (CMP ULT) y x
- AddrGeOp -> trivialCode (CMP ULE) y x
- AddrEqOp -> trivialCode (CMP EQQ) x y
- AddrNeOp -> int_NE_code x y
- AddrLtOp -> trivialCode (CMP ULT) x y
- AddrLeOp -> trivialCode (CMP ULE) x y
-
- FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
- FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
- FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
- FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
- FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
- FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
-
- DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
- DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
- DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
- DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
- DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
- DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
-
- IntAddOp -> trivialCode (ADD Q False) x y
- IntSubOp -> trivialCode (SUB Q False) x y
- IntMulOp -> trivialCode (MUL Q False) x y
- IntQuotOp -> trivialCode (DIV Q False) x y
- IntRemOp -> trivialCode (REM Q False) x y
-
- WordAddOp -> trivialCode (ADD Q False) x y
- WordSubOp -> trivialCode (SUB Q False) x y
- WordMulOp -> trivialCode (MUL Q False) x y
- WordQuotOp -> trivialCode (DIV Q True) x y
- WordRemOp -> trivialCode (REM Q True) x y
-
- FloatAddOp -> trivialFCode W32 (FADD TF) x y
- FloatSubOp -> trivialFCode W32 (FSUB TF) x y
- FloatMulOp -> trivialFCode W32 (FMUL TF) x y
- FloatDivOp -> trivialFCode W32 (FDIV TF) x y
-
- DoubleAddOp -> trivialFCode W64 (FADD TF) x y
- DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
- DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
- DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
-
- AddrAddOp -> trivialCode (ADD Q False) x y
- AddrSubOp -> trivialCode (SUB Q False) x y
- AddrRemOp -> trivialCode (REM Q True) x y
-
- AndOp -> trivialCode AND x y
- OrOp -> trivialCode OR x y
- XorOp -> trivialCode XOR x y
- SllOp -> trivialCode SLL x y
- SrlOp -> trivialCode SRL x y
-
- ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
- ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
- ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
-
- FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
- DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
- where
- {- ------------------------------------------------------------
- Some bizarre special code for getting condition codes into
- registers. Integer non-equality is a test for equality
- followed by an XOR with 1. (Integer comparisons always set
- the result register to 0 or 1.) Floating point comparisons of
- any kind leave the result in a floating point register, so we
- need to wrangle an integer register out of things.
- -}
- int_NE_code :: StixTree -> StixTree -> NatM Register
-
- int_NE_code x y
- = trivialCode (CMP EQQ) x y `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
- in
- return (Any IntRep code__2)
-
- {- ------------------------------------------------------------
- Comments for int_NE_code also apply to cmpF_code
- -}
- cmpF_code
- :: (Reg -> Reg -> Reg -> Instr)
- -> Cond
- -> StixTree -> StixTree
- -> NatM Register
-
- cmpF_code instr cond x y
- = trivialFCode pr instr x y `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- getBlockIdNat `thenNat` \ lbl ->
- let
- code = registerCode register tmp
- result = registerName register tmp
-
- code__2 dst = code . mkSeqInstrs [
- OR zeroh (RIImm (ImmInt 1)) dst,
- BF cond result (ImmCLbl lbl),
- OR zeroh (RIReg zeroh) dst,
- NEWBLOCK lbl]
- in
- return (Any IntRep code__2)
- where
- pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
- ------------------------------------------------------------
-
-getRegister (CmmLoad mem pk)
-  = getAmode mem                    `thenNat` \ amode ->
-    let
-        code = amodeCode amode
-        src  = amodeAddr amode
-        size = primRepToSize pk
-        code__2 dst = code . mkSeqInstr (LD size dst src)
-    in
-    return (Any size code__2)
-
-getRegister (StInt i)
- | fits8Bits i
- = let
- code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
- in
- return (Any IntRep code)
- | otherwise
- = let
- code dst = mkSeqInstr (LDI Q dst src)
- in
- return (Any IntRep code)
- where
- src = ImmInt (fromInteger i)
-
-getRegister leaf
- | isJust imm
- = let
- code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
- in
- return (Any PtrRep code)
- where
- imm = maybeImm leaf
- imm__2 = case imm of Just x -> x
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat f W32)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)]
- `consOL` (addr_code `snocOL`
- GLD FF32 addr dst)
- -- in
- return (Any FF32 code)
-
-
-getRegister (CmmLit (CmmFloat d W64))
- | d == 0.0
- = let code dst = unitOL (GLDZ dst)
- in return (Any FF64 code)
-
- | d == 1.0
- = let code dst = unitOL (GLD1 dst)
- in return (Any FF64 code)
-
- | otherwise = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)]
- `consOL` (addr_code `snocOL`
- GLD FF64 addr dst)
- -- in
- return (Any FF64 code)
-
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat 0.0 w)) = do
- let size = floatSize w
- code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
- -- I don't know why there are xorpd, xorps, and pxor instructions.
- -- They all appear to do the same thing --SDM
- return (Any size code)
-
-getRegister (CmmLit (CmmFloat f w)) = do
- lbl <- getNewLabelNat
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f w)],
- MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- ]
- -- in
- return (Any size code)
- where size = floatSize w
-
-#endif /* x86_64_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II32) addr
- return (Any II64 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- = return $ Any II64 (\dst -> unitOL $
- LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- code dst = x_code dst `appOL` toOL [
- -- This is how gcc does it, so it can't be that bad:
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x80000000 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32)
- ],
- XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorps, so we need the 128-bit constant
- -- ToDo: rip-relative
- ]
- --
- return (Any FF32 code)
-
-getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- -- This is how gcc does it, so it can't be that bad:
- code dst = x_code dst `appOL` toOL [
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x8000000000000000 W64),
- CmmStaticLit (CmmInt 0 W64)
- ],
- -- gcc puts an unpck here. Wonder if we need it.
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorpd, so we need the 128-bit constant
- ]
- --
- return (Any FF64 code)
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
-#if i386_TARGET_ARCH
- MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
-#endif
-
- MO_S_Neg w -> triv_ucode NEGI (intSize w)
- MO_F_Neg w -> triv_ucode NEGI (floatSize w)
- MO_Not w -> triv_ucode NOT (intSize w)
-
- -- Nop conversions
- MO_UU_Conv W32 W8 -> toI8Reg W32 x
- MO_SS_Conv W32 W8 -> toI8Reg W32 x
- MO_UU_Conv W16 W8 -> toI8Reg W16 x
- MO_SS_Conv W16 W8 -> toI8Reg W16 x
- MO_UU_Conv W32 W16 -> toI16Reg W32 x
- MO_SS_Conv W32 W16 -> toI16Reg W32 x
-
-#if x86_64_TARGET_ARCH
- MO_UU_Conv W64 W32 -> conversionNop II64 x
- MO_SS_Conv W64 W32 -> conversionNop II64 x
- MO_UU_Conv W64 W16 -> toI16Reg W64 x
- MO_SS_Conv W64 W16 -> toI16Reg W64 x
- MO_UU_Conv W64 W8 -> toI8Reg W64 x
- MO_SS_Conv W64 W8 -> toI8Reg W64 x
-#endif
-
- MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
- MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-
- -- widenings
- MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
- MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
- MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
-
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
-
-#if x86_64_TARGET_ARCH
- MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
- MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
- MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
- MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
- MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
- MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
- -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
- -- However, we don't want the register allocator to throw it
- -- away as an unnecessary reg-to-reg move, so we keep it in
- -- the form of a movzl and print it as a movl later.
-#endif
-
-#if i386_TARGET_ARCH
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
- MO_FF_Conv W64 W32 -> conversionNop FF32 x
-#else
- MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
- MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-#endif
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- other -> pprPanic "getRegister" (pprMachOp mop)
- where
- triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
- triv_ucode instr size = trivialUCode size (instr size) x
-
- -- signed or unsigned extension.
- integerExtend :: Width -> Width
- -> (Size -> Operand -> Operand -> Instr)
- -> CmmExpr -> NatM Register
- integerExtend from to instr expr = do
- (reg,e_code) <- if from == W8 then getByteReg expr
- else getSomeReg expr
- let
- code dst =
- e_code `snocOL`
- instr (intSize from) (OpReg reg) (OpReg dst)
- return (Any (intSize to) code)
-
- toI8Reg :: Width -> CmmExpr -> NatM Register
- toI8Reg new_rep expr
- = do codefn <- getAnyReg expr
- return (Any (intSize new_rep) codefn)
- -- HACK: use getAnyReg to get a byte-addressable register.
- -- If the source was a Fixed register, this will add the
- -- mov instruction to put it into the desired destination.
- -- We're assuming that the destination won't be a fixed
- -- non-byte-addressable register; it won't be, because all
- -- fixed registers are word-sized.
-
- toI16Reg = toI8Reg -- for now
-
- conversionNop :: Size -> CmmExpr -> NatM Register
- conversionNop new_size expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_size)
-
-
-getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
- = case mop of
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ x y
- MO_Ne rep -> condIntReg NE x y
-
- MO_S_Gt rep -> condIntReg GTT x y
- MO_S_Ge rep -> condIntReg GE x y
- MO_S_Lt rep -> condIntReg LTT x y
- MO_S_Le rep -> condIntReg LE x y
-
- MO_U_Gt rep -> condIntReg GU x y
- MO_U_Ge rep -> condIntReg GEU x y
- MO_U_Lt rep -> condIntReg LU x y
- MO_U_Le rep -> condIntReg LEU x y
-
-#if i386_TARGET_ARCH
- MO_F_Add w -> trivialFCode w GADD x y
- MO_F_Sub w -> trivialFCode w GSUB x y
- MO_F_Quot w -> trivialFCode w GDIV x y
- MO_F_Mul w -> trivialFCode w GMUL x y
-#endif
-
-#if x86_64_TARGET_ARCH
- MO_F_Add w -> trivialFCode w ADD x y
- MO_F_Sub w -> trivialFCode w SUB x y
- MO_F_Quot w -> trivialFCode w FDIV x y
- MO_F_Mul w -> trivialFCode w MUL x y
-#endif
-
- MO_Add rep -> add_code rep x y
- MO_Sub rep -> sub_code rep x y
-
- MO_S_Quot rep -> div_code rep True True x y
- MO_S_Rem rep -> div_code rep True False x y
- MO_U_Quot rep -> div_code rep False True x y
- MO_U_Rem rep -> div_code rep False False x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_Mul rep -> triv_op rep IMUL
- MO_And rep -> triv_op rep AND
- MO_Or rep -> triv_op rep OR
- MO_Xor rep -> triv_op rep XOR
-
-        {- Shift ops on x86 have constraints on their source: the
-           shift amount either has to be Imm, CL or 1,
-           so trivialCode is not restrictive enough (sigh.)
-        -}
- MO_Shl rep -> shift_code rep SHL x y {-False-}
- MO_U_Shr rep -> shift_code rep SHR x y {-False-}
- MO_S_Shr rep -> shift_code rep SAR x y {-False-}
-
- other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
- where
- --------------------
- triv_op width instr = trivialCode width op (Just op) x y
- where op = instr (intSize width)
-
- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo rep a b = do
- (a_reg, a_code) <- getNonClobberedReg a
- b_code <- getAnyReg b
- let
- shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
-
- size = intSize rep
- code = a_code `appOL` b_code eax `appOL`
- toOL [
- IMUL2 size (OpReg a_reg), -- result in %edx:%eax
- SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
- -- sign extend lower part
- SUB size (OpReg edx) (OpReg eax)
- -- compare against upper
- -- eax==0 if high part == sign extended low part
- ]
- -- in
- return (Fixed size eax code)
-
- --------------------
- shift_code :: Width
- -> (Size -> Operand -> Operand -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
- {- Case1: shift length as immediate -}
- shift_code width instr x y@(CmmLit lit) = do
- x_code <- getAnyReg x
- let
- size = intSize width
- code dst
- = x_code dst `snocOL`
- instr size (OpImm (litToImm lit)) (OpReg dst)
- -- in
- return (Any size code)
-
- {- Case2: shift length is complex (non-immediate)
- * y must go in %ecx.
- * we cannot do y first *and* put its result in %ecx, because
- %ecx might be clobbered by x.
- * if we do y second, then x cannot be
- in a clobbered reg. Also, we cannot clobber x's reg
- with the instruction itself.
- * so we can either:
- - do y first, put its result in a fresh tmp, then copy it to %ecx later
- - do y second and put its result into %ecx. x gets placed in a fresh
-           tmp. This is likely to be better, because the reg alloc can
- eliminate this reg->reg move here (it won't eliminate the other one,
- because the move is into the fixed %ecx).
- -}
- shift_code width instr x y{-amount-} = do
- x_code <- getAnyReg x
- let size = intSize width
- tmp <- getNewRegNat size
- y_code <- getAnyReg y
- let
- code = x_code tmp `appOL`
- y_code ecx `snocOL`
- instr size (OpReg ecx) (OpReg tmp)
- -- in
- return (Fixed size tmp code)
-
- --------------------
- add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- add_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger y = add_int rep x y
- add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
- where size = intSize rep
-
- --------------------
- sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- sub_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger (-y) = add_int rep x (-y)
- sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
-
- -- our three-operand add instruction:
- add_int width x y = do
- (x_reg, x_code) <- getSomeReg x
- let
- size = intSize width
- imm = ImmInt (fromInteger y)
- code dst
- = x_code `snocOL`
- LEA size
- (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
- (OpReg dst)
- --
- return (Any size code)
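-
-    -- (LEA computes x_reg + imm into an arbitrary dst in one
-    -- instruction, without requiring dst to coincide with x_reg and
-    -- without touching the condition codes, which is why it is
-    -- preferred over ADD here.)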
-
- ----------------------
- div_code width signed quotient x y = do
- (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
- x_code <- getAnyReg x
- let
- size = intSize width
- widen | signed = CLTD size
- | otherwise = XOR size (OpReg edx) (OpReg edx)
-
- instr | signed = IDIV
- | otherwise = DIV
-
- code = y_code `appOL`
- x_code eax `appOL`
- toOL [widen, instr size y_op]
-
- result | quotient = eax
- | otherwise = edx
-
- -- in
- return (Fixed size result code)
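-
-    -- (This is the standard x86 division idiom: widen %eax into %edx
-    -- first (cltd/cdq when signed, xor %edx,%edx when unsigned);
-    -- idiv/div then leaves the quotient in %eax and the remainder in
-    -- %edx, which is why 'result' picks one of those two registers.)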
-
-
-getRegister (CmmLoad mem pk)
- | isFloatType pk
- = do
- Amode src mem_code <- getAmode mem
- let
- size = cmmTypeSize pk
- code dst = mem_code `snocOL`
- IF_ARCH_i386(GLD size src dst,
- MOV size (OpAddr src) (OpReg dst))
- return (Any size code)
-
-#if i386_TARGET_ARCH
-getRegister (CmmLoad mem pk)
- | not (isWord64 pk)
- = do
- code <- intLoadCode instr mem
- return (Any size code)
- where
- width = typeWidth pk
- size = intSize width
- instr = case width of
- W8 -> MOVZxL II8
- _other -> MOV size
- -- We always zero-extend 8-bit loads, if we
- -- can't think of anything better. This is because
- -- we can't guarantee access to an 8-bit variant of every register
- -- (esi and edi don't have 8-bit variants), so to make things
- -- simpler we do our 8-bit arithmetic with full 32-bit registers.
-#endif
-
-#if x86_64_TARGET_ARCH
--- Simpler memory load code on x86_64
-getRegister (CmmLoad mem pk)
- = do
- code <- intLoadCode (MOV size) mem
- return (Any size code)
- where size = intSize $ typeWidth pk
-#endif
-
-getRegister (CmmLit (CmmInt 0 width))
- = let
- size = intSize width
-
- -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
- adj_size = case size of II64 -> II32; _ -> size
- size1 = IF_ARCH_i386( size, adj_size )
- code dst
- = unitOL (XOR size1 (OpReg dst) (OpReg dst))
- in
- return (Any size code)
-
-#if x86_64_TARGET_ARCH
- -- optimisation for loading small literals on x86_64: take advantage
- -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
- -- instruction forms are shorter.
-getRegister (CmmLit lit)
- | isWord64 (cmmLitType lit), not (isBigLit lit)
- = let
- imm = litToImm lit
- code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
- in
- return (Any II64 code)
- where
- isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
- isBigLit _ = False
- -- note1: not the same as (not.is32BitLit), because that checks for
- -- signed literals that fit in 32 bits, but we want unsigned
- -- literals here.
- -- note2: all labels are small, because we're assuming the
- -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-
-getRegister (CmmLit lit)
- = let
- size = cmmTypeSize (cmmLitType lit)
- imm = litToImm lit
- code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
- in
- return (Any size code)
-
-getRegister other = pprPanic "getRegister(x86)" (ppr other)
-
-
-intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
- -> NatM (Reg -> InstrBlock)
-intLoadCode instr mem = do
- Amode src mem_code <- getAmode mem
- return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-
--- Compute an expression into *any* register, adding the appropriate
--- move instruction if necessary.
-getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-getAnyReg expr = do
- r <- getRegister expr
- anyReg r
-
-anyReg :: Register -> NatM (Reg -> InstrBlock)
-anyReg (Any _ code) = return code
-anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-
--- A bit like getSomeReg, but we want a reg that can be byte-addressed.
--- Fixed registers might not be byte-addressable, so we make sure we've
--- got a temporary, inserting an extra reg copy if necessary.
-getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
-#if x86_64_TARGET_ARCH
-getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-#else
-getByteReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- | isVirtualReg reg -> return (reg,code)
- | otherwise -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- -- ToDo: could optimise slightly by checking for byte-addressable
- -- real registers, but that will happen very rarely if at all.
-#endif
-
--- Another variant: this time we want the result in a register that cannot
--- be modified by code to evaluate an arbitrary expression.
-getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getNonClobberedReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- -- only free regs can be clobbered
- | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- | otherwise ->
- return (reg, code)
-
-reg2reg :: Size -> Reg -> Reg -> Instr
-reg2reg size src dst
-#if i386_TARGET_ARCH
- | isFloatSize size = GMOV src dst
-#endif
- | otherwise = MOV size (OpReg src) (OpReg dst)
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- getRegister :: CmmExpr -> NatM Register
-
--- Load a literal float into a float register.
--- The actual literal is stored in a new data area, and we load it
--- at runtime.
-getRegister (CmmLit (CmmFloat f W32)) = do
-
- -- a label for the new data area
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
-
- let code dst = toOL [
- -- the data area
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)],
-
- -- load the literal
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-
- return (Any FF32 code)
-
-getRegister (CmmLit (CmmFloat d W64)) = do
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)],
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
- return (Any FF64 code)
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
- MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
-
- MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
- MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
-
- MO_FF_Conv W64 W32-> coerceDbl2Flt x
- MO_FF_Conv W32 W64-> coerceFlt2Dbl x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- -- Conversions which are a nop on sparc
- MO_UU_Conv from to
- | from == to -> conversionNop (intSize to) x
- MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
-
- MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
- MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
- MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
-
- -- sign extension
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
-
- other_op -> panic ("Unknown unary mach op: " ++ show mop)
- where
-
- -- | sign extend and widen
- integerExtend
- :: Width -- ^ width of source expression
- -> Width -- ^ width of result
- -> CmmExpr -- ^ source expression
- -> NatM Register
-
- integerExtend from to expr
- = do -- load the expr into some register
- (reg, e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- let bitCount
- = case (from, to) of
- (W8, W32) -> 24
- (W16, W32) -> 16
- (W8, W16) -> 24
- let code dst
- = e_code
-
- -- local shift word left to load the sign bit
- `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
-
- -- arithmetic shift right to sign extend
- `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
-
- return (Any (intSize to) code)
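-
-        -- For example, sign extending W8 to W32 uses bitCount = 24:
-        -- SLL moves the significant byte up into bits 24-31, and SRA
-        -- brings it back down while replicating bit 31 (the sign bit)
-        -- across the upper bits.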
-
-
- conversionNop new_rep expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_rep)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
- = case mop of
- MO_Eq rep -> condIntReg EQQ x y
- MO_Ne rep -> condIntReg NE x y
-
- MO_S_Gt rep -> condIntReg GTT x y
- MO_S_Ge rep -> condIntReg GE x y
- MO_S_Lt rep -> condIntReg LTT x y
- MO_S_Le rep -> condIntReg LE x y
-
-      MO_U_Gt W32 -> condIntReg GU  x y
-      MO_U_Ge W32 -> condIntReg GEU x y
-      MO_U_Lt W32 -> condIntReg LU  x y
-      MO_U_Le W32 -> condIntReg LEU x y
-
- MO_U_Gt W16 -> condIntReg GU x y
- MO_U_Ge W16 -> condIntReg GEU x y
- MO_U_Lt W16 -> condIntReg LU x y
- MO_U_Le W16 -> condIntReg LEU x y
-
- MO_Add W32 -> trivialCode W32 (ADD False False) x y
- MO_Sub W32 -> trivialCode W32 (SUB False False) x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_S_Quot W32 -> idiv True False x y
- MO_U_Quot W32 -> idiv False False x y
-
- MO_S_Rem W32 -> irem True x y
- MO_U_Rem W32 -> irem False x y
-
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
-
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_F_Add w -> trivialFCode w FADD x y
- MO_F_Sub w -> trivialFCode w FSUB x y
- MO_F_Mul w -> trivialFCode w FMUL x y
- MO_F_Quot w -> trivialFCode w FDIV x y
-
- MO_And rep -> trivialCode rep (AND False) x y
- MO_Or rep -> trivialCode rep (OR False) x y
- MO_Xor rep -> trivialCode rep (XOR False) x y
-
- MO_Mul rep -> trivialCode rep (SMUL False) x y
-
- MO_Shl rep -> trivialCode rep SLL x y
- MO_U_Shr rep -> trivialCode rep SRL x y
- MO_S_Shr rep -> trivialCode rep SRA x y
-
-{-
- MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
- [promote x, promote y])
- where promote x = CmmMachOp MO_F32_to_Dbl [x]
- MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
- [x, y])
--}
- other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
- where
- -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
-
-
- -- | Generate an integer division instruction.
- idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
-
- -- For unsigned division with a 32 bit numerator,
- -- we can just clear the Y register.
- idiv False cc x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
- -- For _signed_ division with a 32 bit numerator,
- -- we have to sign extend the numerator into the Y register.
- idiv True cc x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
- , SRA tmp (RIImm (ImmInt 16)) tmp
-
- , WRY tmp g0
- , SDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
- -- | Do an integer remainder.
- --
- -- NOTE: The SPARC v8 architecture manual says that integer division
- -- instructions _may_ generate a remainder, depending on the implementation.
- -- If so it is _recommended_ that the remainder is placed in the Y register.
- --
- -- The UltraSparc 2007 manual says Y is _undefined_ after division.
- --
- -- The SPARC T2 doesn't store the remainder, not sure about the others.
- -- It's probably best not to worry about it, and just generate our own
- -- remainders.
- --
- irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
-
- -- For unsigned operands:
- -- Division is between a 64 bit numerator and a 32 bit denominator,
- -- so we still have to clear the Y register.
- irem False x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV False a_reg (RIReg b_reg) tmp_reg
- , UMUL False tmp_reg (RIReg b_reg) tmp_reg
- , SUB False False a_reg (RIReg tmp_reg) dst]
-
- return (Any II32 code)
-
-
- -- For signed operands:
- -- Make sure to sign extend into the Y register, or the remainder
- -- will have the wrong sign when the numerator is negative.
- --
- -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
- -- not the full 32. Not sure why this is, something to do with overflow?
- -- If anyone cares enough about the speed of signed remainder they
- -- can work it out themselves (then tell me). -- BL 2009/01/20
-
- irem True x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp1_reg <- getNewRegNat II32
- tmp2_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , WRY tmp1_reg g0
-
- , SDIV False a_reg (RIReg b_reg) tmp2_reg
- , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
- , SUB False False a_reg (RIReg tmp2_reg) dst]
-
- return (Any II32 code)
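-
-        -- (Both irem cases compute the remainder by hand as
-        -- a - (a `quot` b) * b, using the quotient from the divide,
-        -- rather than trusting whatever the hardware leaves in Y.)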
-
-
- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo rep a b = do
- (a_reg, a_code) <- getSomeReg a
- (b_reg, b_code) <- getSomeReg b
- res_lo <- getNewRegNat II32
- res_hi <- getNewRegNat II32
- let
- shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
- code dst = a_code `appOL` b_code `appOL`
- toOL [
- SMUL False a_reg (RIReg b_reg) res_lo,
- RDY res_hi,
- SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
- SUB False False res_lo (RIReg res_hi) dst
- ]
- return (Any II32 code)
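-
-        -- (SMUL deposits the upper 32 bits of the 64-bit product in
-        -- the Y register; dst ends up zero exactly when that upper
-        -- word equals the sign extension of the lower word, i.e.
-        -- when the multiply did not overflow a signed 32-bit result.)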
-
-getRegister (CmmLoad mem pk) = do
- Amode src code <- getAmode mem
- let
- code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
- return (Any (cmmTypeSize pk) code__2)
-
-getRegister (CmmLit (CmmInt i _))
- | fits13Bits i
- = let
- src = ImmInt (fromInteger i)
- code dst = unitOL (OR False g0 (RIImm src) dst)
- in
- return (Any II32 code)
-
-getRegister (CmmLit lit)
- = let rep = cmmLitType lit
- imm = litToImm lit
- code dst = toOL [
- SETHI (HI imm) dst,
- OR False dst (RIImm (LO imm)) dst]
- in return (Any II32 code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-getRegister (CmmLoad mem pk)
- | not (isWord64 pk)
- = do
- Amode addr addr_code <- getAmode mem
- let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
- addr_code `snocOL` LD size dst addr
- return (Any size code)
- where size = cmmTypeSize pk
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
--- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
- MO_Not rep -> triv_ucode_int rep NOT
-
- MO_F_Neg w -> triv_ucode_float w FNEG
- MO_S_Neg w -> triv_ucode_int w NEG
-
- MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- MO_SS_Conv from to
- | from == to -> conversionNop (intSize to) x
-
- -- narrowing is a nop: we treat the high bits as undefined
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
- MO_SS_Conv W16 W8 -> conversionNop II8 x
- MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
- MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
-
- MO_UU_Conv from to
- | from == to -> conversionNop (intSize to) x
- -- narrowing is a nop: we treat the high bits as undefined
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
- MO_UU_Conv W16 W8 -> conversionNop II8 x
- MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
-      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
-
-      other -> pprPanic "getRegister(ppc) - unary CmmMachOp" (pprMachOp mop)
-
-    where
- triv_ucode_int width instr = trivialUCode (intSize width) instr x
- triv_ucode_float width instr = trivialUCode (floatSize width) instr x
-
- conversionNop new_size expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_size)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
- = case mop of
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
- MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
-
- MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_F_Add w -> triv_float w FADD
- MO_F_Sub w -> triv_float w FSUB
- MO_F_Mul w -> triv_float w FMUL
- MO_F_Quot w -> triv_float w FDIV
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
- MO_Add W32 ->
- case y of
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
- -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
- CmmLit lit
- -> do
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- code dst = srcCode `appOL` toOL [
- ADDIS dst src (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- return (Any II32 code)
- _ -> trivialCode W32 True ADD x y
-
- MO_Add rep -> trivialCode rep True ADD x y
- MO_Sub rep ->
-         case y of    -- subfi ('subtract from' with immediate) doesn't exist
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
- -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
- _ -> trivialCodeNoImm' (intSize rep) SUBF y x
-
- MO_Mul rep -> trivialCode rep True MULLW x y
-
- MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
-
- MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
- MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
-
- MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
- MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
- MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_And rep -> trivialCode rep False AND x y
- MO_Or rep -> trivialCode rep False OR x y
- MO_Xor rep -> trivialCode rep False XOR x y
-
- MO_Shl rep -> trivialCode rep False SLW x y
- MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
-        MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-
-        other -> pprPanic "getRegister(ppc) - binary CmmMachOp" (pprMachOp mop)
-    where
- triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
- triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-
-getRegister (CmmLit (CmmInt i rep))
- | Just imm <- makeImmediate rep True i
- = let
- code dst = unitOL (LI dst imm)
- in
- return (Any (intSize rep) code)
-
-getRegister (CmmLit (CmmFloat f frep)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let size = floatSize frep
- code dst =
- LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f frep)]
- `consOL` (addr_code `snocOL` LD size dst addr)
- return (Any size code)
-
-getRegister (CmmLit lit)
- = let rep = cmmLitType lit
- imm = litToImm lit
- code dst = toOL [
- LIS dst (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- in return (Any (cmmTypeSize rep) code)
-
-getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-
-    -- extend?Expr: wrap an integer expression of type rep
-    -- in a conversion to II32
-extendSExpr W32 x = x
-extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
-extendUExpr W32 x = x
-extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- The 'Amode' type: Memory addressing modes passed up the tree.
-
-data Amode = Amode AddrMode InstrBlock
-
-{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to. So you can't put
-anything in between, lest it overwrite some of those registers. If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
- code
- LEA amode, tmp
- ... other computation ...
- ... (tmp) ...
--}
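-
--- As a hypothetical illustration of that rule (a sketch only, not
--- used in this module, and assuming the x86-style LEA instruction):
--- force the effective address into a fresh register up front, then
--- the other computation can run safely in between.
---
--- spillAmode :: Amode -> NatM (Reg, InstrBlock)
--- spillAmode (Amode addr code) = do
---     tmp <- getNewRegNat wordSize
---     -- materialise the effective address into tmp, so code run
---     -- afterwards cannot invalidate it
---     return (tmp, code `snocOL` LEA wordSize (OpAddr addr) (OpReg tmp))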
-
-getAmode :: CmmExpr -> NatM Amode
-getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-getAmode (StPrim IntSubOp [x, StInt i])
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister x `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- off = ImmInt (-(fromInteger i))
- in
- return (Amode (AddrRegImm reg off) code)
-
-getAmode (StPrim IntAddOp [x, StInt i])
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister x `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- off = ImmInt (fromInteger i)
- in
- return (Amode (AddrRegImm reg off) code)
-
-getAmode leaf
- | isJust imm
- = return (Amode (AddrImm imm__2) id)
- where
- imm = maybeImm leaf
- imm__2 = case imm of Just x -> x
-
-getAmode other
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister other `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- in
- return (Amode (AddrReg reg) code)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- = return $ Amode (ripRel (litToImm displacement)) nilOL
-
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- This is all just ridiculous, since it carefully undoes
--- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
- | is32BitLit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
- | is32BitLit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (fromInteger i)
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
--- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
--- recognised by the next rule.
-getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
- b@(CmmLit _)])
- = getAmode (CmmMachOp (MO_Add rep) [b,a])
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
- [y, CmmLit (CmmInt shift _)]])
- | shift == 0 || shift == 1 || shift == 2 || shift == 3
- = x86_complex_amode x y shift 0
-
-getAmode (CmmMachOp (MO_Add rep)
- [x, CmmMachOp (MO_Add _)
- [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
- CmmLit (CmmInt offset _)]])
-  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
-    && is32BitInteger offset
- = x86_complex_amode x y shift offset
-
-getAmode (CmmMachOp (MO_Add rep) [x,y])
- = x86_complex_amode x y 0 0
-
-getAmode (CmmLit lit) | is32BitLit lit
- = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-
-getAmode expr = do
- (reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-
-
-x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
-x86_complex_amode base index shift offset
- = do (x_reg, x_code) <- getNonClobberedReg base
-       -- x must be in a temp, because it has to stay live over y_code
-       -- we could compare x_reg and y_reg and do something better here...
-       (y_reg, y_code) <- getSomeReg index
-       let
-           code  = x_code `appOL` y_code
-           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
- code)
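-
--- For example, an address expression like x + y*8 + 12 reaches here
--- with shift = 3 and offset = 12, and becomes the single addressing
--- mode [x_reg + y_reg*8 + 12] rather than separate shift and add
--- instructions.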
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
- | fits13Bits (-i)
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (-(fromInteger i))
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
- | fits13Bits i
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (fromInteger i)
- return (Amode (AddrRegImm reg off) code)
-
-getAmode (CmmMachOp (MO_Add rep) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- let
- code = codeX `appOL` codeY
- return (Amode (AddrRegReg regX regY) code)
-
-getAmode (CmmLit lit)
- = do
- let imm__2 = litToImm lit
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
-
- let code = toOL [ SETHI (HI imm__2) tmp1
- , OR False tmp1 (RIImm (LO imm__2)) tmp2]
-
- return (Amode (AddrRegReg tmp2 g0) code)
-
-getAmode other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#ifdef powerpc_TARGET_ARCH
-getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True (-i)
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True i
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
- = do
- tmp <- getNewRegNat II32
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- code = srcCode `snocOL` ADDIS tmp src (HA imm)
- return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode (CmmLit lit)
- = do
- tmp <- getNewRegNat II32
- let imm = litToImm lit
- code = unitOL (LIS tmp (HA imm))
- return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode (CmmMachOp (MO_Add W32) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-
-getAmode other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- getOperand: sometimes any operand will do.
-
--- getNonClobberedOperand: the value of the operand will remain valid across
--- the computation of an arbitrary expression, unless the expression
--- is computed directly into a register which the operand refers to
--- (see trivialCode where this function is used for an example).
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getNonClobberedOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getNonClobberedOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
- return (OpImm (litToImm lit), nilOL)
-getNonClobberedOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- (src',save_code) <-
- if (amodeCouldBeClobbered src)
- then do
- tmp <- getNewRegNat wordSize
- return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
- unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
- else
- return (src, nilOL)
- return (OpAddr src', save_code `appOL` mem_code)
-getNonClobberedOperand e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-amodeCouldBeClobbered :: AddrMode -> Bool
-amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-
-regClobbered (RealReg rr) = isFastTrue (freeReg rr)
-regClobbered _ = False
-
--- getOperand: the operand is not required to remain valid across the
--- computation of an arbitrary expression.
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
- return (OpImm (litToImm lit), nilOL)
-getOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getOperand e = do
- (reg, code) <- getSomeReg e
- return (OpReg reg, code)
-
-isOperand :: CmmExpr -> Bool
-isOperand (CmmLoad _ _) = True
-isOperand (CmmLit lit) = is32BitLit lit
- || isSuitableFloatingPointLit lit
-isOperand _ = False
-
--- if we want a floating-point literal as an operand, we can
--- use it directly from memory. However, if the literal is
--- zero, we're better off generating it into a register using
--- xor.
-isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
-isSuitableFloatingPointLit _ = False
-
-getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getRegOrMem e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-#if x86_64_TARGET_ARCH
-is32BitLit (CmmInt i W64) = is32BitInteger i
- -- assume that labels are in the range 0-2^31-1: this assumes the
- -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-is32BitLit x = True
-#endif
-
-is32BitInteger :: Integer -> Bool
-is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
- where i64 = fromIntegral i :: Int64
- -- a CmmInt is intended to be truncated to the appropriate
- -- number of bits, so here we truncate it to Int64. This is
- -- important because e.g. -1 as a CmmInt might be either
- -- -1 or 18446744073709551615.
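-
--- For example, is32BitInteger (-1) and
--- is32BitInteger 18446744073709551615 are both True, since both
--- truncate to the Int64 value -1; is32BitInteger 0x80000000 is
--- False, because 2147483648 exceeds the signed 32-bit maximum.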
-
--- -----------------------------------------------------------------------------
--- The 'CondCode' type: Condition codes passed up the tree.
-
-data CondCode = CondCode Bool Cond InstrBlock
-
--- Set up a condition code for a conditional branch.
-
-getCondCode :: CmmExpr -> NatM CondCode
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-getCondCode = panic "MachCode.getCondCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
--- yes, they really do seem to want exactly the same!
-
-getCondCode (CmmMachOp mop [x, y])
- =
- case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq rep -> condIntCode EQQ x y
- MO_Ne rep -> condIntCode NE x y
-
- MO_S_Gt rep -> condIntCode GTT x y
- MO_S_Ge rep -> condIntCode GE x y
- MO_S_Lt rep -> condIntCode LTT x y
- MO_S_Le rep -> condIntCode LE x y
-
- MO_U_Gt rep -> condIntCode GU x y
- MO_U_Ge rep -> condIntCode GEU x y
- MO_U_Lt rep -> condIntCode LU x y
- MO_U_Le rep -> condIntCode LEU x y
-
- other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-
-getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
-
-#elif powerpc_TARGET_ARCH
-
--- almost the same as everywhere else - but we need to
--- extend small integers to 32 bits first
-
-getCondCode (CmmMachOp mop [x, y])
- = case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
- MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
-
- MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
-
- other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-
-getCondCode other = panic "getCondCode(2)(powerpc)"
-
-
-#endif
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-
-#if alpha_TARGET_ARCH
-condIntCode = panic "MachCode.condIntCode: not on Alphas"
-condFltCode = panic "MachCode.condFltCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
- Amode x_addr x_code <- getAmode x
- let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
- --
- return (CondCode False cond code)
-
--- anything vs zero, using a mask
--- TODO: Add some sanity checking!!!!
-condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
- | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
- = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs zero
-condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs operand
-condIntCode cond x y | isOperand y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL` y_code `snocOL`
- CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
- -- in
- return (CondCode False cond code)
-
--- anything vs anything
-condIntCode cond x y = do
- (y_reg, y_code) <- getNonClobberedReg y
- (x_op, x_code) <- getRegOrMem x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
- -- in
- return (CondCode False cond code)
-#endif
-
-#if i386_TARGET_ARCH
-condFltCode cond x y
- = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_reg, y_code) <- getSomeReg y
- let
- code = x_code `appOL` y_code `snocOL`
- GCMP cond x_reg y_reg
- -- The GCMP insn does the test and sets the zero flag if comparable
- -- and true. Hence we always supply EQQ as the condition to test.
- return (CondCode True EQQ code)
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
--- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
--- an operand, but the right must be a reg. We can probably do better
--- than this general case...
-condFltCode cond x y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL`
- y_code `snocOL`
- CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
- -- NB(1): we need to use the unsigned comparison operators on the
- -- result of this comparison.
- -- in
- return (CondCode True (condToUnsigned cond) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntCode cond x (CmmLit (CmmInt y rep))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- let
- src2 = ImmInt (fromInteger y)
- code' = code `snocOL` SUB False True src1 (RIImm src2) g0
- return (CondCode False cond code')
-
-condIntCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 = code1 `appOL` code2 `snocOL`
- SUB False True src1 (RIReg src2) g0
- return (CondCode False cond code__2)
-
------------
-condFltCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType x
- pk2 = cmmExprType y
-
- code__2 =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- FCMP True (cmmTypeSize pk1) src1 src2
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- FCMP True FF64 tmp src2
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- FCMP True FF64 src1 tmp
- return (CondCode True cond code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
--- ###FIXME: I16 and I8!
-condIntCode cond x (CmmLit (CmmInt y rep))
- | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
- = do
- (src1, code) <- getSomeReg x
- let
- code' = code `snocOL`
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
- return (CondCode False cond code')
-
-condIntCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code' = code1 `appOL` code2 `snocOL`
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
- return (CondCode False cond code')
-
-condFltCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
- code'' = case cond of -- twiddle CR to handle unordered case
- GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
- LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
- _ -> code'
- where
- ltbit = 0 ; eqbit = 2 ; gtbit = 1
- return (CondCode True cond code'')
-
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business. Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers. If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side. This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
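---
--- A tiny standalone sketch (hypothetical names and simplified types,
--- not the real NCG code) of why the transfer can vanish: code for the
--- right hand side is a function of its destination, so assigning to a
--- free register needs no extra move, and only a Fixed result costs one:
---
--- data RegisterS = AnyS (Int -> [String]) -- code parameterised on dest
--- | FixedS Int [String] -- result pinned to a register
---
--- assignS :: Int -> RegisterS -> [String]
--- assignS dst (AnyS mk) = mk dst -- RHS lands in dst directly
--- assignS dst (FixedS src is) = is ++ ["mov r" ++ show src
--- ++ ", r" ++ show dst]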
-
-assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-assignIntCode pk (CmmLoad dst _) src
- = getNewRegNat IntRep `thenNat` \ tmp ->
- getAmode dst `thenNat` \ amode ->
- getRegister src `thenNat` \ register ->
- let
- code1 = amodeCode amode []
- dst__2 = amodeAddr amode
- code2 = registerCode register tmp []
- src__2 = registerName register tmp
- sz = primRepToSize pk
- code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
- in
- return code__2
-
-assignIntCode pk dst src
- = getRegister dst `thenNat` \ register1 ->
- getRegister src `thenNat` \ register2 ->
- let
- dst__2 = registerName register1 zeroh
- code = registerCode register2 dst__2
- src__2 = registerName register2 dst__2
- code__2 = if isFixed register2
- then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
- else code
- in
- return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- integer assignment to memory
-
--- specific case of adding/subtracting an integer to a particular address.
--- ToDo: catch other cases where we can use an operation directly on a memory
--- address.
-assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
- CmmLit (CmmInt i _)])
- | addr == addr2, pk /= II64 || is32BitInteger i,
- Just instr <- check op
- = do Amode amode code_addr <- getAmode addr
- let code = code_addr `snocOL`
- instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
- return code
- where
- check (MO_Add _) = Just ADD
- check (MO_Sub _) = Just SUB
- check _ = Nothing
- -- ToDo: more?
-
--- general case
-assignMem_IntCode pk addr src = do
- Amode addr code_addr <- getAmode addr
- (code_src, op_src) <- get_op_RI src
- let
- code = code_src `appOL`
- code_addr `snocOL`
- MOV pk op_src (OpAddr addr)
- -- NOTE: op_src is stable, so it will still be valid
- -- after code_addr. This may involve the introduction
- -- of an extra MOV to a temporary register, but we hope
- -- the register allocator will get rid of it.
- --
- return code
- where
- get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operand
- get_op_RI (CmmLit lit) | is32BitLit lit
- = return (nilOL, OpImm (litToImm lit))
- get_op_RI op
- = do (reg,code) <- getNonClobberedReg op
- return (code, OpReg reg)
-
-
--- Assign; dst is a reg, rhs is mem
-assignReg_IntCode pk reg (CmmLoad src _) = do
- load_code <- intLoadCode (MOV pk) src
- return (load_code (getRegisterReg reg))
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src = do
- code <- getAnyReg src
- return (code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- getAmode addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
-assignReg_IntCode pk reg src = do
- r <- getRegister src
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
- where
- dst = getRegisterReg reg
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- getAmode addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src
- = do
- r <- getRegister src
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` MR dst freg
- where
- dst = getRegisterReg reg
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Floating-point assignments
-
-#if alpha_TARGET_ARCH
-
-assignFltCode pk (CmmLoad dst _) src
- = getNewRegNat pk `thenNat` \ tmp ->
- getAmode dst `thenNat` \ amode ->
- getRegister src `thenNat` \ register ->
- let
- code1 = amodeCode amode []
- dst__2 = amodeAddr amode
- code2 = registerCode register tmp []
- src__2 = registerName register tmp
- sz = primRepToSize pk
- code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
- in
- return code__2
-
-assignFltCode pk dst src
- = getRegister dst `thenNat` \ register1 ->
- getRegister src `thenNat` \ register2 ->
- let
- dst__2 = registerName register1 zeroh
- code = registerCode register2 dst__2
- src__2 = registerName register2 dst__2
- code__2 = if isFixed register2
- then code . mkSeqInstr (FMOV src__2 dst__2)
- else code
- in
- return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
- (src_reg, src_code) <- getNonClobberedReg src
- Amode addr addr_code <- getAmode addr
- let
- code = src_code `appOL`
- addr_code `snocOL`
- IF_ARCH_i386(GST pk src_reg addr,
- MOV pk (OpReg src_reg) (OpAddr addr))
- return code
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk reg src = do
- src_code <- getAnyReg src
- return (src_code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
- Amode dst__2 code1 <- getAmode addr
- (src__2, code2) <- getSomeReg src
- tmp1 <- getNewRegNat pk
- let
- pk__2 = cmmExprType src
- code__2 = code1 `appOL` code2 `appOL`
- if sizeToWidth pk == typeWidth pk__2
- then unitOL (ST pk src__2 dst__2)
- else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
- , ST pk tmp1 dst__2]
- return code__2
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk dstCmmReg srcCmmExpr = do
- srcRegister <- getRegister srcCmmExpr
- let dstReg = getRegisterReg dstCmmReg
-
- return $ case srcRegister of
- Any _ code -> code dstReg
- Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
--- Easy, isn't it?
-assignMem_FltCode = assignMem_IntCode
-assignReg_FltCode = assignReg_IntCode
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating a non-local jump
-
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genJump (CmmLabel lbl)
- | isAsmTemp lbl = returnInstr (BR target)
- | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
- where
- target = ImmCLbl lbl
-
-genJump tree
- = getRegister tree `thenNat` \ register ->
- getNewRegNat PtrRep `thenNat` \ tmp ->
- let
- dst = registerName register pv
- code = registerCode register pv
- target = registerName register pv
- in
- if isFixed register then
- returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
- else
- return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-genJump (CmmLoad mem pk) = do
- Amode target code <- getAmode mem
- return (code `snocOL` JMP (OpAddr target))
-
-genJump (CmmLit lit) = do
- return (unitOL (JMP (OpImm (litToImm lit))))
-
-genJump expr = do
- (reg,code) <- getSomeReg expr
- return (code `snocOL` JMP (OpReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genJump (CmmLit (CmmLabel lbl))
- = return (toOL [CALL (Left target) 0 True, NOP])
- where
- target = ImmCLbl lbl
-
-genJump tree
- = do
- (target, code) <- getSomeReg tree
- return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-genJump (CmmLit (CmmLabel lbl))
- = return (unitOL $ JMP lbl)
-
-genJump tree
- = do
- (target,code) <- getSomeReg tree
- return (code `snocOL` MTCTR target `snocOL` BCTR [])
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Unconditional branches
-
-genBranch :: BlockId -> NatM InstrBlock
-
-genBranch = return . toOL . mkBranchInstr
-
--- -----------------------------------------------------------------------------
--- Conditional jumps
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions. We peek at the arguments to decide what kind of
-comparison to do.
-
-ALPHA: For comparisons with 0, we're laughing, because we can just do
-the desired conditional branch.
-
-I386: First, we have to ensure that the condition
-codes are set according to the supplied comparison operation.
-
-SPARC: First, we have to ensure that the condition codes are set
-according to the supplied comparison operation. We generate slightly
-different code for floating point comparisons, because a floating
-point operation cannot directly precede a @BF@. We assume the worst
-and fill that slot with a @NOP@.
-
-SPARC: Do not fill the delay slots here; you will confuse the register
-allocator.
--}
-
-
-genCondJump
- :: BlockId -- the branch target
- -> CmmExpr -- the condition on which to branch
- -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genCondJump id (StPrim op [x, StInt 0])
- = getRegister x `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- value = registerName register tmp
- pk = registerRep register
- target = ImmCLbl lbl
- in
- returnSeq code [BI (cmpOp op) value target]
- where
- cmpOp CharGtOp = GTT
- cmpOp CharGeOp = GE
- cmpOp CharEqOp = EQQ
- cmpOp CharNeOp = NE
- cmpOp CharLtOp = LTT
- cmpOp CharLeOp = LE
- cmpOp IntGtOp = GTT
- cmpOp IntGeOp = GE
- cmpOp IntEqOp = EQQ
- cmpOp IntNeOp = NE
- cmpOp IntLtOp = LTT
- cmpOp IntLeOp = LE
- cmpOp WordGtOp = NE
- cmpOp WordGeOp = ALWAYS
- cmpOp WordEqOp = EQQ
- cmpOp WordNeOp = NE
- cmpOp WordLtOp = NEVER
- cmpOp WordLeOp = EQQ
- cmpOp AddrGtOp = NE
- cmpOp AddrGeOp = ALWAYS
- cmpOp AddrEqOp = EQQ
- cmpOp AddrNeOp = NE
- cmpOp AddrLtOp = NEVER
- cmpOp AddrLeOp = EQQ
-
-genCondJump lbl (StPrim op [x, StDouble 0.0])
- = getRegister x `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- value = registerName register tmp
- pk = registerRep register
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BF (cmpOp op) value target))
- where
- cmpOp FloatGtOp = GTT
- cmpOp FloatGeOp = GE
- cmpOp FloatEqOp = EQQ
- cmpOp FloatNeOp = NE
- cmpOp FloatLtOp = LTT
- cmpOp FloatLeOp = LE
- cmpOp DoubleGtOp = GTT
- cmpOp DoubleGeOp = GE
- cmpOp DoubleEqOp = EQQ
- cmpOp DoubleNeOp = NE
- cmpOp DoubleLtOp = LTT
- cmpOp DoubleLeOp = LE
-
-genCondJump lbl (StPrim op [x, y])
- | fltCmpOp op
- = trivialFCode pr instr x y `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- result = registerName register tmp
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BF cond result target))
- where
- pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
-
- fltCmpOp op = case op of
- FloatGtOp -> True
- FloatGeOp -> True
- FloatEqOp -> True
- FloatNeOp -> True
- FloatLtOp -> True
- FloatLeOp -> True
- DoubleGtOp -> True
- DoubleGeOp -> True
- DoubleEqOp -> True
- DoubleNeOp -> True
- DoubleLtOp -> True
- DoubleLeOp -> True
- _ -> False
- (instr, cond) = case op of
- FloatGtOp -> (FCMP TF LE, EQQ)
- FloatGeOp -> (FCMP TF LTT, EQQ)
- FloatEqOp -> (FCMP TF EQQ, NE)
- FloatNeOp -> (FCMP TF EQQ, EQQ)
- FloatLtOp -> (FCMP TF LTT, NE)
- FloatLeOp -> (FCMP TF LE, NE)
- DoubleGtOp -> (FCMP TF LE, EQQ)
- DoubleGeOp -> (FCMP TF LTT, EQQ)
- DoubleEqOp -> (FCMP TF EQQ, NE)
- DoubleNeOp -> (FCMP TF EQQ, EQQ)
- DoubleLtOp -> (FCMP TF LTT, NE)
- DoubleLeOp -> (FCMP TF LE, NE)
-
-genCondJump lbl (StPrim op [x, y])
- = trivialCode instr x y `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- result = registerName register tmp
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BI cond result target))
- where
- (instr, cond) = case op of
- CharGtOp -> (CMP LE, EQQ)
- CharGeOp -> (CMP LTT, EQQ)
- CharEqOp -> (CMP EQQ, NE)
- CharNeOp -> (CMP EQQ, EQQ)
- CharLtOp -> (CMP LTT, NE)
- CharLeOp -> (CMP LE, NE)
- IntGtOp -> (CMP LE, EQQ)
- IntGeOp -> (CMP LTT, EQQ)
- IntEqOp -> (CMP EQQ, NE)
- IntNeOp -> (CMP EQQ, EQQ)
- IntLtOp -> (CMP LTT, NE)
- IntLeOp -> (CMP LE, NE)
- WordGtOp -> (CMP ULE, EQQ)
- WordGeOp -> (CMP ULT, EQQ)
- WordEqOp -> (CMP EQQ, NE)
- WordNeOp -> (CMP EQQ, EQQ)
- WordLtOp -> (CMP ULT, NE)
- WordLeOp -> (CMP ULE, NE)
- AddrGtOp -> (CMP ULE, EQQ)
- AddrGeOp -> (CMP ULT, EQQ)
- AddrEqOp -> (CMP EQQ, NE)
- AddrNeOp -> (CMP EQQ, EQQ)
- AddrLtOp -> (CMP ULT, NE)
- AddrLeOp -> (CMP ULE, NE)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode _ cond code <- getCondCode bool
- return (code `snocOL` JXX cond id)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode is_float cond cond_code <- getCondCode bool
- if not is_float
- then
- return (cond_code `snocOL` JXX cond id)
- else do
- lbl <- getBlockIdNat
-
- -- see comment with condFltReg
- let code = case cond of
- NE -> or_unordered
- GU -> plain_test
- GEU -> plain_test
- _ -> and_ordered
-
- plain_test = unitOL (
- JXX cond id
- )
- or_unordered = toOL [
- JXX cond id,
- JXX PARITY id
- ]
- and_ordered = toOL [
- JXX PARITY lbl,
- JXX cond id,
- JXX ALWAYS lbl,
- NEWBLOCK lbl
- ]
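- -- or_unordered: NaN must compare as "not equal", so branch on either
- -- the condition or the parity (unordered) flag. and_ordered: parity
- -- set means unordered, so skip the conditional branch entirely.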
- return (cond_code `appOL` code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genCondJump bid bool = do
- CondCode is_float cond code <- getCondCode bool
- return (
- code `appOL`
- toOL (
- if is_float
- then [NOP, BF cond False bid, NOP]
- else [BI cond False bid, NOP]
- )
- )
-
-#endif /* sparc_TARGET_ARCH */
-
-
-#if powerpc_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode is_float cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating C calls
-
--- Now the biggest nightmare---calls. Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations. Apart from that, the code is easy.
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genCCall
- :: CmmCallTarget -- function to call
- -> HintedCmmFormals -- where to put the result
- -> HintedCmmActuals -- arguments (of mixed type)
- -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
--- ccallResultRegs was never filled in here; this Alpha code has
--- bit-rotted and is not compiled.
-
-genCCall fn cconv result_regs args
- = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
- `thenNat` \ ((unused,_), argCode) ->
- let
- nRegs = length allArgRegs - length unused
- code = asmSeqThen (map ($ []) argCode)
- in
- returnSeq code [
- LDA pv (AddrImm (ImmLab (ptext fn))),
- JSR ra (AddrReg pv) nRegs,
- LDGP gp (AddrReg ra)]
- where
- ------------------------
- {- Try to get a value into a specific register (or registers) for
- a call. The first 6 arguments go into the appropriate
- argument register (separate registers for integer and floating
- point arguments, but used in lock-step), and the remaining
- arguments are dumped to the stack, beginning at 0(sp). Our
- first argument is a pair of the list of remaining argument
- registers to be assigned for this call and the next stack
- offset to use for overflowing arguments. This way,
- @get_arg@ can be applied to all of a call's arguments using
- @mapAccumLNat@.
- -}
- get_arg
- :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
- -> StixTree -- Current argument
- -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
-
- -- We have to use up all of our argument registers first...
-
- get_arg ((iDst,fDst):dsts, offset) arg
- = getRegister arg `thenNat` \ register ->
- let
- reg = if isFloatType pk then fDst else iDst
- code = registerCode register reg
- src = registerName register reg
- pk = registerRep register
- in
- return (
- if isFloatType pk then
- ((dsts, offset), if isFixed register then
- code . mkSeqInstr (FMOV src fDst)
- else code)
- else
- ((dsts, offset), if isFixed register then
- code . mkSeqInstr (OR src (RIReg src) iDst)
- else code))
-
- -- Once we have run out of argument registers, we move to the
- -- stack...
-
- get_arg ([], offset) arg
- = getRegister arg `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- pk = registerRep register
- sz = primRepToSize pk
- in
- return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
- -- write barrier compiles to no code on x86/x86-64;
- -- we keep it this long in order to prevent earlier optimisations.
-
--- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [CmmHinted r _] args = do
- l1 <- getNewLabelNat
- l2 <- getNewLabelNat
- case op of
- MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
- MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
-
- MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
- MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
-
- MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
- MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
-
- MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
- MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
-
- other_op -> outOfLineFloatOp op r args
- where
- actuallyInlineFloatOp instr size [CmmHinted x _]
- = do res <- trivialUFCode size (instr size) x
- any <- anyReg res
- return (any (getRegisterReg (CmmLocal r)))
-
-genCCall target dest_regs args = do
- let
- sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
-#if !darwin_TARGET_OS
- tot_arg_size = sum sizes
-#else
- raw_arg_size = sum sizes
- tot_arg_size = roundTo 16 raw_arg_size
- arg_pad_size = tot_arg_size - raw_arg_size
- delta0 <- getDeltaNat
- setDeltaNat (delta0 - arg_pad_size)
-#endif
-
- push_codes <- mapM push_arg (reverse args)
- delta <- getDeltaNat
-
- -- in
- -- deal with static vs dynamic call targets
- (callinsns,cconv) <-
- case target of
- -- CmmPrim -> ...
- CmmCallee (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) []), conv)
- where fn_imm = ImmCLbl lbl
- CmmCallee expr conv
- -> do { (dyn_c, dyn_r) <- get_op expr
- ; ASSERT( isWord32 (cmmExprType expr) )
- return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
-
- let push_code
-#if darwin_TARGET_OS
- | arg_pad_size /= 0
- = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
- DELTA (delta0 - arg_pad_size)]
- `appOL` concatOL push_codes
- | otherwise
-#endif
- = concatOL push_codes
- call = callinsns `appOL`
- toOL (
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- (if cconv == StdCallConv || tot_arg_size==0 then [] else
- [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
- ++
- [DELTA (delta + tot_arg_size)]
- )
- -- in
- setDeltaNat (delta + tot_arg_size)
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [CmmHinted dest _hint]
- | isFloatType ty = unitOL (GMOV fake0 r_dest)
- | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
- MOV II32 (OpReg edx) (OpReg r_dest_hi)]
- | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
- where
- ty = localRegType dest
- w = typeWidth ty
- r_dest_hi = getHiVRegFromLo r_dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
-
- return (push_code `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where
- arg_size :: CmmType -> Int -- Width in bytes
- arg_size ty = widthInBytes (typeWidth ty)
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
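- -- e.g. roundTo 16 0 == 0, roundTo 16 24 == 32: round x up to the
- -- next multiple of a (assumes a > 0 and x >= 0)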
-
-
- push_arg :: HintedCmmActual {-current argument-}
- -> NatM InstrBlock -- code
-
- push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
- | isWord64 arg_ty = do
- ChildCode64 code r_lo <- iselExpr64 arg
- delta <- getDeltaNat
- setDeltaNat (delta - 8)
- let
- r_hi = getHiVRegFromLo r_lo
- -- in
- return ( code `appOL`
- toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
- PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
- )
-
- | otherwise = do
- (code, reg) <- get_op arg
- delta <- getDeltaNat
- let size = arg_size arg_ty -- Byte size
- setDeltaNat (delta-size)
- if (isFloatType arg_ty)
- then return (code `appOL`
- toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
- DELTA (delta-size),
- GST (floatSize (typeWidth arg_ty))
- reg (AddrBaseIndex (EABaseReg esp)
- EAIndexNone
- (ImmInt 0))]
- )
- else return (code `snocOL`
- PUSH II32 (OpReg reg) `snocOL`
- DELTA (delta-size)
- )
- where
- arg_ty = cmmExprType arg
-
- ------------
- get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
- get_op op = do
- (reg,code) <- getSomeReg op
- return (code, reg)
-
-#endif /* i386_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
- -> NatM InstrBlock
-outOfLineFloatOp mop res args
- = do
- dflags <- getDynFlagsNat
- targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
- let target = CmmCallee targetExpr CCallConv
-
- if isFloat64 (localRegType res)
- then
- stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
- else do
- uq <- getUniqueNat
- let
- tmp = LocalReg uq f64
- -- in
- code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
- code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
- return (code1 `appOL` code2)
- where
- lbl = mkForeignLabel fn Nothing False IsFunction
-
- fn = case mop of
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
- MO_F32_Exp -> fsLit "expf"
- MO_F32_Log -> fsLit "logf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
- MO_F64_Exp -> fsLit "exp"
- MO_F64_Log -> fsLit "log"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
- MO_F64_Pwr -> fsLit "pow"
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
- -- write barrier compiles to no code on x86/x86-64;
- -- we keep it this long in order to prevent earlier optimisations.
-
-
-genCCall (CmmPrim op) [CmmHinted r _] args =
- outOfLineFloatOp op r args
-
-genCCall target dest_regs args = do
-
- -- load up the register arguments
- (stack_args, aregs, fregs, load_args_code)
- <- load_args args allArgRegs allFPArgRegs nilOL
-
- let
- fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
- int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
- arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
- -- for annotating the call instruction with
-
- sse_regs = length fp_regs_used
-
- tot_arg_size = arg_size * length stack_args
-
- -- On entry to the called function, %rsp should be aligned
- -- on a 16-byte boundary +8 (i.e. the first stack arg after
- -- the return address is 16-byte aligned). In STG land
- -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
- -- need to make sure we push a multiple of 16-bytes of args,
- -- plus the return address, to get the correct alignment.
- -- Urg, this is hard. We need to feed the delta back into
- -- the arg pushing code.
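- -- For example, three 8-byte stack args give tot_arg_size = 24;
- -- 24 `rem` 16 /= 0, so we drop %rsp by 8 more bytes below, making
- -- 32 bytes of outgoing args and restoring alignment at the CALL.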
- (real_size, adjust_rsp) <-
- if tot_arg_size `rem` 16 == 0
- then return (tot_arg_size, nilOL)
- else do -- we need to adjust...
- delta <- getDeltaNat
- setDeltaNat (delta-8)
- return (tot_arg_size+8, toOL [
- SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
- DELTA (delta-8)
- ])
-
- -- push the stack args, right to left
- push_code <- push_args (reverse stack_args) nilOL
- delta <- getDeltaNat
-
- -- deal with static vs dynamic call targets
- (callinsns,cconv) <-
- case target of
- -- CmmPrim -> ...
- CmmCallee (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) arg_regs), conv)
- where fn_imm = ImmCLbl lbl
- CmmCallee expr conv
- -> do (dyn_r, dyn_c) <- getSomeReg expr
- return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
-
- let
- -- The x86_64 ABI requires us to set %al to the number of SSE
- -- registers that contain arguments, if the called routine
- -- is a varargs function. We don't know whether it's a
- -- varargs function or not, so we have to assume it is.
- --
- -- It's not safe to omit this assignment, even if the number
- -- of SSE regs in use is zero. If %al is larger than 8
- -- on entry to a varargs function, seg faults ensue.
- assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
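- -- (a 32-bit MOV zero-extends its destination on x86_64, so this
- -- also clears the rest of %rax and leaves %al == n)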
-
- let call = callinsns `appOL`
- toOL (
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- (if cconv == StdCallConv || real_size==0 then [] else
- [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
- ++
- [DELTA (delta + real_size)]
- )
- -- in
- setDeltaNat (delta + real_size)
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [CmmHinted dest _hint] =
- case typeWidth rep of
- W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
- W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
- _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
- where
- rep = localRegType dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
-
- return (load_args_code `appOL`
- adjust_rsp `appOL`
- push_code `appOL`
- assign_eax sse_regs `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where
- arg_size = 8 -- always, at the moment
-
- load_args :: [CmmHinted CmmExpr]
- -> [Reg] -- int regs avail for args
- -> [Reg] -- FP regs avail for args
- -> InstrBlock
- -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
- load_args args [] [] code = return (args, [], [], code)
- -- no more regs to use
- load_args [] aregs fregs code = return ([], aregs, fregs, code)
- -- no more args to push
- load_args ((CmmHinted arg hint) : rest) aregs fregs code
- | isFloatType arg_rep =
- case fregs of
- [] -> push_this_arg
- (r:rs) -> do
- arg_code <- getAnyReg arg
- load_args rest aregs rs (code `appOL` arg_code r)
- | otherwise =
- case aregs of
- [] -> push_this_arg
- (r:rs) -> do
- arg_code <- getAnyReg arg
- load_args rest rs fregs (code `appOL` arg_code r)
- where
- arg_rep = cmmExprType arg
-
- push_this_arg = do
- (args',ars,frs,code') <- load_args rest aregs fregs code
- return ((CmmHinted arg hint):args', ars, frs, code')
-
- push_args [] code = return code
- push_args ((CmmHinted arg hint):rest) code
- | isFloatType arg_rep = do
- (arg_reg, arg_code) <- getSomeReg arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
- DELTA (delta-arg_size),
- MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
- push_args rest code'
-
- | otherwise = do
- -- we only ever generate word-sized function arguments. Promotion
- -- has already happened: our Int8# type is kept sign-extended
- -- in an Int#, for example.
- ASSERT(width == W64) return ()
- (arg_op, arg_code) <- getOperand arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- PUSH II64 arg_op,
- DELTA (delta-arg_size)]
- push_args rest code'
- where
- arg_rep = cmmExprType arg
- width = typeWidth arg_rep
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-{-
- The SPARC calling convention is an absolute
- nightmare. The first six 32-bit words of arguments are mapped into
- %o0 through %o5, and the remaining arguments are dumped to the
- stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
-
- If we have to put args on the stack, move %o6==%sp down by
- the number of words to go on the stack, to ensure there's enough space.
-
- According to Fraser and Hanson's lcc book, page 478, fig 17.2,
- 16 words above the stack pointer is a word for the address of
- a structure return value. I use this as a temporary location
- for moving values from float to int regs. Certainly it isn't
- safe to put anything in the 16 words starting at %sp, since
- this area can get trashed at any time due to window overflows
- caused by signal handlers.
-
- A final complication (if the above isn't enough) is that
- we can't blithely calculate the arguments one by one into
- %o0 .. %o5. Consider the following nested calls:
-
- fff a (fff b c)
-
- Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
- the inner call will itself use %o0, which trashes the value put there
- in preparation for the outer call. Upshot: we need to calculate the
- args into temporary regs, and move those to arg regs or onto the
- stack only immediately prior to the call proper. Sigh.
-
-genCCall
- :: CmmCallTarget -- function to call
- -> HintedCmmFormals -- where to put the result
- -> HintedCmmActuals -- arguments (of mixed type)
- -> NatM InstrBlock
-
--}
-
-
--- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
--- are guaranteed to take place before writes afterwards (unlike on PowerPC).
--- Ref: Section 8.4 of the SPARC V9 Architecture manual.
---
--- In the SPARC case we don't need a barrier.
---
-genCCall (CmmPrim (MO_WriteBarrier)) _ _
- = do return nilOL
-
-genCCall target dest_regs argsAndHints
- = do
- -- strip hints from the arg regs
- let args :: [CmmExpr]
- args = map hintlessCmm argsAndHints
-
-
- -- work out the arguments, and assign them to integer regs
- argcode_and_vregs <- mapM arg_to_int_vregs args
- let (argcodes, vregss) = unzip argcode_and_vregs
- let vregs = concat vregss
-
- let n_argRegs = length allArgRegs
- let n_argRegs_used = min (length vregs) n_argRegs
-
-
- -- deal with static vs dynamic call targets
- callinsns <- case target of
- CmmCallee (CmmLit (CmmLabel lbl)) conv ->
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- CmmCallee expr conv
- -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- CmmPrim mop
- -> do res <- outOfLineFloatOp mop
- lblOrMopExpr <- case res of
- Left lbl -> do
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- Right mopExpr -> do
- (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- return lblOrMopExpr
-
- let argcode = concatOL argcodes
-
- let (move_sp_down, move_sp_up)
- = let diff = length vregs - n_argRegs
- nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
- in if nn <= 0
- then (nilOL, nilOL)
- else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-
- let transfer_code
- = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-
- return
- $ argcode `appOL`
- move_sp_down `appOL`
- transfer_code `appOL`
- callinsns `appOL`
- unitOL NOP `appOL`
- move_sp_up `appOL`
- assign_code dest_regs
-
-
--- | Generate code to calculate an argument, and move it into one
--- or two integer vregs.
-arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-arg_to_int_vregs arg
-
- -- If the expr produces a 64 bit int, then we can just use iselExpr64
- | isWord64 (cmmExprType arg)
- = do (ChildCode64 code r_lo) <- iselExpr64 arg
- let r_hi = getHiVRegFromLo r_lo
- return (code, [r_hi, r_lo])
-
- | otherwise
- = do (src, code) <- getSomeReg arg
- tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
- let pk = cmmExprType arg
-
- case cmmTypeSize pk of
-
- -- Load a 64 bit float return value into two integer regs.
- FF64 -> do
- v1 <- getNewRegNat II32
- v2 <- getNewRegNat II32
-
- let Just f0_high = fPair f0
-
- let code2 =
- code `snocOL`
- FMOV FF64 src f0 `snocOL`
- ST FF32 f0 (spRel 16) `snocOL`
- LD II32 (spRel 16) v1 `snocOL`
- ST FF32 f0_high (spRel 16) `snocOL`
- LD II32 (spRel 16) v2
-
- return (code2, [v1,v2])
-
- -- Load a 32 bit float return value into an integer reg
- FF32 -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- ST FF32 src (spRel 16) `snocOL`
- LD II32 (spRel 16) v1
-
- return (code2, [v1])
-
- -- Move an integer return value into its destination reg.
- other -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- OR False g0 (RIReg src) v1
-
- return (code2, [v1])
-
-
--- | Move args from the integer vregs into which they have been
--- marshalled, into %o0 .. %o5, and the rest onto the stack.
---
-move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
--- all args done
-move_final [] _ offset
- = []
-
--- out of aregs; move to stack
-move_final (v:vs) [] offset
- = ST II32 v (spRel offset)
- : move_final vs [] (offset+1)
-
--- move into an arg (%o[0..5]) reg
-move_final (v:vs) (a:az) offset
- = OR False g0 (RIReg v) a
- : move_final vs az offset
-
-
--- | Assign results returned from the call into their
--- destination regs.
---
-assign_code :: [CmmHinted LocalReg] -> OrdList Instr
-assign_code [] = nilOL
-
-assign_code [CmmHinted dest _hint]
- = let rep = localRegType dest
- width = typeWidth rep
- r_dest = getRegisterReg (CmmLocal dest)
-
- result
- | isFloatType rep
- , W32 <- width
- = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
-
- | isFloatType rep
- , W64 <- width
- = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
-
- | not $ isFloatType rep
- , W32 <- width
- = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
-
- | not $ isFloatType rep
- , W64 <- width
- , r_dest_hi <- getHiVRegFromLo r_dest
- = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
- , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
- in result
-
-
--- | Generate a call to implement an out-of-line floating point operation
-outOfLineFloatOp
- :: CallishMachOp
- -> NatM (Either CLabel CmmExpr)
-
-outOfLineFloatOp mop
- = do let functionName
- = outOfLineFloatOp_table mop
-
- dflags <- getDynFlagsNat
- mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
- $ mkForeignLabel functionName Nothing True IsFunction
-
- let mopLabelOrExpr
- = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
-
- return mopLabelOrExpr
-
-
--- | Decide what C function to use to implement a CallishMachOp
---
-outOfLineFloatOp_table
- :: CallishMachOp
- -> FastString
-
-outOfLineFloatOp_table mop
- = case mop of
- MO_F32_Exp -> fsLit "expf"
- MO_F32_Log -> fsLit "logf"
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
-
- MO_F64_Exp -> fsLit "exp"
- MO_F64_Log -> fsLit "log"
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Pwr -> fsLit "pow"
-
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
-
- other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
- (pprCallishMachOp mop)
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-#if darwin_TARGET_OS || linux_TARGET_OS
-{-
- The PowerPC calling convention for Darwin/Mac OS X
- is described in Apple's document
- "Inside Mac OS X - Mach-O Runtime Architecture".
-
- PowerPC Linux uses the System V Release 4 Calling Convention
- for PowerPC. It is described in the
- "System V Application Binary Interface PowerPC Processor Supplement".
-
- Both conventions are similar:
- Parameters may be passed in general-purpose registers starting at r3, in
- floating point registers starting at f1, or on the stack.
-
- But there are substantial differences:
- * The number of registers used for parameter passing and the exact set of
- nonvolatile registers differs (see MachRegs.lhs).
- * On Darwin, stack space is always reserved for parameters, even if they are
- passed in registers. The called routine may choose to save parameters from
- registers to the corresponding space on the stack.
- * On Darwin, a corresponding amount of GPRs is skipped when a floating point
- parameter is passed in an FPR.
- * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
- starting with an odd-numbered GPR. It may skip a GPR to achieve this.
- Darwin just treats an I64 like two separate II32s (high word first).
- * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
- 4-byte aligned like everything else on Darwin.
- * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
- PowerPC Linux does not agree, so neither do we.
-
- According to both conventions, the parameter area should be part of the
- caller's stack frame, allocated in the caller's prologue code (large enough
- to hold the parameter lists for all called routines). The NCG already
- uses the stack for register spilling, leaving 64 bytes free at the top.
- If we need a larger parameter area than that, we just allocate a new stack
- frame just before ccalling.
--}
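-
--- For example: with r3 already taken by an earlier argument, the free
--- GPRs are r4..r10 (an odd count), so under SysV an I64 skips r4 and
--- lands in the r5/r6 pair; Darwin would simply use r4/r5.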
-
-
-genCCall (CmmPrim MO_WriteBarrier) _ _
- = return $ unitOL LWSYNC
-
-genCCall target dest_regs argsAndHints
- = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
- -- we rely on argument promotion in the codeGen
- do
- (finalStack,passArgumentsCode,usedRegs) <- passArguments
- (zip args argReps)
- allArgRegs allFPArgRegs
- initialStackOffset
- (toOL []) []
-
- (labelOrExpr, reduceToFF32) <- case target of
- CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
- CmmCallee expr conv -> return (Right expr, False)
- CmmPrim mop -> outOfLineFloatOp mop
-
- let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
- codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
-
- case labelOrExpr of
- Left lbl -> do
- return ( codeBefore
- `snocOL` BL lbl usedRegs
- `appOL` codeAfter)
- Right dyn -> do
- (dynReg, dynCode) <- getSomeReg dyn
- return ( dynCode
- `snocOL` MTCTR dynReg
- `appOL` codeBefore
- `snocOL` BCTRL usedRegs
- `appOL` codeAfter)
- where
-#if darwin_TARGET_OS
- initialStackOffset = 24
- -- size of linkage area + size of arguments, in bytes
- stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
- map (widthInBytes . typeWidth) argReps
-#elif linux_TARGET_OS
- initialStackOffset = 8
- stackDelta finalStack = roundTo 16 finalStack
-#endif
- args = map hintlessCmm argsAndHints
- argReps = map cmmExprType args
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
-
- move_sp_down finalStack
- | delta > 64 =
- toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
- DELTA (-delta)]
- | otherwise = nilOL
- where delta = stackDelta finalStack
- move_sp_up finalStack
- | delta > 64 =
- toOL [ADD sp sp (RIImm (ImmInt delta)),
- DELTA 0]
- | otherwise = nilOL
- where delta = stackDelta finalStack
-
-
- passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
- passArguments ((arg,arg_ty):args) gprs fprs stackOffset
- accumCode accumUsed | isWord64 arg_ty =
- do
- ChildCode64 code vr_lo <- iselExpr64 arg
- let vr_hi = getHiVRegFromLo vr_lo
-
-#if darwin_TARGET_OS
- passArguments args
- (drop 2 gprs)
- fprs
- (stackOffset+8)
- (accumCode `appOL` code
- `snocOL` storeWord vr_hi gprs stackOffset
- `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
- ((take 2 gprs) ++ accumUsed)
- where
- storeWord vr (gpr:_) offset = MR gpr vr
- storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
-
-#elif linux_TARGET_OS
- let stackOffset' = roundTo 8 stackOffset
- stackCode = accumCode `appOL` code
- `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
- `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
- regCode hireg loreg =
- accumCode `appOL` code
- `snocOL` MR hireg vr_hi
- `snocOL` MR loreg vr_lo
-
- case gprs of
- hireg : loreg : regs | even (length gprs) ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _skipped : hireg : loreg : regs ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _ -> -- only one or no regs left
- passArguments args [] fprs (stackOffset'+8)
- stackCode accumUsed
-#endif
-
- passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
- | reg : _ <- regs = do
- register <- getRegister arg
- let code = case register of
- Fixed _ freg fcode -> fcode `snocOL` MR reg freg
- Any _ acode -> acode reg
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
-#if darwin_TARGET_OS
- -- The Darwin ABI requires that we reserve stack slots for register parameters
- (stackOffset + stackBytes)
-#elif linux_TARGET_OS
- -- ... the SysV ABI doesn't.
- stackOffset
-#endif
- (accumCode `appOL` code)
- (reg : accumUsed)
- | otherwise = do
- (vr, code) <- getSomeReg arg
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
- (stackOffset' + stackBytes)
- (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
- accumUsed
- where
-#if darwin_TARGET_OS
- -- stackOffset is at least 4-byte aligned
- -- The Darwin ABI is happy with that.
- stackOffset' = stackOffset
-#else
- -- ... the SysV ABI requires 8-byte alignment for doubles.
- stackOffset' | isFloatType rep && typeWidth rep == W64 =
- roundTo 8 stackOffset
- | otherwise = stackOffset
-#endif
- stackSlot = AddrRegImm sp (ImmInt stackOffset')
- (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
- II32 -> (1, 0, 4, gprs)
-#if darwin_TARGET_OS
- -- The Darwin ABI requires that we skip a corresponding number of GPRs when
- -- we use the FPRs.
- FF32 -> (1, 1, 4, fprs)
- FF64 -> (2, 1, 8, fprs)
-#elif linux_TARGET_OS
- -- ... the SysV ABI doesn't.
- FF32 -> (0, 1, 4, fprs)
- FF64 -> (0, 1, 8, fprs)
-#endif
-
- moveResult reduceToFF32 =
- case dest_regs of
- [] -> nilOL
- [CmmHinted dest _hint]
- | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
- | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
- | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
- MR r_dest r4]
- | otherwise -> unitOL (MR r_dest r3)
- where rep = cmmRegType (CmmLocal dest)
- r_dest = getRegisterReg (CmmLocal dest)
-
- outOfLineFloatOp mop =
- do
- dflags <- getDynFlagsNat
- mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
- mkForeignLabel functionName Nothing True IsFunction
- let mopLabelOrExpr = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
- return (mopLabelOrExpr, reduce)
- where
- (functionName, reduce) = case mop of
- MO_F32_Exp -> (fsLit "exp", True)
- MO_F32_Log -> (fsLit "log", True)
- MO_F32_Sqrt -> (fsLit "sqrt", True)
-
- MO_F32_Sin -> (fsLit "sin", True)
- MO_F32_Cos -> (fsLit "cos", True)
- MO_F32_Tan -> (fsLit "tan", True)
-
- MO_F32_Asin -> (fsLit "asin", True)
- MO_F32_Acos -> (fsLit "acos", True)
- MO_F32_Atan -> (fsLit "atan", True)
-
- MO_F32_Sinh -> (fsLit "sinh", True)
- MO_F32_Cosh -> (fsLit "cosh", True)
- MO_F32_Tanh -> (fsLit "tanh", True)
- MO_F32_Pwr -> (fsLit "pow", True)
-
- MO_F64_Exp -> (fsLit "exp", False)
- MO_F64_Log -> (fsLit "log", False)
- MO_F64_Sqrt -> (fsLit "sqrt", False)
-
- MO_F64_Sin -> (fsLit "sin", False)
- MO_F64_Cos -> (fsLit "cos", False)
- MO_F64_Tan -> (fsLit "tan", False)
-
- MO_F64_Asin -> (fsLit "asin", False)
- MO_F64_Acos -> (fsLit "acos", False)
- MO_F64_Atan -> (fsLit "atan", False)
-
- MO_F64_Sinh -> (fsLit "sinh", False)
- MO_F64_Cosh -> (fsLit "cosh", False)
- MO_F64_Tanh -> (fsLit "tanh", False)
- MO_F64_Pwr -> (fsLit "pow", False)
- other -> pprPanic "genCCall(ppc): unknown callish op"
- (pprCallishMachOp other)
-
-#endif /* darwin_TARGET_OS || linux_TARGET_OS */
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = do
- (reg,e_code) <- getSomeReg expr
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let
- jumpTable = map jumpTableEntryRel ids
-
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 wordWidth)
- jumpTableEntryRel (Just (BlockId id))
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel id
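- -- each entry is the difference (blockLabel - lbl), so adding the
- -- table base address back in recovers an absolute jump target;
- -- the table itself then needs no run-time relocation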
-
- op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg wORD_SIZE) (ImmInt 0))
-
-#if x86_64_TARGET_ARCH
-#if darwin_TARGET_OS
- -- on Mac OS X/x86_64, put the jump table in the text section
- -- to work around a limitation of the linker.
- -- ld64 is unable to handle the relocations for
- -- .quad L1 - L0
- -- if L0 is not preceded by a non-anonymous label in its section.
-
- code = e_code `appOL` t_code `appOL` toOL [
- ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
- LDATA Text (CmmDataLabel lbl : jumpTable)
- ]
-#else
- -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
- -- relocations, hence we only get 32-bit offsets in the jump
- -- table. As these offsets are always negative we need to properly
- -- sign extend them to 64-bit. This hack should be removed in
- -- conjunction with the hack in PprMach.hs/pprDataItem once
- -- binutils 2.17 is standard.
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- MOVSxL II32
- (OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg wORD_SIZE) (ImmInt 0)))
- (OpReg reg),
- ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
- ]
-#endif
-#else
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
- ]
-#endif
- return code
- | otherwise
- = do
- (reg,e_code) <- getSomeReg expr
- lbl <- getNewLabelNat
- let
- jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
- code = e_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- JMP_TBL op [ id | Just id <- ids ]
- ]
- -- in
- return code
-#elif powerpc_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = do
- (reg,e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let
- jumpTable = map jumpTableEntryRel ids
-
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 wordWidth)
- jumpTableEntryRel (Just (BlockId id))
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel id
-
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- SLW tmp reg (RIImm (ImmInt 2)),
- LD II32 tmp (AddrRegReg tableReg tmp),
- ADD tmp tmp (RIReg tableReg),
- MTCTR tmp,
- BCTR [ id | Just id <- ids ]
- ]
- return code
- | otherwise
- = do
- (reg,e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- lbl <- getNewLabelNat
- let
- jumpTable = map jumpTableEntry ids
-
- code = e_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- SLW tmp reg (RIImm (ImmInt 2)),
- ADDIS tmp tmp (HA (ImmCLbl lbl)),
- LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
- MTCTR tmp,
- BCTR [ id | Just id <- ids ]
- ]
- return code
-#elif sparc_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = error "MachCodeGen: sparc genSwitch PIC not finished\n"
-
- | otherwise
- = do (e_reg, e_code) <- getSomeReg expr
-
- base_reg <- getNewRegNat II32
- offset_reg <- getNewRegNat II32
- dst <- getNewRegNat II32
-
- label <- getNewLabelNat
- let jumpTable = map jumpTableEntry ids
-
- return $ e_code `appOL`
- toOL
- -- the jump table
- [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
-
- -- load base of jump table
- , SETHI (HI (ImmCLbl label)) base_reg
- , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
-
-                -- the addresses in the table are 32 bits wide.
- , SLL e_reg (RIImm $ ImmInt 2) offset_reg
-
- -- load and jump to the destination
- , LD II32 (AddrRegReg base_reg offset_reg) dst
- , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
- , NOP ]
-
-#else
-#error "ToDo: genSwitch"
-#endif
-
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: Maybe BlockId -> CmmStatic
-jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
-jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel id
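-
--- For example, ids [Just b1, Nothing, Just b2] produces the table
--- [addr b1, 0, addr b2]; genSwitch indexes into it with the switch
--- value scaled by the word size.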
-
--- -----------------------------------------------------------------------------
--- Support bits
--- -----------------------------------------------------------------------------
-
-
--- -----------------------------------------------------------------------------
--- 'condIntReg' and 'condFltReg': condition codes into registers
-
--- Turn those condition codes into integers now (when they appear on
--- the right hand side of an assignment).
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-condIntReg = panic "MachCode.condIntReg (not on Alpha)"
-condFltReg = panic "MachCode.condFltReg (not on Alpha)"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-condIntReg cond x y = do
- CondCode _ cond cond_code <- condIntCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
-#if i386_TARGET_ARCH
-
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp1 <- getNewRegNat wordSize
- tmp2 <- getNewRegNat wordSize
- let
- -- We have to worry about unordered operands (eg. comparisons
- -- against NaN). If the operands are unordered, the comparison
- -- sets the parity flag, carry flag and zero flag.
- -- All comparisons are supposed to return false for unordered
- -- operands except for !=, which returns true.
- --
- -- Optimisation: we don't have to test the parity flag if we
- -- know the test has already excluded the unordered case: eg >
- -- and >= test for a zero carry flag, which can only occur for
- -- ordered operands.
- --
- -- ToDo: by reversing comparisons we could avoid testing the
- -- parity flag in more cases.
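-        --
-        -- For example, NaN > 1 and NaN <= 1 must both come out False,
-        -- but NaN /= 1 must come out True: hence or_unordered for NE
-        -- and and_ordered for the remaining cases.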
-
- code dst =
- cond_code `appOL`
- (case cond of
- NE -> or_unordered dst
- GU -> plain_test dst
- GEU -> plain_test dst
- _ -> and_ordered dst)
-
- plain_test dst = toOL [
- SETCC cond (OpReg tmp1),
- MOVZxL II8 (OpReg tmp1) (OpReg dst)
- ]
- or_unordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC PARITY (OpReg tmp2),
- OR II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- and_ordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC NOTPARITY (OpReg tmp2),
- AND II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg EQQ x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg NE x (CmmLit (CmmInt 0 d)) = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg NE x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg cond x y = do
- bid1@(BlockId lbl1) <- getBlockIdNat
- bid2@(BlockId lbl2) <- getBlockIdNat
- CondCode _ cond cond_code <- condIntCode cond x y
- let
- code__2 dst = cond_code `appOL` toOL [
- BI cond False bid1, NOP,
- OR False g0 (RIImm (ImmInt 0)) dst,
- BI ALWAYS False bid2, NOP,
- NEWBLOCK bid1,
- OR False g0 (RIImm (ImmInt 1)) dst,
- NEWBLOCK bid2]
- return (Any II32 code__2)
-
-condFltReg cond x y = do
- bid1@(BlockId lbl1) <- getBlockIdNat
- bid2@(BlockId lbl2) <- getBlockIdNat
- CondCode _ cond cond_code <- condFltCode cond x y
- let
- code__2 dst = cond_code `appOL` toOL [
- NOP,
- BF cond False bid1, NOP,
- OR False g0 (RIImm (ImmInt 0)) dst,
- BI ALWAYS False bid2, NOP,
- NEWBLOCK bid1,
- OR False g0 (RIImm (ImmInt 1)) dst,
- NEWBLOCK bid2]
- return (Any II32 code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-condReg getCond = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- CondCode _ cond cond_code <- getCond
- let
-{- code dst = cond_code `appOL` toOL [
- BCC cond lbl1,
- LI dst (ImmInt 0),
- BCC ALWAYS lbl2,
- NEWBLOCK lbl1,
- LI dst (ImmInt 1),
- BCC ALWAYS lbl2,
- NEWBLOCK lbl2
- ]-}
- code dst = cond_code
- `appOL` negate_code
- `appOL` toOL [
- MFCR dst,
- RLWINM dst dst (bit + 1) 31 31
- ]
-
- negate_code | do_negate = unitOL (CRNOR bit bit bit)
- | otherwise = nilOL
-
- (bit, do_negate) = case cond of
- LTT -> (0, False)
- LE -> (1, True)
- EQQ -> (2, False)
- GE -> (0, True)
- GTT -> (1, False)
-
- NE -> (2, True)
-
- LU -> (0, False)
- LEU -> (1, True)
- GEU -> (0, True)
- GU -> (1, False)
-
- return (Any II32 code)
-
-condIntReg cond x y = condReg (condIntCode cond x y)
-condFltReg cond x y = condReg (condFltCode cond x y)
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-trivialCode
- :: Width -- Int only
- -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
- ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
- ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
- ,)))))
- -> CmmExpr -> CmmExpr -- the two arguments
- -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialFCode
- :: Width -- Floating point only
- -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
- ,))))
- -> CmmExpr -> CmmExpr -- the two arguments
- -> NatM Register
-#endif
-
-trivialUCode
- :: Size
- -> IF_ARCH_alpha((RI -> Reg -> Instr)
- ,IF_ARCH_i386 ((Operand -> Instr)
- ,IF_ARCH_x86_64 ((Operand -> Instr)
- ,IF_ARCH_sparc((RI -> Reg -> Instr)
- ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
- ,)))))
- -> CmmExpr -- the one argument
- -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialUFCode
- :: Size
- -> IF_ARCH_alpha((Reg -> Reg -> Instr)
- ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
- ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
- ,IF_ARCH_sparc((Reg -> Reg -> Instr)
- ,))))
- -> CmmExpr -- the one argument
- -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-trivialCode instr x (StInt y)
- | fits8Bits y
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src1 = registerName register tmp
- src2 = ImmInt (fromInteger y)
- code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
- in
- return (Any IntRep code__2)
-
-trivialCode instr x y
- = getRegister x `thenNat` \ register1 ->
- getRegister y `thenNat` \ register2 ->
- getNewRegNat IntRep `thenNat` \ tmp1 ->
- getNewRegNat IntRep `thenNat` \ tmp2 ->
- let
- code1 = registerCode register1 tmp1 []
- src1 = registerName register1 tmp1
- code2 = registerCode register2 tmp2 []
- src2 = registerName register2 tmp2
- code__2 dst = asmSeqThen [code1, code2] .
- mkSeqInstr (instr src1 (RIReg src2) dst)
- in
- return (Any IntRep code__2)
-
-------------
-trivialUCode instr x
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
- in
- return (Any IntRep code__2)
-
-------------
-trivialFCode _ instr x y
- = getRegister x `thenNat` \ register1 ->
- getRegister y `thenNat` \ register2 ->
- getNewRegNat FF64 `thenNat` \ tmp1 ->
- getNewRegNat FF64 `thenNat` \ tmp2 ->
- let
- code1 = registerCode register1 tmp1
- src1 = registerName register1 tmp1
-
- code2 = registerCode register2 tmp2
- src2 = registerName register2 tmp2
-
- code__2 dst = asmSeqThen [code1 [], code2 []] .
- mkSeqInstr (instr src1 src2 dst)
- in
- return (Any FF64 code__2)
-
-trivialUFCode _ instr x
- = getRegister x `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (instr src dst)
- in
- return (Any FF64 code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-{-
-The Rules of the Game are:
-
-* You cannot assume anything about the destination register dst;
- it may be anything, including a fixed reg.
-
-* You may compute an operand into a fixed reg, but you may not
- subsequently change the contents of that fixed reg. If you
- want to do so, first copy the value either to a temporary
- or into dst. You are free to modify dst even if it happens
- to be a fixed reg -- that's not your problem.
-
-* You cannot assume that a fixed reg will stay live over an
- arbitrary computation. The same applies to the dst reg.
-
-* Temporary regs obtained from getNewRegNat are distinct from
- each other and from all other regs, and stay live over
- arbitrary computations.
-
---------------------
-
-SDM's version of The Rules:
-
-* If getRegister returns Any, that means it can generate correct
- code which places the result in any register, period. Even if that
- register happens to be read during the computation.
-
- Corollary #1: this means that if you are generating code for an
- operation with two arbitrary operands, you cannot assign the result
- of the first operand into the destination register before computing
- the second operand. The second operand might require the old value
- of the destination register.
-
- Corollary #2: A function might be able to generate more efficient
- code if it knows the destination register is a new temporary (and
- therefore not read by any of the sub-computations).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- (c) known registers (eg. %ecx is used by shifts)
- In particular, it may *not* modify global registers, unless the global
- register happens to be the destination register.
--}
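-
--- Concretely, for @x + y@ compiled into dst: evaluating x straight
--- into dst and then evaluating y would be wrong whenever y reads dst.
--- That is why genTrivialCode below computes the non-destination
--- operand first, saving it in a fresh temporary if it clashes.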
-
-trivialCode width instr (Just revinstr) (CmmLit lit_a) b
- | is32BitLit lit_a = do
- b_code <- getAnyReg b
- let
- code dst
- = b_code dst `snocOL`
- revinstr (OpImm (litToImm lit_a)) (OpReg dst)
- -- in
- return (Any (intSize width) code)
-
-trivialCode width instr maybe_revinstr a b
- = genTrivialCode (intSize width) instr a b
-
--- This is re-used for floating pt instructions too.
-genTrivialCode rep instr a b = do
- (b_op, b_code) <- getNonClobberedOperand b
- a_code <- getAnyReg a
- tmp <- getNewRegNat rep
- let
- -- We want the value of b to stay alive across the computation of a.
- -- But, we want to calculate a straight into the destination register,
- -- because the instruction only has two operands (dst := dst `op` src).
- -- The troublesome case is when the result of b is in the same register
- -- as the destination reg. In this case, we have to save b in a
- -- new temporary across the computation of a.
- code dst
- | dst `regClashesWithOp` b_op =
- b_code `appOL`
- unitOL (MOV rep b_op (OpReg tmp)) `appOL`
- a_code dst `snocOL`
- instr (OpReg tmp) (OpReg dst)
- | otherwise =
- b_code `appOL`
- a_code dst `snocOL`
- instr b_op (OpReg dst)
- -- in
- return (Any rep code)
-
-reg `regClashesWithOp` OpReg reg2 = reg == reg2
-reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-reg `regClashesWithOp` _ = False
-
------------
-
-trivialUCode rep instr x = do
- x_code <- getAnyReg x
- let
- code dst =
- x_code dst `snocOL`
- instr (OpReg dst)
- return (Any rep code)
-
------------
-
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
- (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
- (y_reg, y_code) <- getSomeReg y
- let
- size = floatSize width
- code dst =
- x_code `appOL`
- y_code `snocOL`
- instr size x_reg y_reg dst
- return (Any size code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y
- = genTrivialCode size (instr size) x y
- where size = floatSize pk
-#endif
-
--------------
-
-trivialUFCode size instr x = do
- (x_reg, x_code) <- getSomeReg x
- let
- code dst =
- x_code `snocOL`
- instr x_reg dst
- -- in
- return (Any size code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-trivialCode pk instr x (CmmLit (CmmInt y d))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- src2 = ImmInt (fromInteger y)
- code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
- return (Any II32 code__2)
-
-trivialCode pk instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `snocOL`
- instr src1 (RIReg src2) dst
- return (Any II32 code__2)
-
-------------
-trivialFCode pk instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
- tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType x
- pk2 = cmmExprType y
-
- code__2 dst =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- instr (floatSize pk) src1 src2 dst
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- instr FF64 tmp src2 dst
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- instr FF64 src1 tmp dst
- return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
- code__2)
-
-------------
-trivialUCode size instr x = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat size
- let
- code__2 dst = code `snocOL` instr (RIReg src) dst
- return (Any size code__2)
-
--------------
-trivialUFCode pk instr x = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat pk
- let
- code__2 dst = code `snocOL` instr src dst
- return (Any pk code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-{-
-Wolfgang's PowerPC version of The Rules:
-
-A slightly modified version of The Rules to take advantage of the fact
-that PowerPC instructions work on all registers and don't implicitly
-clobber any fixed registers.
-
-* The only expression for which getRegister returns Fixed is (CmmReg reg).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- It may *not* modify global registers, unless the global
- register happens to be the destination register.
- It may not clobber any other registers. In fact, only ccalls clobber any
- fixed registers.
- Also, it may not modify the counter register (used by genCCall).
-
- Corollary: If a getRegister for a subexpression returns Fixed, you need
- not move it to a fresh temporary before evaluating the next subexpression.
- The Fixed register won't be modified.
- Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
-
-* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
- the value of the destination register.
--}
-
-trivialCode rep signed instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate rep signed y
- = do
- (src1, code1) <- getSomeReg x
- let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
- return (Any (intSize rep) code)
-
-trivialCode rep signed instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
- return (Any (intSize rep) code)
-
-trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm' size instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
- return (Any size code)
-
-trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-
-trivialUCode rep instr x = do
- (src, code) <- getSomeReg x
- let code' dst = code `snocOL` instr dst src
- return (Any rep code')
-
--- There is no "remainder" instruction on the PPC, so we have to do
--- it the hard way.
--- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-
-remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-remainderCode rep div x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `appOL` toOL [
- div dst src1 src2,
- MULLW dst dst (RIReg src2),
- SUBF dst dst src1
- ]
- return (Any (intSize rep) code)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Coercing to/from integer/floating-point...
-
--- When going to integer, we truncate (round towards 0).
-
--- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
--- conversions. We have to store temporaries in memory to move
--- between the integer and the floating point register sets.
-
--- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
--- pretend, on sparc at least, that double and float regs are separate
--- kinds, so the value has to be computed into one kind before being
--- explicitly "converted" to live in the other kind.
-
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if sparc_TARGET_ARCH
-coerceDbl2Flt :: CmmExpr -> NatM Register
-coerceFlt2Dbl :: CmmExpr -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-coerceInt2FP _ x
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ reg ->
- let
- code = registerCode register reg
- src = registerName register reg
-
- code__2 dst = code . mkSeqInstrs [
- ST Q src (spRel 0),
- LD TF dst (spRel 0),
- CVTxy Q TF dst dst]
- in
- return (Any FF64 code__2)
-
--------------
-coerceFP2Int x
- = getRegister x `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
-
- code__2 dst = code . mkSeqInstrs [
- CVTxy TF Q src tmp,
- ST TF tmp (spRel 0),
- LD Q dst (spRel 0)]
- in
- return (Any IntRep code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-coerceInt2FP from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> GITOF; W64 -> GITOD
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- return (Any (floatSize to) code)
-
-------------
-
-coerceFP2Int from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case from of W32 -> GFTOI; W64 -> GDTOI
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- -- in
- return (Any (intSize to) code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-coerceFP2Int from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-coerceInt2FP from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-coerceFP2FP :: Width -> CmmExpr -> NatM Register
-coerceFP2FP to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
- code dst = x_code `snocOL` opc x_reg dst
- -- in
- return (Any (floatSize to) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-coerceInt2FP width1 width2 x = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `appOL` toOL [
- ST (intSize width1) src (spRel (-2)),
- LD (intSize width1) (spRel (-2)) dst,
- FxTOy (intSize width1) (floatSize width2) dst dst]
- return (Any (floatSize $ width2) code__2)
-
-
--- | Coerce a floating point value to integer
---
--- NOTE: On sparc v9 there are no instructions to move a value from an
--- FP register directly to an int register, so we have to use a load/store.
---
-coerceFP2Int width1 width2 x
- = do let fsize1 = floatSize width1
- fsize2 = floatSize width2
-
- isize2 = intSize width2
-
- (fsrc, code) <- getSomeReg x
- fdst <- getNewRegNat fsize2
-
- let code2 dst
- = code
- `appOL` toOL
- -- convert float to int format, leaving it in a float reg.
- [ FxTOy fsize1 isize2 fsrc fdst
-
- -- store the int into mem, then load it back to move
- -- it into an actual int reg.
- , ST fsize2 fdst (spRel (-2))
- , LD isize2 (spRel (-2)) dst]
-
- return (Any isize2 code2)
-
-------------
-coerceDbl2Flt x = do
- (src, code) <- getSomeReg x
- return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
-
-------------
-coerceFlt2Dbl x = do
- (src, code) <- getSomeReg x
- return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-coerceInt2FP fromRep toRep x = do
- (src, code) <- getSomeReg x
- lbl <- getNewLabelNat
- itmp <- getNewRegNat II32
- ftmp <- getNewRegNat FF64
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
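-    -- This is the classic IEEE-double trick: the static data below is
-    -- the double 2^52 + 2^31 (bits 0x43300000 80000000). Flipping the
-    -- sign bit of src and gluing 0x4330 on top builds the double
-    -- 2^52 + 2^31 + src, so the final FSUB leaves exactly src.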
- let
- code' dst = code `appOL` maybe_exts `appOL` toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x43300000 W32),
- CmmStaticLit (CmmInt 0x80000000 W32)],
- XORIS itmp src (ImmInt 0x8000),
- ST II32 itmp (spRel 3),
- LIS itmp (ImmInt 0x4330),
- ST II32 itmp (spRel 2),
- LD FF64 ftmp (spRel 2)
- ] `appOL` addr_code `appOL` toOL [
- LD FF64 dst addr,
- FSUB FF64 dst ftmp dst
- ] `appOL` maybe_frsp dst
-
- maybe_exts = case fromRep of
- W8 -> unitOL $ EXTS II8 src src
- W16 -> unitOL $ EXTS II16 src src
- W32 -> nilOL
- maybe_frsp dst = case toRep of
- W32 -> unitOL $ FRSP dst dst
- W64 -> nilOL
- return (Any (floatSize toRep) code')
-
-coerceFP2Int fromRep toRep x = do
- -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat FF64
- let
- code' dst = code `appOL` toOL [
- -- convert to int in FP reg
- FCTIWZ tmp src,
- -- store value (64bit) from FP to stack
- ST FF64 tmp (spRel 2),
- -- read low word of value (high word is undefined)
- LD II32 dst (spRel 3)]
- return (Any (intSize toRep) code')
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- eXTRA_STK_ARGS_HERE
-
--- We (allegedly) put the first six C-call arguments in registers;
--- where do we start putting the rest of them?
-
--- Moved from Instrs (SDM):
-
-#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-eXTRA_STK_ARGS_HERE :: Int
-eXTRA_STK_ARGS_HERE
- = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
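-        -- the SPARC value 23 presumably reflects the V8 frame layout:
-        -- 16 words of register-window save area, 1 word of struct-return
-        -- pointer and 6 words of register arguments occupy words 0..22.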
-#endif
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
index d19cda45f4..ed59d2bd0a 100644
--- a/compiler/nativeGen/NCGMonad.hs
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -10,28 +10,43 @@ module NCGMonad (
NatM_State(..), mkNatM_State,
NatM, -- instance Monad
- initNat, addImportNat, getUniqueNat,
- mapAccumLNat, setDeltaNat, getDeltaNat,
- getBlockIdNat, getNewLabelNat, getNewRegNat, getNewRegPairNat,
- getPicBaseMaybeNat, getPicBaseNat, getDynFlagsNat
- ) where
+ initNat,
+ addImportNat,
+ getUniqueNat,
+ mapAccumLNat,
+ setDeltaNat,
+ getDeltaNat,
+ getBlockIdNat,
+ getNewLabelNat,
+ getNewRegNat,
+ getNewRegPairNat,
+ getPicBaseMaybeNat,
+ getPicBaseNat,
+ getDynFlagsNat
+)
+
+where
#include "HsVersions.h"
+import Reg
+import Size
+import TargetReg
+
import BlockId
import CLabel ( CLabel, mkAsmTempLabel )
-import Regs
import UniqSupply
import Unique ( Unique )
import DynFlags
-data NatM_State = NatM_State {
- natm_us :: UniqSupply,
- natm_delta :: Int,
- natm_imports :: [(CLabel)],
- natm_pic :: Maybe Reg,
- natm_dflags :: DynFlags
- }
+data NatM_State
+ = NatM_State {
+ natm_us :: UniqSupply,
+ natm_delta :: Int,
+ natm_imports :: [(CLabel)],
+ natm_pic :: Maybe Reg,
+ natm_dflags :: DynFlags
+ }
newtype NatM result = NatM (NatM_State -> (result, NatM_State))
@@ -39,22 +54,27 @@ unNat :: NatM a -> NatM_State -> (a, NatM_State)
unNat (NatM a) = a
mkNatM_State :: UniqSupply -> Int -> DynFlags -> NatM_State
-mkNatM_State us delta dflags = NatM_State us delta [] Nothing dflags
+mkNatM_State us delta dflags
+ = NatM_State us delta [] Nothing dflags
initNat :: NatM_State -> NatM a -> (a, NatM_State)
-initNat init_st m = case unNat m init_st of { (r,st) -> (r,st) }
+initNat init_st m
+ = case unNat m init_st of { (r,st) -> (r,st) }
+
instance Monad NatM where
(>>=) = thenNat
return = returnNat
+
thenNat :: NatM a -> (a -> NatM b) -> NatM b
thenNat expr cont
- = NatM $ \st -> case unNat expr st of
+ = NatM $ \st -> case unNat expr st of
(result, st') -> unNat (cont result) st'
returnNat :: a -> NatM a
-returnNat result = NatM $ \st -> (result, st)
+returnNat result
+ = NatM $ \st -> (result, st)
mapAccumLNat :: (acc -> x -> NatM (acc, y))
-> acc
@@ -75,43 +95,64 @@ getUniqueNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
getDynFlagsNat :: NatM DynFlags
-getDynFlagsNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
- (dflags, (NatM_State us delta imports pic dflags))
+getDynFlagsNat
+ = NatM $ \ (NatM_State us delta imports pic dflags) ->
+ (dflags, (NatM_State us delta imports pic dflags))
+
getDeltaNat :: NatM Int
-getDeltaNat = NatM $ \ st -> (natm_delta st, st)
+getDeltaNat
+ = NatM $ \ st -> (natm_delta st, st)
+
setDeltaNat :: Int -> NatM ()
-setDeltaNat delta = NatM $ \ (NatM_State us _ imports pic dflags) ->
- ((), NatM_State us delta imports pic dflags)
+setDeltaNat delta
+ = NatM $ \ (NatM_State us _ imports pic dflags) ->
+ ((), NatM_State us delta imports pic dflags)
+
addImportNat :: CLabel -> NatM ()
-addImportNat imp = NatM $ \ (NatM_State us delta imports pic dflags) ->
- ((), NatM_State us delta (imp:imports) pic dflags)
+addImportNat imp
+ = NatM $ \ (NatM_State us delta imports pic dflags) ->
+ ((), NatM_State us delta (imp:imports) pic dflags)
+
getBlockIdNat :: NatM BlockId
-getBlockIdNat = do u <- getUniqueNat; return (BlockId u)
+getBlockIdNat
+ = do u <- getUniqueNat
+ return (BlockId u)
+
getNewLabelNat :: NatM CLabel
-getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u)
+getNewLabelNat
+ = do u <- getUniqueNat
+ return (mkAsmTempLabel u)
+
getNewRegNat :: Size -> NatM Reg
-getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep)
+getNewRegNat rep
+ = do u <- getUniqueNat
+ return (targetMkVReg u rep)
+
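+-- getNewRegPairNat is presumably used for 64-bit virtual values on
+-- 32-bit targets: 'lo' carries the low word and 'hi' the high word.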
getNewRegPairNat :: Size -> NatM (Reg,Reg)
-getNewRegPairNat rep = do
- u <- getUniqueNat
- let lo = mkVReg u rep; hi = getHiVRegFromLo lo
- return (lo,hi)
+getNewRegPairNat rep
+ = do u <- getUniqueNat
+ let lo = targetMkVReg u rep; hi = getHiVRegFromLo lo
+ return (lo,hi)
+
getPicBaseMaybeNat :: NatM (Maybe Reg)
-getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state))
+getPicBaseMaybeNat
+ = NatM (\state -> (natm_pic state, state))
+
getPicBaseNat :: Size -> NatM Reg
-getPicBaseNat rep = do
- mbPicBase <- getPicBaseMaybeNat
- case mbPicBase of
- Just picBase -> return picBase
- Nothing -> do
- reg <- getNewRegNat rep
- NatM (\state -> (reg, state { natm_pic = Just reg }))
+getPicBaseNat rep
+ = do mbPicBase <- getPicBaseMaybeNat
+ case mbPicBase of
+ Just picBase -> return picBase
+ Nothing
+ -> do
+ reg <- getNewRegNat rep
+ NatM (\state -> (reg, state { natm_pic = Just reg }))
diff --git a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PIC.hs
index a1e11d8a34..98e4f9f2d5 100644
--- a/compiler/nativeGen/PositionIndependentCode.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -1,24 +1,9 @@
-{-# OPTIONS -w #-}
--- The above warning suppression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module PositionIndependentCode (
- cmmMakeDynamicReference,
- ReferenceKind(..),
- needImportedSymbols,
- pprImportedSymbol,
- pprGotDeclaration,
- initializePicBase
- ) where
-
-#include "HsVersions.h"
-
{-
This module handles generation of position independent code and
dynamic-linking related issues for the native code generator.
+
+   This depends on both the architecture and the OS, so we define it here
+ instead of in one of the architecture specific modules.
Things outside this module which are related to this:
@@ -53,7 +38,30 @@ module PositionIndependentCode (
and ppc-linux).
-}
-#include "nativeGen/NCG.h"
+module PIC (
+ cmmMakeDynamicReference,
+ ReferenceKind(..),
+ needImportedSymbols,
+ pprImportedSymbol,
+ pprGotDeclaration,
+
+ initializePicBase_ppc,
+ initializePicBase_x86
+)
+
+where
+
+import qualified PPC.Instr as PPC
+import qualified PPC.Regs as PPC
+
+import qualified X86.Instr as X86
+
+import Platform
+import Instruction
+import Size
+import Reg
+import NCGMonad
+
import Cmm
import CLabel ( CLabel, pprCLabel,
@@ -61,13 +69,8 @@ import CLabel ( CLabel, pprCLabel,
dynamicLinkerLabelInfo, mkPicBaseLabel,
labelDynamic, externallyVisibleCLabel )
-#if linux_TARGET_OS
import CLabel ( mkForeignLabel )
-#endif
-import Regs
-import Instrs
-import NCGMonad ( NatM, getNewRegNat, getNewLabelNat )
import StaticFlags ( opt_PIC, opt_Static )
import BasicTypes
@@ -80,8 +83,7 @@ import DynFlags
import FastString
--- The most important function here is cmmMakeDynamicReference.
-
+--------------------------------------------------------------------------------
-- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
-- code. It does The Right Thing(tm) to convert the CmmLabel into a
-- position-independent, dynamic-linking-aware reference to the thing
@@ -94,10 +96,12 @@ import FastString
-- - addImportCmmOpt for the CmmOptM monad
-- - addImportNat for the NatM monad.
-data ReferenceKind = DataReference
- | CallReference
- | JumpReference
- deriving(Eq)
+data ReferenceKind
+ = DataReference
+ | CallReference
+ | JumpReference
+ deriving(Eq)
+
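+-- DataReference, CallReference and JumpReference may need different
+-- access methods: for example, dynamic-linker code stubs assume a
+-- regular call's stack layout, so they cannot be used for jumps.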
cmmMakeDynamicReference
:: Monad m => DynFlags
@@ -110,80 +114,98 @@ cmmMakeDynamicReference
cmmMakeDynamicReference dflags addImport referenceKind lbl
| Just _ <- dynamicLinkerLabelInfo lbl
= return $ CmmLit $ CmmLabel lbl -- already processed it, pass through
- | otherwise = case howToAccessLabel dflags referenceKind lbl of
+
+ | otherwise
+ = case howToAccessLabel
+ dflags
+ (platformArch $ targetPlatform dflags)
+ (platformOS $ targetPlatform dflags)
+ referenceKind lbl of
+
AccessViaStub -> do
let stub = mkDynamicLinkerLabel CodeStub lbl
addImport stub
return $ CmmLit $ CmmLabel stub
+
AccessViaSymbolPtr -> do
let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
addImport symbolPtr
- return $ CmmLoad (cmmMakePicReference symbolPtr) bWord
+ return $ CmmLoad (cmmMakePicReference dflags symbolPtr) bWord
+
AccessDirectly -> case referenceKind of
-- for data, we might have to make some calculations:
- DataReference -> return $ cmmMakePicReference lbl
+ DataReference -> return $ cmmMakePicReference dflags lbl
-- all currently supported processors support
-- PC-relative branch and call instructions,
-- so just jump there if it's a call or a jump
_ -> return $ CmmLit $ CmmLabel lbl
-
--- -------------------------------------------------------------------
-
+
+
+-- -----------------------------------------------------------------------------
-- Create a position independent reference to a label.
-- (but do not bother with dynamic linking).
-- We calculate the label's address by adding some (platform-dependent)
-- offset to our base register; this offset is calculated by
-- the function picRelative in the platform-dependent part below.
-cmmMakePicReference :: CLabel -> CmmExpr
-
-#if !mingw32_TARGET_OS
+cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr
+cmmMakePicReference dflags lbl
+
-- Windows doesn't need PIC,
-- everything gets relocated at runtime
+ | OSMinGW32 <- platformOS $ targetPlatform dflags
+ = CmmLit $ CmmLabel lbl
-cmmMakePicReference lbl
- | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordWidth) [
- CmmReg (CmmGlobal PicBaseReg),
- CmmLit $ picRelative lbl
- ]
- where
- absoluteLabel lbl = case dynamicLinkerLabelInfo lbl of
- Just (GotSymbolPtr, _) -> False
- Just (GotSymbolOffset, _) -> False
- _ -> True
-#endif
-cmmMakePicReference lbl = CmmLit $ CmmLabel lbl
+ | (opt_PIC || not opt_Static) && absoluteLabel lbl
+ = CmmMachOp (MO_Add wordWidth)
+ [ CmmReg (CmmGlobal PicBaseReg)
+ , CmmLit $ picRelative
+ (platformArch $ targetPlatform dflags)
+ (platformOS $ targetPlatform dflags)
+ lbl ]
--- ===================================================================
--- Platform dependent stuff
--- ===================================================================
+ | otherwise
+ = CmmLit $ CmmLabel lbl
+
+
+absoluteLabel :: CLabel -> Bool
+absoluteLabel lbl
+ = case dynamicLinkerLabelInfo lbl of
+ Just (GotSymbolPtr, _) -> False
+ Just (GotSymbolOffset, _) -> False
+ _ -> True
+
+--------------------------------------------------------------------------------
-- Knowledge about what special dynamic linker labels like symbol
-- pointers, code stubs and GOT offsets look like is located in the
-- module CLabel.
--- -------------------------------------------------------------------
-
-- We have to decide which labels need to be accessed
-- indirectly or via a piece of stub code.
+data LabelAccessStyle
+ = AccessViaStub
+ | AccessViaSymbolPtr
+ | AccessDirectly
-data LabelAccessStyle = AccessViaStub
- | AccessViaSymbolPtr
- | AccessDirectly
+howToAccessLabel
+ :: DynFlags -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle
-howToAccessLabel :: DynFlags -> ReferenceKind -> CLabel -> LabelAccessStyle
-
-#if mingw32_TARGET_OS
-- Windows
--
-- We need to access *exactly* those things that
-- are imported from a DLL via an __imp_* label.
-- There are no stubs for imported code.
+--
+howToAccessLabel dflags _ OSMinGW32 _ lbl
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
-howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
- | otherwise = AccessDirectly
-#elif darwin_TARGET_OS
-- Mach-O (Darwin, Mac OS X)
--
-- Indirect access is required in the following cases:
@@ -191,54 +213,48 @@ howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessVi
-- * (not on x86_64) data from a different module, if we're generating PIC code
-- It is always possible to access something indirectly,
-- even when it's not necessary.
+--
+howToAccessLabel dflags arch OSDarwin DataReference lbl
+ -- data access to a dynamic library goes via a symbol pointer
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+        -- when generating PIC code, all cross-module data references
+        -- must go via a symbol pointer, too, because the assembler
+        -- cannot generate code for a label difference where one
+        -- label is undefined. Doesn't apply to x86_64.
+ -- Unfortunately, we don't know whether it's cross-module,
+ -- so we do it for all externally visible labels.
+ -- This is a slight waste of time and space, but otherwise
+ -- we'd need to pass the current Module all the way in to
+ -- this function.
+ | arch /= ArchX86_64
+ , opt_PIC && externallyVisibleCLabel lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
+howToAccessLabel dflags arch OSDarwin JumpReference lbl
+ -- dyld code stubs don't work for tailcalls because the
+ -- stack alignment is only right for regular calls.
+ -- Therefore, we have to go via a symbol pointer:
+ | arch == ArchX86 || arch == ArchX86_64
+ , labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
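+
+        -- if the guards fail we fall through to the general
+        -- Darwin cases below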
+
+
+howToAccessLabel dflags arch OSDarwin _ lbl
+ -- Code stubs are the usual method of choice for imported code;
+ -- not needed on x86_64 because Apple's new linker, ld64, generates
+ -- them automatically.
+ | arch /= ArchX86_64
+ , labelDynamic (thisPackage dflags) lbl
+ = AccessViaStub
+
+ | otherwise
+ = AccessDirectly
-howToAccessLabel dflags DataReference lbl
- -- data access to a dynamic library goes via a symbol pointer
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-
-#if !x86_64_TARGET_ARCH
- -- when generating PIC code, all cross-module data references must
- -- must go via a symbol pointer, too, because the assembler
- -- cannot generate code for a label difference where one
- -- label is undefined. Doesn't apply t x86_64.
- -- Unfortunately, we don't know whether it's cross-module,
- -- so we do it for all externally visible labels.
- -- This is a slight waste of time and space, but otherwise
- -- we'd need to pass the current Module all the way in to
- -- this function.
- | opt_PIC && externallyVisibleCLabel lbl = AccessViaSymbolPtr
-#endif
- | otherwise = AccessDirectly
-
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
- -- dyld code stubs don't work for tailcalls because the
- -- stack alignment is only right for regular calls.
- -- Therefore, we have to go via a symbol pointer:
-howToAccessLabel dflags JumpReference lbl
- | labelDynamic (thisPackage dflags) lbl
- = AccessViaSymbolPtr
-#endif
-
-howToAccessLabel dflags _ lbl
-#if !x86_64_TARGET_ARCH
- -- Code stubs are the usual method of choice for imported code;
- -- not needed on x86_64 because Apple's new linker, ld64, generates
- -- them automatically.
- | labelDynamic (thisPackage dflags) lbl
- = AccessViaStub
-#endif
- | otherwise
- = AccessDirectly
-
-
-#elif linux_TARGET_OS && powerpc64_TARGET_ARCH
--- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
-
-howToAccessLabel _ DataReference lbl = AccessViaSymbolPtr
-howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
-
-#elif linux_TARGET_OS
-- ELF (Linux)
--
-- ELF tries to pretend to the main application code that dynamic linking does
@@ -250,63 +266,82 @@ howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
-- from position independent code. It is also required from the main program
-- when dynamic libraries containing Haskell code are used.
-howToAccessLabel _ _ lbl
+howToAccessLabel _ ArchPPC_64 OSLinux kind _
+
+ -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
+ | DataReference <- kind
+ = AccessViaSymbolPtr
+
+ -- actually, .label instead of label
+ | otherwise
+ = AccessDirectly
+
+howToAccessLabel _ _ OSLinux _ _
-- no PIC -> the dynamic linker does everything for us;
-- if we don't dynamically link to Haskell code,
-- it actually manages to do so without messing things up.
- | not opt_PIC && opt_Static = AccessDirectly
-
-howToAccessLabel dflags DataReference lbl
- -- A dynamic label needs to be accessed via a symbol pointer.
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-#if powerpc_TARGET_ARCH
+ | not opt_PIC && opt_Static
+ = AccessDirectly
+
+howToAccessLabel dflags arch OSLinux DataReference lbl
+ -- A dynamic label needs to be accessed via a symbol pointer.
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
-- For PowerPC32 -fPIC, we have to access even static data
-- via a symbol pointer (see below for an explanation why
-- PowerPC32 Linux is especially broken).
- | opt_PIC = AccessViaSymbolPtr
-#endif
- | otherwise = AccessDirectly
-
-
--- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
--- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
--- require the address of the GOT to be loaded into register %ebx on entry.
--- * The linker will take any reference to the symbol stub as a hint that
--- the label in question is a code label. When linking executables, this
--- will cause the linker to replace even data references to the label with
--- references to the symbol stub.
-
--- This leaves calling a (foreign) function from non-PIC code
--- (AccessDirectly, because we get an implicit symbol stub)
--- and calling functions from PIC code on non-i386 platforms (via a symbol stub)
-
-howToAccessLabel dflags CallReference lbl
- | labelDynamic (thisPackage dflags) lbl && not opt_PIC
- = AccessDirectly
-#if !i386_TARGET_ARCH
- | labelDynamic (thisPackage dflags) lbl && opt_PIC
- = AccessViaStub
-#endif
-
-howToAccessLabel dflags _ lbl
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
- | otherwise = AccessDirectly
-#else
---
+ | arch == ArchPPC
+ , opt_PIC
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
+
+ -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
+ -- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
+ -- require the address of the GOT to be loaded into register %ebx on entry.
+ -- * The linker will take any reference to the symbol stub as a hint that
+ -- the label in question is a code label. When linking executables, this
+ -- will cause the linker to replace even data references to the label with
+ -- references to the symbol stub.
+
+ -- This leaves calling a (foreign) function from non-PIC code
+ -- (AccessDirectly, because we get an implicit symbol stub)
+ -- and calling functions from PIC code on non-i386 platforms (via a symbol stub)
+
+howToAccessLabel dflags arch OSLinux CallReference lbl
+ | labelDynamic (thisPackage dflags) lbl && not opt_PIC
+ = AccessDirectly
+
+ | arch /= ArchX86
+ , labelDynamic (thisPackage dflags) lbl && opt_PIC
+ = AccessViaStub
+
+howToAccessLabel dflags _ OSLinux _ lbl
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
-- all other platforms
---
-howToAccessLabel _ _ _
- | not opt_PIC = AccessDirectly
- | otherwise = panic "howToAccessLabel: PIC not defined for this platform"
-#endif
+howToAccessLabel _ _ _ _ _
+ | not opt_PIC
+ = AccessDirectly
+
+ | otherwise
+ = panic "howToAccessLabel: PIC not defined for this platform"
+
+
-- -------------------------------------------------------------------
+-- | Says what we have to add to our 'PIC base register' in order to
+-- get the address of a label.
--- What do we have to add to our 'PIC base register' in order to
--- get the address of a label?
+picRelative :: Arch -> OS -> CLabel -> CmmLit
-picRelative :: CLabel -> CmmLit
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
-- Darwin, but not x86_64:
-- The PIC base register points to the PIC base label at the beginning
-- of the current CmmTop. We just have to use a label difference to
@@ -314,21 +349,21 @@ picRelative :: CLabel -> CmmLit
-- We have already made sure that all labels that are not from the current
-- module are accessed indirectly ('as' can't calculate differences between
-- undefined labels).
+picRelative arch OSDarwin lbl
+ | arch /= ArchX86_64
+ = CmmLabelDiffOff lbl mkPicBaseLabel 0
+
-picRelative lbl
- = CmmLabelDiffOff lbl mkPicBaseLabel 0
-
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
-- PowerPC Linux:
-- The PIC base register points to our fake GOT. Use a label difference
-- to get the offset.
-- We have made sure that *everything* is accessed indirectly, so this
-- is only used for offsets from the GOT to symbol pointers inside the
-- GOT.
-picRelative lbl
- = CmmLabelDiffOff lbl gotLabel 0
+picRelative ArchPPC OSLinux lbl
+ = CmmLabelDiffOff lbl gotLabel 0
+
-#elif linux_TARGET_OS || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
-- Most Linux versions:
-- The PIC base register points to the GOT. Use foo@got for symbol
-- pointers, and foo@gotoff for everything else.
@@ -336,61 +371,116 @@ picRelative lbl
-- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
-- and a GotSymbolOffset label for other things.
-- For reasons of tradition, the symbol offset label is written as a plain label.
+picRelative arch os lbl
+ | os == OSLinux || (os == OSDarwin && arch == ArchX86_64)
+ = let result
+ | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
-picRelative lbl
- | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
- | otherwise
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+ | otherwise
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+
+ in result
+
+picRelative _ _ _
+ = panic "PositionIndependentCode.picRelative undefined for this platform"
-#else
-picRelative lbl = panic "PositionIndependentCode.picRelative"
-#endif
--- -------------------------------------------------------------------
--- What do we have to add to every assembly file we generate?
+--------------------------------------------------------------------------------
-- utility function for pretty-printing asm-labels,
-- copied from PprMach
-asmSDoc d = Outputable.withPprStyleDoc (
- Outputable.mkCodeStyle Outputable.AsmStyle) d
-pprCLabel_asm l = asmSDoc (pprCLabel l)
+--
+asmSDoc :: Outputable.SDoc -> Doc
+asmSDoc d
+ = Outputable.withPprStyleDoc
+ (Outputable.mkCodeStyle Outputable.AsmStyle) d
+
+pprCLabel_asm :: CLabel -> Doc
+pprCLabel_asm l
+ = asmSDoc (pprCLabel l)
+
+
+needImportedSymbols :: Arch -> OS -> Bool
+needImportedSymbols arch os
+ | os == OSDarwin
+ , arch /= ArchX86_64
+ = True
+
+ -- PowerPC Linux: -fPIC or -dynamic
+ | os == OSLinux
+ , arch == ArchPPC
+ = opt_PIC || not opt_Static
+
+ -- i386 (and others?): -dynamic but not -fPIC
+ | os == OSLinux
+ , arch /= ArchPPC_64
+ = not opt_Static && not opt_PIC
+
+ | otherwise
+ = False
+-- gotLabel
+-- The label used to refer to our "fake GOT" from
+-- position-independent code.
+gotLabel :: CLabel
+gotLabel
+ = mkForeignLabel -- HACK: it's not really foreign
+ (fsLit ".LCTOC1") Nothing False IsData
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
-needImportedSymbols = True
+--------------------------------------------------------------------------------
-- We don't need to declare any offset tables.
-- However, for PIC on x86, we need a small helper function.
-#if i386_TARGET_ARCH
-pprGotDeclaration
- | opt_PIC
- = vcat [
- ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
- ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
- ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
- ptext (sLit "___i686.get_pc_thunk.ax:"),
- ptext (sLit "\tmovl (%esp), %eax"),
- ptext (sLit "\tret")
- ]
- | otherwise = Pretty.empty
-#else
-pprGotDeclaration = Pretty.empty
-#endif
+pprGotDeclaration :: Arch -> OS -> Doc
+pprGotDeclaration ArchX86 OSDarwin
+ | opt_PIC
+ = vcat [
+ ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
+ ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
+ ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
+ ptext (sLit "___i686.get_pc_thunk.ax:"),
+ ptext (sLit "\tmovl (%esp), %eax"),
+ ptext (sLit "\tret") ]
+
+ | otherwise
+ = empty
+
+
+-- pprGotDeclaration
+-- Output whatever needs to be output once per .s file.
+-- The .LCTOC1 label is defined to point 32768 bytes into the table,
+-- to make the most of the PPC's 16-bit displacements.
+-- Only needed for PIC.
+pprGotDeclaration arch OSLinux
+ | arch /= ArchPPC_64
+ , not opt_PIC
+ = Pretty.empty
+
+ | arch /= ArchPPC_64
+ = vcat [
+ ptext (sLit ".section \".got2\",\"aw\""),
+ ptext (sLit ".LCTOC1 = .+32768") ]
+
+pprGotDeclaration _ _
+ = panic "pprGotDeclaration: no match"
+
+--------------------------------------------------------------------------------
-- On Darwin, we have to generate our own stub code for lazy binding..
-- For each processor architecture, there are two versions, one for PIC
-- and one for non-PIC.
--
-- Whenever you change something in this assembler output, make sure
-- the splitter in driver/split/ghc-split.lprl recognizes the new output
-pprImportedSymbol importedLbl
-#if powerpc_TARGET_ARCH
- | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case opt_PIC of
- False ->
+
+pprImportedSymbol :: Arch -> OS -> CLabel -> Doc
+pprImportedSymbol ArchPPC OSDarwin importedLbl
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
vcat [
ptext (sLit ".symbol_stub"),
ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -404,7 +494,7 @@ pprImportedSymbol importedLbl
<> ptext (sLit "$lazy_ptr)"),
ptext (sLit "\tbctr")
]
- True ->
+ True ->
vcat [
ptext (sLit ".section __TEXT,__picsymbolstub1,")
<> ptext (sLit "symbol_stubs,pure_instructions,32"),
@@ -424,16 +514,27 @@ pprImportedSymbol importedLbl
ptext (sLit "\tmtctr r12"),
ptext (sLit "\tbctr")
]
- $+$ vcat [
- ptext (sLit ".lazy_symbol_pointer"),
- ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long dyld_stub_binding_helper")
- ]
-#elif i386_TARGET_ARCH
- | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case opt_PIC of
- False ->
+ $+$ vcat [
+ ptext (sLit ".lazy_symbol_pointer"),
+ ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long dyld_stub_binding_helper")]
+
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext (sLit ".non_lazy_symbol_pointer"),
+ char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long\t0")]
+
+ | otherwise
+ = empty
+
+
+pprImportedSymbol ArchX86 OSDarwin importedLbl
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
vcat [
ptext (sLit ".symbol_stub"),
ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -446,7 +547,7 @@ pprImportedSymbol importedLbl
<> ptext (sLit "$lazy_ptr"),
ptext (sLit "\tjmp dyld_stub_binding_helper")
]
- True ->
+ True ->
vcat [
ptext (sLit ".section __TEXT,__picsymbolstub2,")
<> ptext (sLit "symbol_stubs,pure_instructions,25"),
@@ -464,27 +565,28 @@ pprImportedSymbol importedLbl
ptext (sLit "\tpushl %eax"),
ptext (sLit "\tjmp dyld_stub_binding_helper")
]
- $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr")
+ $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr")
<> (if opt_PIC then int 2 else int 3)
<> ptext (sLit ",lazy_symbol_pointers"),
- ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long L") <> pprCLabel_asm lbl
- <> ptext (sLit "$stub_binder")
- ]
-#endif
--- We also have to declare our symbol pointers ourselves:
- | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
- = vcat [
- ptext (sLit ".non_lazy_symbol_pointer"),
- char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long\t0")
- ]
-
- | otherwise = empty
-
-#elif linux_TARGET_OS && !powerpc64_TARGET_ARCH
+ ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long L") <> pprCLabel_asm lbl
+ <> ptext (sLit "$stub_binder")]
+
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext (sLit ".non_lazy_symbol_pointer"),
+ char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long\t0")]
+
+ | otherwise
+ = empty
+
+
+pprImportedSymbol _ OSDarwin _
+ = empty
+
-- ELF / Linux
--
@@ -514,64 +616,29 @@ pprImportedSymbol importedLbl
-- When needImportedSymbols is defined,
-- the NCG will keep track of all DynamicLinkerLabels it uses
-- and output each of them using pprImportedSymbol.
-#if powerpc_TARGET_ARCH
- -- PowerPC Linux: -fPIC or -dynamic
-needImportedSymbols = opt_PIC || not opt_Static
-#else
- -- i386 (and others?): -dynamic but not -fPIC
-needImportedSymbols = not opt_Static && not opt_PIC
-#endif
--- gotLabel
--- The label used to refer to our "fake GOT" from
--- position-independent code.
-gotLabel = mkForeignLabel -- HACK: it's not really foreign
- (fsLit ".LCTOC1") Nothing False IsData
+pprImportedSymbol ArchPPC_64 OSLinux _
+ = empty
--- pprGotDeclaration
--- Output whatever needs to be output once per .s file.
--- The .LCTOC1 label is defined to point 32768 bytes into the table,
--- to make the most of the PPC's 16-bit displacements.
--- Only needed for PIC.
-
-pprGotDeclaration
- | not opt_PIC = Pretty.empty
- | otherwise = vcat [
- ptext (sLit ".section \".got2\",\"aw\""),
- ptext (sLit ".LCTOC1 = .+32768")
- ]
-
--- We generate one .long/.quad literal for every symbol we import;
--- the dynamic linker will relocate those addresses.
-
-pprImportedSymbol importedLbl
- | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
- = vcat [
- ptext (sLit ".section \".got2\", \"aw\""),
- ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
- ptext symbolSize <+> pprCLabel_asm lbl
- ]
-
--- PLT code stubs are generated automatically by the dynamic linker.
- | otherwise = empty
- where
- symbolSize = case wordWidth of
+pprImportedSymbol _ OSLinux importedLbl
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = let symbolSize = case wordWidth of
W32 -> sLit "\t.long"
W64 -> sLit "\t.quad"
_ -> panic "Unknown wordRep in pprImportedSymbol"
-#else
-
--- For all other currently supported platforms, we don't need to do
--- anything at all.
+ in vcat [
+ ptext (sLit ".section \".got2\", \"aw\""),
+ ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
+ ptext symbolSize <+> pprCLabel_asm lbl ]
-needImportedSymbols = False
-pprGotDeclaration = Pretty.empty
-pprImportedSymbol _ = empty
-#endif
+ -- PLT code stubs are generated automatically by the dynamic linker.
+ | otherwise = empty
--- -------------------------------------------------------------------
+pprImportedSymbol _ _ _
+ = panic "PIC.pprImportedSymbol: no match"
+--------------------------------------------------------------------------------
-- Generate code to calculate the address that should be put in the
-- PIC base register.
-- This is called by MachCodeGen for every CmmProc that accessed the
@@ -581,10 +648,6 @@ pprImportedSymbol _ = empty
-- It is assumed that the first NatCmmTop in the input list is a Proc
-- and the rest are CmmDatas.
-initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
-
-#if darwin_TARGET_OS
-
-- Darwin is simple: just fetch the address of a local label.
-- The FETCHPC pseudo-instruction is expanded to multiple instructions
-- during pretty-printing so that we don't have to deal with the
@@ -598,12 +661,7 @@ initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
-- call 1f
-- 1: popl %picReg
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
- = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
- where BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHPC picReg : insns)
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
-- Get a pointer to our own fake GOT, which is defined on a per-module basis.
-- This is exactly how GCC does it, and it's quite horrible:
@@ -612,7 +670,13 @@ initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
-- define in .text space right next to the proc. This .long literal contains
-- the (32-bit) offset from our local label to our global offset table
-- (.LCTOC1 aka gotOffLabel).
-initializePicBase picReg
+
+initializePicBase_ppc
+ :: Arch -> OS -> Reg
+ -> [NatCmmTop PPC.Instr]
+ -> NatM [NatCmmTop PPC.Instr]
+
+initializePicBase_ppc ArchPPC OSLinux picReg
(CmmProc info lab params (ListGraph blocks) : statics)
= do
gotOffLabel <- getNewLabelNat
@@ -624,16 +688,33 @@ initializePicBase picReg
mkPicBaseLabel
0)
]
- offsetToOffset = ImmConstantDiff (ImmCLbl gotOffLabel)
- (ImmCLbl mkPicBaseLabel)
- BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHPC picReg
- : LD wordSize tmp
- (AddrRegImm picReg offsetToOffset)
- : ADD picReg picReg (RIReg tmp)
+ offsetToOffset
+ = PPC.ImmConstantDiff
+ (PPC.ImmCLbl gotOffLabel)
+ (PPC.ImmCLbl mkPicBaseLabel)
+
+ BasicBlock bID insns
+ = head blocks
+
+ b' = BasicBlock bID (PPC.FETCHPC picReg
+ : PPC.LD PPC.archWordSize tmp
+ (PPC.AddrRegImm picReg offsetToOffset)
+ : PPC.ADD picReg picReg (PPC.RIReg tmp)
: insns)
+
return (CmmProc info lab params (ListGraph (b' : tail blocks)) : gotOffset : statics)
-#elif i386_TARGET_ARCH && linux_TARGET_OS
+
+initializePicBase_ppc ArchPPC OSDarwin picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
+ = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
+
+
+initializePicBase_ppc _ _ _ _
+ = panic "initializePicBase_ppc: not needed"
+
-- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
-- which pretty-prints as:
@@ -642,13 +723,24 @@ initializePicBase picReg
-- addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg
-- (See PprMach.lhs)
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
+initializePicBase_x86
+ :: Arch -> OS -> Reg
+ -> [NatCmmTop X86.Instr]
+ -> NatM [NatCmmTop X86.Instr]
+
+initializePicBase_x86 ArchX86 OSLinux picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
= return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
where BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHGOT picReg : insns)
+ b' = BasicBlock bID (X86.FETCHGOT picReg : insns)
+
+initializePicBase_x86 ArchX86 OSDarwin picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
+ = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (X86.FETCHPC picReg : insns)
-#else
-initializePicBase picReg proc = panic "initializePicBase"
+initializePicBase_x86 _ _ _ _
+ = panic "initializePicBase_x86: not needed"
--- mingw32_TARGET_OS: not needed, won't be called
-#endif
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
new file mode 100644
index 0000000000..6661a3ec92
--- /dev/null
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -0,0 +1,1364 @@
+{-# OPTIONS -w #-}
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module PPC.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import PPC.Instr
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import NCGMonad
+import Instruction
+import PIC
+import Size
+import RegClass
+import Reg
+import Platform
+
+-- Our intermediate code:
+import BlockId
+import PprCmm ( pprExpr )
+import Cmm
+import CLabel
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import DynFlags
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Bits
+import Data.Int
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+ os = platformOS $ targetPlatform dflags
+ case picBaseMb of
+ Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen dflags (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+ CmmReturn params ->
+ panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
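-- Editorial sketch (not part of the patch): both `snocOL` and `appOL`
-- are O(1), so long instruction sequences are appended cheaply and only
-- flattened to a plain list at the end. `demoBlock` is a hypothetical
-- name used for illustration.
demoBlock :: InstrBlock
demoBlock
    = unitOL (COMMENT (fsLit "prologue"))
        `appOL` toOL [ COMMENT (fsLit "body") ]
        `snocOL` COMMENT (fsLit "epilogue")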
+
+
+-- | Registers passed up the tree. If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
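-- Editorial examples (not part of the patch): both 32-bit encodings of
-- -1 are accepted, while 2^31 is correctly rejected.
is32BitExamples :: Bool
is32BitExamples
    =      is32BitInteger (-1)                  -- True
        && is32BitInteger 18446744073709551615  -- same bits as -1: True
        && not (is32BitInteger 0x80000000)      -- 2^31: False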
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+ = panic "PPC.CodeGen.mangleIndexTree: no match"
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms. Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality. Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result. Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64 -- a.k.a "Register64"
+ = ChildCode64
+ InstrBlock -- code
+ Reg -- the lower 32-bit temporary which contains the
+ -- result; use getHiVRegFromLo to find the other
+ -- VRegUnique. Rules of this simplified insn
+ -- selection game are therefore that the returned
+ -- Reg may be modified
+
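-- Editorial sketch (hypothetical helper, not part of the patch): a
-- consumer recovers both halves from the single low register that
-- ChildCode64 carries.
both32 :: CmmExpr -> NatM (Reg, Reg, InstrBlock)
both32 expr = do
    ChildCode64 code rlo <- iselExpr64 expr
    return (rlo, getHiVRegFromLo rlo, code)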
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+ Amode hi_addr addr_code <- getAmode addrTree
+ case addrOffset hi_addr 4 of
+ Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+ Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
+ return (AddrRegImm hi_ptr (ImmInt 0),
+ AddrRegImm hi_ptr (ImmInt 4),
+ code)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Big-endian store
+ mov_hi = ST II32 rhi hi_addr
+ mov_lo = ST II32 rlo lo_addr
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst II32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MR r_dst_lo r_src_lo
+ mov_hi = MR r_dst_hi r_src_hi
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = LD II32 rhi hi_addr
+ mov_lo = LD II32 rlo lo_addr
+ return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+ = return (ChildCode64 nilOL (mkVReg vu II32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ half0 = fromIntegral (fromIntegral i :: Word16)
+        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
+        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
+        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
+
+ code = toOL [
+ LIS rlo (ImmInt half1),
+ OR rlo rlo (RIImm $ ImmInt half0),
+ LIS rhi (ImmInt half3),
+                OR rhi rhi (RIImm $ ImmInt half2)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ ADDC rlo r1lo r2lo,
+ ADDE rhi r1hi r2hi ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
+ (expr_reg,expr_code) <- getSomeReg expr
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = LI rhi (ImmInt 0)
+ mov_lo = MR rlo expr_reg
+ return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+iselExpr64 expr
+ = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+
+getRegister (CmmLoad mem pk)
+ | not (isWord64 pk)
+ = do
+ Amode addr addr_code <- getAmode mem
+ let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
+ addr_code `snocOL` LD size dst addr
+ return (Any size code)
+ where size = cmmTypeSize pk
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_Not rep -> triv_ucode_int rep NOT
+
+ MO_F_Neg w -> triv_ucode_float w FNEG
+ MO_S_Neg w -> triv_ucode_int w NEG
+
+ MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
+ MO_FF_Conv W32 W64 -> conversionNop FF64 x
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ MO_SS_Conv from to
+ | from == to -> conversionNop (intSize to) x
+
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_SS_Conv W32 to -> conversionNop (intSize to) x
+ MO_SS_Conv W16 W8 -> conversionNop II8 x
+ MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
+ MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
+
+ MO_UU_Conv from to
+ | from == to -> conversionNop (intSize to) x
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_UU_Conv W32 to -> conversionNop (intSize to) x
+ MO_UU_Conv W16 W8 -> conversionNop II8 x
+ MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
+ MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
+ _ -> panic "PPC.CodeGen.getRegister: no match"
+
+ where
+ triv_ucode_int width instr = trivialUCode (intSize width) instr x
+ triv_ucode_float width instr = trivialUCode (floatSize width) instr x
+
+ conversionNop new_size expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_size)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_F_Add w -> triv_float w FADD
+ MO_F_Sub w -> triv_float w FSUB
+ MO_F_Mul w -> triv_float w FMUL
+ MO_F_Quot w -> triv_float w FDIV
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+ MO_Add W32 ->
+ case y of
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+ -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
+ CmmLit lit
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code dst = srcCode `appOL` toOL [
+ ADDIS dst src (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ return (Any II32 code)
+ _ -> trivialCode W32 True ADD x y
+
+ MO_Add rep -> trivialCode rep True ADD x y
+ MO_Sub rep ->
+ case y of -- subfi ('subtract from' with immediate) doesn't exist
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+ -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+ _ -> trivialCodeNoImm' (intSize rep) SUBF y x
+
+ MO_Mul rep -> trivialCode rep True MULLW x y
+
+ MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
+
+ MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
+ MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+ MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_And rep -> trivialCode rep False AND x y
+ MO_Or rep -> trivialCode rep False OR x y
+ MO_Xor rep -> trivialCode rep False XOR x y
+
+ MO_Shl rep -> trivialCode rep False SLW x y
+ MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+ MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+ _ -> panic "PPC.CodeGen.getRegister: no match"
+
+ where
+ triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
+ triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
+
+getRegister (CmmLit (CmmInt i rep))
+ | Just imm <- makeImmediate rep True i
+ = let
+ code dst = unitOL (LI dst imm)
+ in
+ return (Any (intSize rep) code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let size = floatSize frep
+ code dst =
+ LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f frep)]
+ `consOL` (addr_code `snocOL` LD size dst addr)
+ return (Any size code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitType lit
+ imm = litToImm lit
+ code dst = toOL [
+ LIS dst (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ in return (Any (cmmTypeSize rep) code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+-- | extendSExpr, extendUExpr: wrap an integer expression of width rep
+--   in a sign- or zero-extending conversion to W32.
+extendSExpr, extendUExpr :: Width -> CmmExpr -> CmmExpr
+extendSExpr W32 x = x
+extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
+extendUExpr W32 x = x
+extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
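-- Editorial sketch of the rule above (hypothetical helper, not part of
-- the patch): freeze an amode's effective address into a fresh register
-- so that unrelated code can safely run before the final use.
forceAmodeToReg :: AddrMode -> NatM (AddrMode, InstrBlock)
forceAmodeToReg addr = do
    tmp <- getNewRegNat II32
    let code = case addr of
                AddrRegReg r1 r2  -> unitOL (ADD tmp r1 (RIReg r2))
                AddrRegImm r1 imm -> unitOL (ADD tmp r1 (RIImm imm))
    return (AddrRegImm tmp (ImmInt 0), code)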
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate W32 True (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate W32 True i
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
+ = do
+ tmp <- getNewRegNat II32
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat II32
+ let imm = litToImm lit
+ code = unitOL (LIS tmp (HA imm))
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add W32) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+
+
+-- The 'CondCode' type: Condition codes passed up the tree.
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bit first
+
+getCondCode (CmmMachOp mop [x, y])
+ = case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ code' = code `snocOL`
+ (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL`
+ (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
+ return (CondCode False cond code')
+
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
+ code'' = case cond of -- twiddle CR to handle unordered case
+ GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+ LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+ _ -> code'
+ where
+ ltbit = 0 ; eqbit = 2 ; gtbit = 1
+ return (CondCode True cond code'')
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode _ reg src
+ = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+ where
+ dst = getRegisterReg reg
+
+
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (unitOL $ JMP lbl)
+
+genJump tree
+ = do
+ (target,code) <- getSomeReg tree
+ return (code `snocOL` MTCTR target `snocOL` BCTR [])
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` BCC cond id)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+ The PowerPC calling convention for Darwin/Mac OS X
+ is described in Apple's document
+ "Inside Mac OS X - Mach-O Runtime Architecture".
+
+ PowerPC Linux uses the System V Release 4 Calling Convention
+ for PowerPC. It is described in the
+ "System V Application Binary Interface PowerPC Processor Supplement".
+
+ Both conventions are similar:
+ Parameters may be passed in general-purpose registers starting at r3, in
+ floating point registers starting at f1, or on the stack.
+
+ But there are substantial differences:
+ * The number of registers used for parameter passing and the exact set of
+ nonvolatile registers differs (see MachRegs.lhs).
+ * On Darwin, stack space is always reserved for parameters, even if they are
+ passed in registers. The called routine may choose to save parameters from
+ registers to the corresponding space on the stack.
+ * On Darwin, a corresponding amount of GPRs is skipped when a floating point
+ parameter is passed in an FPR.
+ * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+ starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+ Darwin just treats an I64 like two separate II32s (high word first).
+ * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
+
+ According to both conventions, the parameter area should be part of the
+ caller's stack frame, allocated in the caller's prologue code (large enough
+ to hold the parameter lists for all called routines). The NCG already
+ uses the stack for register spilling, leaving 64 bytes free at the top.
+ If we need a larger parameter area than that, we just allocate a new stack
+ frame just before ccalling.
+-}
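{-
Editorial worked example (our reading of the rules above; the register
assignments are illustrative, not part of the patch). For a call
    f(I32 a, FF64 d, I64 x)
Darwin passes  a in r3; d in f1, skipping r4 and r5; x in r6/r7
(high word first), and still reserves stack space for all three.
SysV passes    a in r3; d in f1 with no GPRs skipped; x must start at
an odd-numbered GPR, so r4 is skipped and x goes in r5/r6.
-}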
+
+
+genCCall (CmmPrim MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
+ = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
+ -- we rely on argument promotion in the codeGen
+ do
+ (finalStack,passArgumentsCode,usedRegs) <- passArguments
+ (zip args argReps)
+ allArgRegs allFPArgRegs
+ initialStackOffset
+ (toOL []) []
+
+ (labelOrExpr, reduceToFF32) <- case target of
+ CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+ CmmCallee expr conv -> return (Right expr, False)
+ CmmPrim mop -> outOfLineFloatOp mop
+
+ let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+ codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
+
+ case labelOrExpr of
+ Left lbl -> do
+ return ( codeBefore
+ `snocOL` BL lbl usedRegs
+ `appOL` codeAfter)
+ Right dyn -> do
+ (dynReg, dynCode) <- getSomeReg dyn
+ return ( dynCode
+ `snocOL` MTCTR dynReg
+ `appOL` codeBefore
+ `snocOL` BCTRL usedRegs
+ `appOL` codeAfter)
+ where
+#if darwin_TARGET_OS
+ initialStackOffset = 24
+ -- size of linkage area + size of arguments, in bytes
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+ map (widthInBytes . typeWidth) argReps
+#elif linux_TARGET_OS
+ initialStackOffset = 8
+ stackDelta finalStack = roundTo 16 finalStack
+#endif
+ args = map hintlessCmm argsAndHints
+ argReps = map cmmExprType args
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
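        -- Editorial examples: roundTo 16 24 == 32, roundTo 16 32 == 32.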
+
+ move_sp_down finalStack
+ | delta > 64 =
+ toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
+ DELTA (-delta)]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+ move_sp_up finalStack
+ | delta > 64 =
+ toOL [ADD sp sp (RIImm (ImmInt delta)),
+ DELTA 0]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+
+
+ passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+ passArguments ((arg,arg_ty):args) gprs fprs stackOffset
+ accumCode accumUsed | isWord64 arg_ty =
+ do
+ ChildCode64 code vr_lo <- iselExpr64 arg
+ let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+ passArguments args
+ (drop 2 gprs)
+ fprs
+ (stackOffset+8)
+ (accumCode `appOL` code
+ `snocOL` storeWord vr_hi gprs stackOffset
+ `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+ ((take 2 gprs) ++ accumUsed)
+ where
+ storeWord vr (gpr:_) offset = MR gpr vr
+ storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+ regCode hireg loreg =
+ accumCode `appOL` code
+ `snocOL` MR hireg vr_hi
+ `snocOL` MR loreg vr_lo
+
+ case gprs of
+ hireg : loreg : regs | even (length gprs) ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _skipped : hireg : loreg : regs ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _ -> -- only one or no regs left
+ passArguments args [] fprs (stackOffset'+8)
+ stackCode accumUsed
+#endif
+
+ passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+ | reg : _ <- regs = do
+ register <- getRegister arg
+ let code = case register of
+ Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+ Any _ acode -> acode reg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we reserve stack slots for register parameters
+ (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ stackOffset
+#endif
+ (accumCode `appOL` code)
+ (reg : accumUsed)
+ | otherwise = do
+ (vr, code) <- getSomeReg arg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+ (stackOffset' + stackBytes)
+ (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
+ accumUsed
+ where
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | isFloatType rep && typeWidth rep == W64 =
+ roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
+ (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
+ II32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+ -- we use the FPRs.
+ FF32 -> (1, 1, 4, fprs)
+ FF64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ FF32 -> (0, 1, 4, fprs)
+ FF64 -> (0, 1, 8, fprs)
+#endif
+
+ moveResult reduceToFF32 =
+ case dest_regs of
+ [] -> nilOL
+ [CmmHinted dest _hint]
+ | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
+ | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
+ | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
+ MR r_dest r4]
+ | otherwise -> unitOL (MR r_dest r3)
+ where rep = cmmRegType (CmmLocal dest)
+ r_dest = getRegisterReg (CmmLocal dest)
+
+ outOfLineFloatOp mop =
+ do
+ dflags <- getDynFlagsNat
+ mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (functionName, reduce) = case mop of
+ MO_F32_Exp -> (fsLit "exp", True)
+ MO_F32_Log -> (fsLit "log", True)
+ MO_F32_Sqrt -> (fsLit "sqrt", True)
+
+ MO_F32_Sin -> (fsLit "sin", True)
+ MO_F32_Cos -> (fsLit "cos", True)
+ MO_F32_Tan -> (fsLit "tan", True)
+
+ MO_F32_Asin -> (fsLit "asin", True)
+ MO_F32_Acos -> (fsLit "acos", True)
+ MO_F32_Atan -> (fsLit "atan", True)
+
+ MO_F32_Sinh -> (fsLit "sinh", True)
+ MO_F32_Cosh -> (fsLit "cosh", True)
+ MO_F32_Tanh -> (fsLit "tanh", True)
+ MO_F32_Pwr -> (fsLit "pow", True)
+
+ MO_F64_Exp -> (fsLit "exp", False)
+ MO_F64_Log -> (fsLit "log", False)
+ MO_F64_Sqrt -> (fsLit "sqrt", False)
+
+ MO_F64_Sin -> (fsLit "sin", False)
+ MO_F64_Cos -> (fsLit "cos", False)
+ MO_F64_Tan -> (fsLit "tan", False)
+
+ MO_F64_Asin -> (fsLit "asin", False)
+ MO_F64_Acos -> (fsLit "acos", False)
+ MO_F64_Atan -> (fsLit "atan", False)
+
+ MO_F64_Sinh -> (fsLit "sinh", False)
+ MO_F64_Cosh -> (fsLit "cosh", False)
+ MO_F64_Tanh -> (fsLit "tanh", False)
+ MO_F64_Pwr -> (fsLit "pow", False)
+ other -> pprPanic "genCCall(ppc): unknown callish op"
+ (pprCallishMachOp other)
+
+#else /* darwin_TARGET_OS || linux_TARGET_OS */
+genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordWidth)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ LD II32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ ADDIS tmp tmp (HA (ImmCLbl lbl)),
+ LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condReg :: NatM CondCode -> NatM Register
+condReg getCond = do
+ CondCode _ cond cond_code <- getCond
+ let
+{- code dst = cond_code `appOL` toOL [
+ BCC cond lbl1,
+ LI dst (ImmInt 0),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl1,
+ LI dst (ImmInt 1),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl2
+ ]-}
+ code dst = cond_code
+ `appOL` negate_code
+ `appOL` toOL [
+ MFCR dst,
+ RLWINM dst dst (bit + 1) 31 31
+ ]
+
+ negate_code | do_negate = unitOL (CRNOR bit bit bit)
+ | otherwise = nilOL
+
+ (bit, do_negate) = case cond of
+ LTT -> (0, False)
+ LE -> (1, True)
+ EQQ -> (2, False)
+ GE -> (0, True)
+ GTT -> (1, False)
+
+ NE -> (2, True)
+
+ LU -> (0, False)
+ LEU -> (1, True)
+ GEU -> (0, True)
+ GU -> (1, False)
+                    _       -> panic "PPC.CodeGen.condReg: no match"
+
+ return (Any II32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ It may *not* modify global registers, unless the global
+ register happens to be the destination register.
+ It may not clobber any other registers. In fact, only ccalls clobber any
+ fixed registers.
+ Also, it may not modify the counter register (used by genCCall).
+
+ Corollary: If a getRegister for a subexpression returns Fixed, you need
+ not move it to a fresh temporary before evaluating the next subexpression.
+ The Fixed register won't be modified.
+ Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+ the value of the destination register.
+-}
+
+trivialCode
+ :: Width
+ -> Bool
+ -> (Reg -> Reg -> RI -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate rep signed y
+ = do
+ (src1, code1) <- getSomeReg x
+ let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+ return (Any (intSize rep) code)
+
+trivialCode rep _ instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+ return (Any (intSize rep) code)
+
+trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm' size instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+ return (Any size code)
+
+trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
+
+
+trivialUCode
+ :: Size
+ -> (Reg -> Reg -> Instr)
+ -> CmmExpr
+ -> NatM Register
+trivialUCode rep instr x = do
+ (src, code) <- getSomeReg x
+ let code' dst = code `snocOL` instr dst src
+ return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+
+remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `appOL` toOL [
+ div dst src1 src2,
+ MULLW dst dst (RIReg src2),
+ SUBF dst dst src1
+ ]
+ return (Any (intSize rep) code)
+
+
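-- Editorial note on the bit trick used below (our reading, not part of
-- the patch): the doubleword 0x43300000_80000000 is the IEEE-754 double
-- 2^52 + 2^31. Overwriting its low word with (src XOR 0x80000000), i.e.
-- src + 2^31 viewed as an unsigned 32-bit value, produces the double
-- 2^52 + 2^31 + src; subtracting the original constant therefore leaves
-- exactly (double) src for any signed 32-bit src.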
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP fromRep toRep x = do
+ (src, code) <- getSomeReg x
+ lbl <- getNewLabelNat
+ itmp <- getNewRegNat II32
+ ftmp <- getNewRegNat FF64
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x43300000 W32),
+ CmmStaticLit (CmmInt 0x80000000 W32)],
+ XORIS itmp src (ImmInt 0x8000),
+ ST II32 itmp (spRel 3),
+ LIS itmp (ImmInt 0x4330),
+ ST II32 itmp (spRel 2),
+ LD FF64 ftmp (spRel 2)
+ ] `appOL` addr_code `appOL` toOL [
+ LD FF64 dst addr,
+ FSUB FF64 dst ftmp dst
+ ] `appOL` maybe_frsp dst
+
+ maybe_exts = case fromRep of
+ W8 -> unitOL $ EXTS II8 src src
+ W16 -> unitOL $ EXTS II16 src src
+ W32 -> nilOL
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+ maybe_frsp dst
+ = case toRep of
+ W32 -> unitOL $ FRSP dst dst
+ W64 -> nilOL
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+ return (Any (floatSize toRep) code')
+
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int _ toRep x = do
+ -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat FF64
+ let
+ code' dst = code `appOL` toOL [
+ -- convert to int in FP reg
+ FCTIWZ tmp src,
+ -- store value (64bit) from FP to stack
+ ST FF64 tmp (spRel 2),
+ -- read low word of value (high word is undefined)
+ LD II32 dst (spRel 3)]
+ return (Any (intSize toRep) code')
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs
new file mode 100644
index 0000000000..7345ee5f1d
--- /dev/null
+++ b/compiler/nativeGen/PPC/Cond.hs
@@ -0,0 +1,62 @@
+
+module PPC.Cond (
+ Cond(..),
+ condNegate,
+ condUnsigned,
+ condToSigned,
+ condToUnsigned,
+)
+
+where
+
+import Panic
+
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ deriving Eq
+
+
+condNegate :: Cond -> Cond
+condNegate ALWAYS = panic "condNegate: ALWAYS"
+condNegate EQQ = NE
+condNegate GE = LTT
+condNegate GEU = LU
+condNegate GTT = LE
+condNegate GU = LEU
+condNegate LE = GTT
+condNegate LEU = GU
+condNegate LTT = GE
+condNegate LU = GEU
+condNegate NE = EQQ
+
+-- Condition utils
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
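-- An editorial sanity check (hypothetical, not part of the patch):
-- condNegate is an involution on every condition except ALWAYS.
condNegateInvolutive :: Bool
condNegateInvolutive
    = and [ condNegate (condNegate c) == c
          | c <- [EQQ, GE, GEU, GTT, GU, LE, LEU, LTT, LU, NE] ]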
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 85aa494ba4..55affc6e1e 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -10,49 +10,50 @@
#include "nativeGen/NCG.h"
module PPC.Instr (
- Cond(..),
- condNegate,
+ archWordSize,
RI(..),
- Instr(..)
+ Instr(..),
+ maxSpillSlots
)
where
-import BlockId
import PPC.Regs
-import RegsBase
+import PPC.Cond
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import Constants (rESERVED_C_STACK_BYTES)
+import BlockId
import Cmm
-import Outputable
import FastString
import CLabel
+import Outputable
+import FastBool
+
+--------------------------------------------------------------------------------
+-- Size of a PPC memory address.
+--
+archWordSize :: Size
+archWordSize = II32
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- deriving Eq
-
-
-condNegate :: Cond -> Cond
-condNegate ALWAYS = panic "condNegate: ALWAYS"
-condNegate EQQ = NE
-condNegate GE = LTT
-condNegate GEU = LU
-condNegate GTT = LE
-condNegate GU = LEU
-condNegate LE = GTT
-condNegate LEU = GU
-condNegate LTT = GE
-condNegate LU = GEU
-condNegate NE = EQQ
+
+-- | Instruction instance for powerpc
+instance Instruction Instr where
+ regUsageOfInstr = ppc_regUsageOfInstr
+ patchRegsOfInstr = ppc_patchRegsOfInstr
+ isJumpishInstr = ppc_isJumpishInstr
+ jumpDestsOfInstr = ppc_jumpDestsOfInstr
+ patchJumpInstr = ppc_patchJumpInstr
+ mkSpillInstr = ppc_mkSpillInstr
+ mkLoadInstr = ppc_mkLoadInstr
+ takeDeltaInstr = ppc_takeDeltaInstr
+ isMetaInstr = ppc_isMetaInstr
+ mkRegRegMoveInstr = ppc_mkRegRegMoveInstr
+ takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
+ mkJumpInstr = ppc_mkJumpInstr
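-- Editorial sketch (hypothetical helper, not part of the patch; assumes
-- the class methods share the signatures of their ppc_* implementations):
-- register-allocator code can now be written once against the class.
spillThenReload :: Instruction instr => Reg -> Int -> Int -> [instr]
spillThenReload reg delta slot
    = [ mkSpillInstr reg delta slot, mkLoadInstr reg delta slot ]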
-- -----------------------------------------------------------------------------
@@ -85,12 +86,6 @@ data Instr
-- benefit of subsequent passes
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-- Loads and stores.
| LD Size Reg AddrMode -- Load size, dst, src
| LA Size Reg AddrMode -- Load arithmetic size, dst, src
@@ -165,3 +160,293 @@ data Instr
-- bcl to next insn, mflr reg
| LWSYNC -- memory barrier
+
+
+-- | Get the registers that are being used by this instruction.
+-- regUsage doesn't need to do any trickery for jumps and such.
+-- Just state precisely the regs read and written by that insn.
+-- The consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+ppc_regUsageOfInstr :: Instr -> RegUsage
+ppc_regUsageOfInstr instr
+ = case instr of
+ LD _ reg addr -> usage (regAddr addr, [reg])
+ LA _ reg addr -> usage (regAddr addr, [reg])
+ ST _ reg addr -> usage (reg : regAddr addr, [])
+ STU _ reg addr -> usage (reg : regAddr addr, [])
+ LIS reg _ -> usage ([], [reg])
+ LI reg _ -> usage ([], [reg])
+ MR reg1 reg2 -> usage ([reg2], [reg1])
+ CMP _ reg ri -> usage (reg : regRI ri,[])
+ CMPL _ reg ri -> usage (reg : regRI ri,[])
+ BCC _ _ -> noUsage
+ BCCFAR _ _ -> noUsage
+ MTCTR reg -> usage ([reg],[])
+ BCTR _ -> noUsage
+ BL _ params -> usage (params, callClobberedRegs)
+ BCTRL params -> usage (params, callClobberedRegs)
+ ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
+ SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW_MayOflo reg1 reg2 reg3
+ -> usage ([reg2,reg3], [reg1])
+ AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
+ EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
+ NEG reg1 reg2 -> usage ([reg2], [reg1])
+ NOT reg1 reg2 -> usage ([reg2], [reg1])
+ SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ RLWINM reg1 reg2 _ _ _
+ -> usage ([reg2], [reg1])
+ FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FNEG r1 r2 -> usage ([r2], [r1])
+ FCMP r1 r2 -> usage ([r1,r2], [])
+ FCTIWZ r1 r2 -> usage ([r2], [r1])
+ FRSP r1 r2 -> usage ([r2], [r1])
+ MFCR reg -> usage ([], [reg])
+ MFLR reg -> usage ([], [reg])
+ FETCHPC reg -> usage ([], [reg])
+ _ -> noUsage
+ where
+ usage (src, dst) = RU (filter interesting src)
+ (filter interesting dst)
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
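-- Editorial example: ppc_regUsageOfInstr (ADD r1 r2 (RIReg r3)) yields
-- RU [r2, r3] [r1], modulo the `interesting` filter below.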
+interesting :: Reg -> Bool
+interesting (VirtualRegI _) = True
+interesting (VirtualRegHi _) = True
+interesting (VirtualRegF _) = True
+interesting (VirtualRegD _) = True
+interesting (RealReg i) = isFastTrue (freeReg i)
+
+
+
+
+-- | Apply a given mapping to all the register references in this
+-- instruction.
+ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+ppc_patchRegsOfInstr instr env
+ = case instr of
+ LD sz reg addr -> LD sz (env reg) (fixAddr addr)
+ LA sz reg addr -> LA sz (env reg) (fixAddr addr)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+ STU sz reg addr -> STU sz (env reg) (fixAddr addr)
+ LIS reg imm -> LIS (env reg) imm
+ LI reg imm -> LI (env reg) imm
+ MR reg1 reg2 -> MR (env reg1) (env reg2)
+ CMP sz reg ri -> CMP sz (env reg) (fixRI ri)
+ CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri)
+ BCC cond lbl -> BCC cond lbl
+ BCCFAR cond lbl -> BCCFAR cond lbl
+ MTCTR reg -> MTCTR (env reg)
+ BCTR targets -> BCTR targets
+ BL imm argRegs -> BL imm argRegs -- argument regs
+ BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
+ ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
+ ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
+ ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
+ ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
+ SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
+ MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
+ DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
+ DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
+ MULLW_MayOflo reg1 reg2 reg3
+ -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
+ AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
+ OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
+ XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
+ XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
+ EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
+ NEG reg1 reg2 -> NEG (env reg1) (env reg2)
+ NOT reg1 reg2 -> NOT (env reg1) (env reg2)
+ SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
+ SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
+ SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
+ RLWINM reg1 reg2 sh mb me
+ -> RLWINM (env reg1) (env reg2) sh mb me
+ FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
+ FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3)
+ FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3)
+ FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3)
+ FNEG r1 r2 -> FNEG (env r1) (env r2)
+ FCMP r1 r2 -> FCMP (env r1) (env r2)
+ FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
+ FRSP r1 r2 -> FRSP (env r1) (env r2)
+ MFCR reg -> MFCR (env reg)
+ MFLR reg -> MFLR (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+ _ -> instr
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+
+--------------------------------------------------------------------------------
+-- | Checks whether this instruction is a jump/branch instruction,
+-- that is, one that can change the flow of control in a way that
+-- the register allocator needs to worry about.
+ppc_isJumpishInstr :: Instr -> Bool
+ppc_isJumpishInstr instr
+ = case instr of
+ BCC{} -> True
+ BCCFAR{} -> True
+ BCTR{} -> True
+ BCTRL{} -> True
+ BL{} -> True
+ JMP{} -> True
+ _ -> False
+
+
+-- | Find the block ids that this jump/branch instruction can
+-- transfer control to.
+ppc_jumpDestsOfInstr :: Instr -> [BlockId]
+ppc_jumpDestsOfInstr insn
+ = case insn of
+ BCC _ id -> [id]
+ BCCFAR _ id -> [id]
+ BCTR targets -> targets
+ _ -> []
+
+
+-- | Change the destination of this jump instruction.
+-- Used in the linear allocator when adding fixup blocks for join
+-- points.
+ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+ppc_patchJumpInstr insn patchF
+ = case insn of
+ BCC cc id -> BCC cc (patchF id)
+ BCCFAR cc id -> BCCFAR cc (patchF id)
+ BCTR _ -> error "Cannot patch BCTR"
+ _ -> insn
+
+
+-- -----------------------------------------------------------------------------
+
+-- | An instruction to spill a register into a spill slot.
+ppc_mkSpillInstr
+ :: Reg -- register to spill
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+ppc_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let sz = case regClass reg of
+ RcInteger -> II32
+ RcDouble -> FF64
+ _ -> panic "PPC.Instr.mkSpillInstr: no match"
+ in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
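+
+-- Illustrative numbers only: spilling to slot 0 with a current stack
+-- delta of -64 stores at sp + 128, since slot 0 lives at byte offset
+-- 64 and off - delta = 64 - (-64) = 128.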
+
+
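+-- | An instruction to reload a register from a spill slot.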
+ppc_mkLoadInstr
+ :: Reg -- register to load
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+ppc_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let sz = case regClass reg of
+ RcInteger -> II32
+ RcDouble -> FF64
+ _ -> panic "PPC.Instr.mkLoadInstr: no match"
+ in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+spillSlotSize :: Int
+spillSlotSize = 8
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- Convert a spill slot number to an unsigned *byte* offset. Each
+-- architecture decides for itself whether it spills above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + spillSlotSize * slot
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ ( text "invalid spill location: " <> int slot
+ $$ text "maxSpillSlots: " <> int maxSpillSlots)
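+
+-- A quick check of the layout above: spillSlotToOffset 0 == 64 and
+-- spillSlotToOffset 1 == 72, since spillSlotSize is 8.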
+
+
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+ppc_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+ppc_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
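+-- | Check whether this instruction is a meta instruction, i.e. one
+-- that exists only to carry information and emits no real machine
+-- instructions.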
+ppc_isMetaInstr
+ :: Instr
+ -> Bool
+
+ppc_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+-- | Copy the value in a register to another one.
+-- Must work for all register classes.
+ppc_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+ppc_mkRegRegMoveInstr src dst
+ = MR dst src
+
+
+-- | Make an unconditional jump instruction.
+-- For architectures with branch delay slots, it's ok to put
+-- a NOP after the jump. Don't fill the delay slot with an
+-- instruction that references regs or you'll confuse the
+-- linear allocator.
+ppc_mkJumpInstr
+ :: BlockId
+ -> [Instr]
+
+ppc_mkJumpInstr id
+ = [BCC ALWAYS id]
+
+
+-- | Take the source and destination from this reg -> reg move instruction,
+-- or Nothing if it's not one.
+ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
+ppc_takeRegRegMoveInstr _ = Nothing
+
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index ac83600f9c..f12d32a9b0 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module PPC.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
pprDataItem,
- pprInstr
)
where
@@ -20,26 +23,134 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
-import RegsBase
-import PprBase
import PPC.Regs
import PPC.Instr
+import PPC.Cond
+import PprBase
+import Instruction
+import Size
+import Reg
+import RegClass
import BlockId
import Cmm
-import CLabel ( mkAsmTempLabel )
+import CLabel
import Unique ( pprUnique )
import Pretty
import FastString
import qualified Outputable
-import Outputable ( panic )
+import Outputable ( Outputable, panic )
-import Data.Word(Word32)
+import Data.Word
import Data.Bits
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+-- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+ ptext (sLit ".align ") <> int pow2
+ where
+ pow2 = log2 bytes
+
+ log2 :: Int -> Int -- cache the common ones
+ log2 1 = 0
+ log2 2 = 1
+ log2 4 = 2
+ log2 8 = 3
+ log2 n = 1 + log2 (n `quot` 2)
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
+
+
pprUserReg :: Reg -> Doc
pprUserReg = pprReg
@@ -255,7 +366,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
-
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -271,6 +382,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprReg reg]
+-}
pprInstr (LD sz reg addr) = hcat [
char '\t',
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs
index ea882a0d23..b2806c74d1 100644
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ b/compiler/nativeGen/PPC/RegInfo.hs
@@ -7,27 +7,14 @@
-----------------------------------------------------------------------------
module PPC.RegInfo (
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
+ mkVReg,
- JumpDest(..),
+ JumpDest,
canShortcut,
shortcutJump,
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- spillSlotSize,
- maxSpillSlots,
- spillSlotToOffset
+ shortcutStatic,
+ regDotColor
)
where
@@ -35,203 +22,29 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
-import BlockId
-import RegsBase
import PPC.Regs
import PPC.Instr
-import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage = RU [] []
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
- SPILL reg _ -> usage ([reg], [])
- RELOAD _ reg -> usage ([], [reg])
-
- LD _ reg addr -> usage (regAddr addr, [reg])
- LA _ reg addr -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- STU _ reg addr -> usage (reg : regAddr addr, [])
- LIS reg _ -> usage ([], [reg])
- LI reg _ -> usage ([], [reg])
- MR reg1 reg2 -> usage ([reg2], [reg1])
- CMP _ reg ri -> usage (reg : regRI ri,[])
- CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ -> noUsage
- BCCFAR _ _ -> noUsage
- MTCTR reg -> usage ([reg],[])
- BCTR _ -> noUsage
- BL _ params -> usage (params, callClobberedRegs)
- BCTRL params -> usage (params, callClobberedRegs)
- ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
- SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- MULLW_MayOflo reg1 reg2 reg3
- -> usage ([reg2,reg3], [reg1])
- AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
- EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
- NEG reg1 reg2 -> usage ([reg2], [reg1])
- NOT reg1 reg2 -> usage ([reg2], [reg1])
- SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- RLWINM reg1 reg2 _ _ _
- -> usage ([reg2], [reg1])
- FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FNEG r1 r2 -> usage ([r2], [r1])
- FCMP r1 r2 -> usage ([r1,r2], [])
- FCTIWZ r1 r2 -> usage ([r2], [r1])
- FRSP r1 r2 -> usage ([r2], [r1])
- MFCR reg -> usage ([], [reg])
- MFLR reg -> usage ([], [reg])
- FETCHPC reg -> usage ([], [reg])
- _ -> noUsage
- where
- usage (src, dst) = RU (filter interesting src)
- (filter interesting dst)
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
-
- regRI (RIReg r) = [r]
- regRI _ = []
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI _) = True
-interesting (VirtualRegHi _) = True
-interesting (VirtualRegF _) = True
-interesting (VirtualRegD _) = True
-interesting (RealReg i) = isFastTrue (freeReg i)
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- LD sz reg addr -> LD sz (env reg) (fixAddr addr)
- LA sz reg addr -> LA sz (env reg) (fixAddr addr)
- ST sz reg addr -> ST sz (env reg) (fixAddr addr)
- STU sz reg addr -> STU sz (env reg) (fixAddr addr)
- LIS reg imm -> LIS (env reg) imm
- LI reg imm -> LI (env reg) imm
- MR reg1 reg2 -> MR (env reg1) (env reg2)
- CMP sz reg ri -> CMP sz (env reg) (fixRI ri)
- CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri)
- BCC cond lbl -> BCC cond lbl
- BCCFAR cond lbl -> BCCFAR cond lbl
- MTCTR reg -> MTCTR (env reg)
- BCTR targets -> BCTR targets
- BL imm argRegs -> BL imm argRegs -- argument regs
- BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
- ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
- ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
- ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
- ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
- SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
- MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
- DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
- DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
- MULLW_MayOflo reg1 reg2 reg3
- -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
- AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
- OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
- XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
- XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
- EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
- NEG reg1 reg2 -> NEG (env reg1) (env reg2)
- NOT reg1 reg2 -> NOT (env reg1) (env reg2)
- SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
- SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
- SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
- RLWINM reg1 reg2 sh mb me
- -> RLWINM (env reg1) (env reg2) sh mb me
- FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
- FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3)
- FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3)
- FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3)
- FNEG r1 r2 -> FNEG (env r1) (env r2)
- FCMP r1 r2 -> FCMP (env r1) (env r2)
- FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
- FRSP r1 r2 -> FRSP (env r1) (env r2)
- MFCR reg -> MFCR (env reg)
- MFLR reg -> MFLR (env reg)
- FETCHPC reg -> FETCHPC (env reg)
- _ -> instr
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
+import RegClass
+import Reg
+import Size
+import BlockId
+import Cmm
+import CLabel
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- BCC _ id -> id : acc
- BCCFAR _ id -> id : acc
- BCTR targets -> targets ++ acc
- _ -> acc
-
-
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
--- We can't just use jumpDests above because the jump might take its arg,
--- so the instr won't contain a blockid.
---
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- BCC{} -> True
- BCCFAR{} -> True
- BCTR{} -> True
- BCTRL{} -> True
- BL{} -> True
- JMP{} -> True
- _ -> False
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- BCC cc id
- | id == old -> BCC cc new
-
- BCCFAR cc id
- | id == old -> BCCFAR cc new
-
- BCTR _ -> error "Cannot patch BCTR"
+import Outputable
+import Unique
- _ -> insn
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size) = VirtualRegI u
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
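+
+-- Note that FF32 also yields a VirtualRegD: the PPC backend keeps
+-- single-precision values in double-precision registers, rounding
+-- with FRSP where needed, so the wider vreg is the safe choice.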
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MR dst src) = Just (src,dst)
-isRegRegMove _ = Nothing
data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -243,71 +56,39 @@ shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump _ other = other
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
- :: Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let sz = case regClass reg of
- RcInteger -> II32
- RcDouble -> FF64
- RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match"
- in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+ = other_static
-mkLoadInstr
- :: Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let sz = case regClass reg of
- RcInteger -> II32
- RcDouble -> FF64
- RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match"
- in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortBlockId
+ :: (BlockId -> Maybe JumpDest)
+ -> BlockId
+ -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-mkRegRegMoveInstr src dst
- = MR dst src
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id = [BCC ALWAYS id]
-
-
-
-spillSlotSize :: Int
-spillSlotSize = 8
-
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
- | slot >= 0 && slot < maxSpillSlots
- = 64 + spillSlotSize * slot
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int maxSpillSlots)
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index d6993b210b..80c68dd096 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -5,16 +5,6 @@
-- -----------------------------------------------------------------------------
module PPC.Regs (
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediates
Imm(..),
strImmLit,
@@ -42,7 +32,10 @@ module PPC.Regs (
-- horror show
freeReg,
- globalRegMaybe
+ globalRegMaybe,
+ get_GlobalReg_reg_or_addr,
+ allocatableRegs
+
)
where
@@ -51,78 +44,22 @@ where
#include "HsVersions.h"
#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
import Outputable ( Outputable(..), pprPanic, panic )
import qualified Outputable
-import Unique
import Constants
import FastBool
import Data.Word ( Word8, Word16, Word32 )
import Data.Int ( Int8, Int16, Int32 )
--- sizes -----------------------------------------------------------------------
--- For these three, the "size" also gives the int/float
--- distinction, because the instructions for int/float
--- differ only in their suffices
-data Size
- = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
- deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _ = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty
- | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8 = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size) = VirtualRegI u
- | otherwise
- = case size of
- FF32 -> VirtualRegD u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-
-- immediates ------------------------------------------------------------------
data Imm
@@ -490,7 +427,7 @@ freeReg REG_Hp = fastBool False
#ifdef REG_HpLim
freeReg REG_HpLim = fastBool False
#endif
-freeReg n = fastBool True
+freeReg _ = fastBool True
-- | Returns 'Nothing' if this global register is not stored
@@ -582,3 +519,26 @@ freeReg _ = 0#
globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined"
#endif /* powerpc_TARGET_ARCH */
+
+
+-- We map STG registers onto appropriate CmmExprs. Either they map to
+-- real machine registers, or they are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
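+
+-- For example, on a target where REG_Sp is defined, asking for Sp
+-- yields (Left r) for the real register r holding it; a register kept
+-- in the register table yields (Right addr) via get_GlobalReg_addr.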
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- That is, these are the regs we are prepared to let the register
+-- allocator map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
diff --git a/compiler/nativeGen/Platform.hs b/compiler/nativeGen/Platform.hs
new file mode 100644
index 0000000000..8b01f5cbb3
--- /dev/null
+++ b/compiler/nativeGen/Platform.hs
@@ -0,0 +1,92 @@
+
+-- | A description of the platform we're compiling for.
+-- Used by the native code generator.
+-- In the future, this module should be the only one that references
+-- the evil #defines for each TARGET_ARCH and TARGET_OS
+--
+module Platform (
+ Platform(..),
+ Arch(..),
+ OS(..),
+
+ defaultTargetPlatform
+)
+
+where
+
+#include "HsVersions.h"
+
+
+-- | Contains enough information for the native code generator to emit
+-- code for this platform.
+data Platform
+ = Platform
+ { platformArch :: Arch
+ , platformOS :: OS }
+
+
+-- | Architectures that the native code generator knows about.
+-- TODO: It might be nice to extend these constructors with information
+-- about what instruction set extensions an architecture might support.
+--
+data Arch
+ = ArchAlpha
+ | ArchX86
+ | ArchX86_64
+ | ArchPPC
+ | ArchPPC_64
+ | ArchSPARC
+ deriving (Show, Eq)
+
+
+-- | Operating systems that the native code generator knows about.
+-- Having OSUnknown should produce a sensible default, but no promises.
+data OS
+ = OSUnknown
+ | OSLinux
+ | OSDarwin
+ | OSSolaris
+ | OSMinGW32
+ deriving (Show, Eq)
+
+
+-- | This is the target platform as far as the #ifdefs are concerned.
+-- These are set in includes/ghcplatform.h by the autoconf scripts
+defaultTargetPlatform :: Platform
+defaultTargetPlatform
+ = Platform defaultTargetArch defaultTargetOS
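+
+-- An illustrative client (prettyArch is hypothetical, not part of
+-- this module) can now dispatch on the architecture without #ifdefs:
+--
+-- prettyArch :: Platform -> String
+-- prettyArch platform
+--  = case platformArch platform of
+--         ArchPPC -> "powerpc"
+--         ArchX86 -> "i386"
+--         other   -> show other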
+
+
+-- | Move the evil TARGET_ARCH #ifdefs into Haskell land.
+defaultTargetArch :: Arch
+#if alpha_TARGET_ARCH
+defaultTargetArch = ArchAlpha
+#elif i386_TARGET_ARCH
+defaultTargetArch = ArchX86
+#elif x86_64_TARGET_ARCH
+defaultTargetArch = ArchX86_64
+#elif powerpc_TARGET_ARCH
+defaultTargetArch = ArchPPC
+#elif powerpc64_TARGET_ARCH
+defaultTargetArch = ArchPPC_64
+#elif sparc_TARGET_ARCH
+defaultTargetArch = ArchSPARC
+#else
+#error "Platform.defaultTargetArch: undefined"
+#endif
+
+
+-- | Move the evil TARGET_OS #ifdefs into Haskell land.
+defaultTargetOS :: OS
+#if linux_TARGET_OS
+defaultTargetOS = OSLinux
+#elif darwin_TARGET_OS
+defaultTargetOS = OSDarwin
+#elif solaris_TARGET_OS
+defaultTargetOS = OSSolaris
+#elif mingw32_TARGET_OS
+defaultTargetOS = OSMinGW32
+#else
+defaultTargetOS = OSUnknown
+#endif
+
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
deleted file mode 100644
index 532d852752..0000000000
--- a/compiler/nativeGen/PprMach.hs
+++ /dev/null
@@ -1,183 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
--- We start with the @pprXXX@s with some cross-platform commonality
--- (e.g., 'pprReg'); we conclude with the no-commonality monster,
--- 'pprInstr'.
-
-#include "nativeGen/NCG.h"
-
-module PprMach (
- pprNatCmmTop, pprBasicBlock, pprSectionHeader, pprData,
- pprInstr, pprSize, pprUserReg, pprImm
- ) where
-
-#include "HsVersions.h"
-
-import PprBase
-
-import BlockId
-import Cmm
-import Regs -- may differ per-platform
-import Instrs
-import Regs
-
-import CLabel ( CLabel, pprCLabel, externallyVisibleCLabel,
- labelDynamic, mkAsmTempLabel, entryLblToInfoLbl )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel ( mkDeadStripPreventer )
-#endif
-
-import Panic ( panic )
-import Unique ( pprUnique )
-import Pretty
-import FastString
-import qualified Outputable
-import Outputable ( Outputable, pprPanic, ppr, docToSDoc)
-
-import Data.Array.ST
-import Data.Word ( Word8 )
-import Control.Monad.ST
-import Data.Char ( chr, ord )
-import Data.Maybe ( isJust )
-
-
-#if alpha_TARGET_ARCH
-import Alpha.Ppr
-#elif powerpc_TARGET_ARCH
-import PPC.Ppr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Ppr
-#elif sparc_TARGET_ARCH
-import SPARC.Ppr
-#else
-#error "Regs: not defined for this architecture"
-#endif
-
-
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
-
-pprNatCmmTop :: NatCmmTop -> Doc
-pprNatCmmTop (CmmData section dats) =
- pprSectionHeader section $$ vcat (map pprData dats)
-
- -- special case for split markers:
-pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
-
-pprNatCmmTop (CmmProc info lbl params (ListGraph blocks)) =
- pprSectionHeader Text $$
- (if null info then -- blocks guaranteed not null, so label needed
- pprLabel lbl
- else
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
- pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
- <> char ':' $$
-#endif
- vcat (map pprData info) $$
- pprLabel (entryLblToInfoLbl lbl)
- ) $$
- vcat (map pprBasicBlock blocks)
- -- above: Even the first block gets a label, because with branch-chain
- -- elimination, it might be the target of a goto.
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
- -- If we are using the .subsections_via_symbols directive
- -- (available on recent versions of Darwin),
- -- we have to make sure that there is some kind of reference
- -- from the entry code to a label on the _top_ of of the info table,
- -- so that the linker will not think it is unreferenced and dead-strip
- -- it. That's why the label is called a DeadStripPreventer (_dsp).
- $$ if not (null info)
- then text "\t.long "
- <+> pprCLabel_asm (entryLblToInfoLbl lbl)
- <+> char '-'
- <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
- else empty
-#endif
-
-
-pprBasicBlock :: NatBasicBlock -> Doc
-pprBasicBlock (BasicBlock (BlockId id) instrs) =
- pprLabel (mkAsmTempLabel id) $$
- vcat (map pprInstr instrs)
-
-
-pprData :: CmmStatic -> Doc
-pprData (CmmAlign bytes) = pprAlign bytes
-pprData (CmmDataLabel lbl) = pprLabel lbl
-pprData (CmmString str) = pprASCII str
-pprData (CmmUninitialised bytes) = ptext (sLit s) <> int bytes
- where s =
-#if defined(solaris2_TARGET_OS)
- ".skip "
-#else
- ".space "
-#endif
-pprData (CmmStaticLit lit) = pprDataItem lit
-
-pprGloblDecl :: CLabel -> Doc
-pprGloblDecl lbl
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
- (sLit ".globl ")) <>
- pprCLabel_asm lbl
-
-pprTypeAndSizeDecl :: CLabel -> Doc
-pprTypeAndSizeDecl lbl
-#if linux_TARGET_OS
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = ptext (sLit ".type ") <>
- pprCLabel_asm lbl <> ptext (sLit ", @object")
-#else
- = empty
-#endif
-
-pprLabel :: CLabel -> Doc
-pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
-
-
-pprASCII str
- = vcat (map do1 str) $$ do1 0
- where
- do1 :: Word8 -> Doc
- do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
-
-pprAlign bytes =
- IF_ARCH_alpha(ptext (sLit ".align ") <> int pow2,
- IF_ARCH_i386(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
- IF_ARCH_x86_64(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
- IF_ARCH_sparc(ptext (sLit ".align ") <> int bytes,
- IF_ARCH_powerpc(ptext (sLit ".align ") <> int pow2,)))))
- where
- pow2 = log2 bytes
-
- log2 :: Int -> Int -- cache the common ones
- log2 1 = 0
- log2 2 = 1
- log2 4 = 2
- log2 8 = 3
- log2 n = 1 + log2 (n `quot` 2)
-
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
- ppr instr = Outputable.docToSDoc $ pprInstr instr
-
-
-
-
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
new file mode 100644
index 0000000000..1a341bbdda
--- /dev/null
+++ b/compiler/nativeGen/Reg.hs
@@ -0,0 +1,113 @@
+
+-- | An architecture independent description of a register.
+-- This needs to stay architecture independent because it is used
+-- by NCGMonad and the register allocators, which are shared
+-- by all architectures.
+--
+module Reg (
+ RegNo,
+ Reg(..),
+ isRealReg,
+ unRealReg,
+ isVirtualReg,
+ renameVirtualReg,
+ getHiVRegFromLo
+)
+
+where
+
+import Outputable
+import Unique
+import Panic
+
+-- | An identifier for a real machine register.
+type RegNo
+ = Int
+
+-- RealRegs are machine regs which are available for allocation, in
+-- the usual way. We know what class they are, because that's part of
+-- the processor's architecture.
+
+-- VirtualRegs are virtual registers. The register allocator will
+-- eventually have to map them into RealRegs, or into spill slots.
+--
+-- VirtualRegs are allocated on the fly, usually to represent a single
+-- value in the abstract assembly code (i.e. dynamic registers are
+-- usually single assignment).
+--
+-- With the new register allocator, the
+-- single assignment restriction isn't necessary to get correct code,
+-- although a better register allocation will result if single
+-- assignment is used -- because the allocator maps a VirtualReg into
+-- a single RealReg, even if the VirtualReg has multiple live ranges.
+
+-- Virtual regs can be of either class, so that info is attached.
+data Reg
+ = RealReg {-# UNPACK #-} !RegNo
+ | VirtualRegI {-# UNPACK #-} !Unique
+ | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
+ | VirtualRegF {-# UNPACK #-} !Unique
+ | VirtualRegD {-# UNPACK #-} !Unique
+ deriving (Eq, Ord)
+
+
+-- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
+-- in the register allocator.
+instance Uniquable Reg where
+ getUnique (RealReg i) = mkUnique 'C' i
+ getUnique (VirtualRegI u) = u
+ getUnique (VirtualRegHi u) = u
+ getUnique (VirtualRegF u) = u
+ getUnique (VirtualRegD u) = u
+
+
+-- | Print a reg in a generic manner
+-- If you want the architecture specific names, then use the pprReg
+-- function from the appropriate Ppr module.
+instance Outputable Reg where
+ ppr reg
+ = case reg of
+ RealReg i -> text "%r" <> int i
+ VirtualRegI u -> text "%vI_" <> pprUnique u
+ VirtualRegHi u -> text "%vHi_" <> pprUnique u
+ VirtualRegF u -> text "%vF_" <> pprUnique u
+ VirtualRegD u -> text "%vD_" <> pprUnique u
+
+
+
+isRealReg :: Reg -> Bool
+isRealReg = not . isVirtualReg
+
+-- | Take the RegNo from a real reg
+unRealReg :: Reg -> RegNo
+unRealReg (RealReg i) = i
+unRealReg _ = panic "unRealReg on VirtualReg"
+
+isVirtualReg :: Reg -> Bool
+isVirtualReg (RealReg _) = False
+isVirtualReg (VirtualRegI _) = True
+isVirtualReg (VirtualRegHi _) = True
+isVirtualReg (VirtualRegF _) = True
+isVirtualReg (VirtualRegD _) = True
+
+
+renameVirtualReg :: Unique -> Reg -> Reg
+renameVirtualReg u r
+ = case r of
+ RealReg _ -> error "renameVirtualReg: can't change unique on a real reg"
+ VirtualRegI _ -> VirtualRegI u
+ VirtualRegHi _ -> VirtualRegHi u
+ VirtualRegF _ -> VirtualRegF u
+ VirtualRegD _ -> VirtualRegD u
+
+-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
+-- when supplied with the vreg for the lower-half of the quantity.
+-- (NB. Not reversible).
+getHiVRegFromLo :: Reg -> Reg
+getHiVRegFromLo (VirtualRegI u)
+ = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
+
+getHiVRegFromLo _
+ = panic "RegsBase.getHiVRegFromLo"
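+
+-- Schematically: if the low half of a 64-bit value lives in %vI_u,
+-- then the high half lives in %vHi_u', where u' is u retagged
+-- with 'H'.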
+
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
index 18e4b0edd1..8521e92601 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
@@ -8,11 +8,11 @@ module RegAlloc.Graph.Coalesce (
where
-import Cmm
-import Regs
import RegAlloc.Liveness
-import RegAllocInfo
+import Instruction
+import Reg
+import Cmm
import Bag
import UniqFM
import UniqSet
@@ -26,7 +26,11 @@ import Data.List
-- then the mov only serves to join live ranges. The two regs can be renamed to be
-- the same and the move instruction safely erased.
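+--
+-- For example, a PPC (MR v2 v1) where v1 dies and v2 is born lets us
+-- rename v2 to v1 throughout and drop the MR entirely.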
-regCoalesce :: [LiveCmmTop] -> UniqSM [LiveCmmTop]
+regCoalesce
+ :: Instruction instr
+ => [LiveCmmTop instr]
+ -> UniqSM [LiveCmmTop instr]
+
regCoalesce code
= do
let joins = foldl' unionBags emptyBag
@@ -57,7 +61,11 @@ sinkReg fm r
-- During a mov, if the source reg dies and the destination reg is born
-- then we can rename the two regs to the same thing and eliminate the move.
--
-slurpJoinMovs :: LiveCmmTop -> Bag (Reg, Reg)
+slurpJoinMovs
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> Bag (Reg, Reg)
+
slurpJoinMovs live
= slurpCmm emptyBag live
where
@@ -68,7 +76,7 @@ slurpJoinMovs live
slurpLI rs (Instr _ Nothing) = rs
slurpLI rs (Instr instr (Just live))
- | Just (r1, r2) <- isRegRegMove instr
+ | Just (r1, r2) <- takeRegRegMoveInstr instr
, elementOfUniqSet r1 $ liveDieRead live
, elementOfUniqSet r2 $ liveBorn live
@@ -80,4 +88,7 @@ slurpJoinMovs live
| otherwise
= rs
+ slurpLI rs SPILL{} = rs
+ slurpLI rs RELOAD{} = rs
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs
index fe99aba120..2e584617e9 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs
@@ -5,8 +5,7 @@
--
module RegAlloc.Graph.Main (
- regAlloc,
- regDotColor
+ regAlloc
)
where
@@ -17,9 +16,12 @@ import RegAlloc.Graph.Spill
import RegAlloc.Graph.SpillClean
import RegAlloc.Graph.SpillCost
import RegAlloc.Graph.Stats
-import Regs
-import Instrs
-import PprMach
+import RegAlloc.Graph.TrivColorable
+import Instruction
+import TargetReg
+import RegClass
+import Reg
+
import UniqSupply
import UniqSet
@@ -43,18 +45,26 @@ maxSpinCount = 10
-- | The top level of the graph coloring register allocator.
--
regAlloc
- :: DynFlags
+ :: (Outputable instr, Instruction instr)
+ => DynFlags
-> UniqFM (UniqSet Reg) -- ^ the registers we can use for allocation
-> UniqSet Int -- ^ the set of available spill slots.
- -> [LiveCmmTop] -- ^ code annotated with liveness information.
- -> UniqSM ( [NatCmmTop], [RegAllocStats] )
+ -> [LiveCmmTop instr] -- ^ code annotated with liveness information.
+ -> UniqSM ( [NatCmmTop instr], [RegAllocStats instr] )
-- ^ code with registers allocated and stats for each stage of
-- allocation
regAlloc dflags regsFree slotsFree code
= do
+ -- TODO: the regClass function is currently hard coded to the default target
+ -- architecture. Would prefer to determine this from dflags.
+ -- There are other uses of targetRegClass later in this module.
+ let triv = trivColorable targetRegClass
+
(code_final, debug_codeGraphs, _)
- <- regAlloc_spin dflags 0 trivColorable regsFree slotsFree [] code
+ <- regAlloc_spin dflags 0
+ triv
+ regsFree slotsFree [] code
return ( code_final
, reverse debug_codeGraphs )
@@ -74,7 +84,7 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
$ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
( text "It looks like the register allocator is stuck in an infinite loop."
$$ text "max cycles = " <> int maxSpinCount
- $$ text "regsFree = " <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg)
+ $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
$ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree)
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
@@ -139,12 +149,12 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
-- clean out unneeded SPILL/RELOADs
let code_spillclean = map cleanSpills code_patched
- -- strip off liveness information
- let code_nat = map stripLive code_spillclean
+ -- strip off liveness information,
+ -- and rewrite SPILL/RELOAD pseudos into real instructions along the way
+ let code_final = map stripLive code_spillclean
- -- rewrite SPILL/RELOAD pseudos into real instructions
- let spillNatTop = mapGenBlockTop spillNatBlock
- let code_final = map spillNatTop code_nat
+-- let spillNatTop = mapGenBlockTop spillNatBlock
+-- let code_final = map spillNatTop code_nat
-- record what happened in this stage for debugging
let stat =
@@ -213,7 +223,8 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
-- | Build a graph from the liveness and coalesce information in this code.
buildGraph
- :: [LiveCmmTop]
+ :: Instruction instr
+ => [LiveCmmTop instr]
-> UniqSM (Color.Graph Reg RegClass Reg)
buildGraph code
@@ -248,8 +259,8 @@ graphAddConflictSet set graph
= let reals = filterUFM isRealReg set
virtuals = filterUFM (not . isRealReg) set
- graph1 = Color.addConflicts virtuals regClass graph
- graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 regClass r2)
+ graph1 = Color.addConflicts virtuals targetRegClass graph
+ graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 targetRegClass r2)
graph1
[ (a, b)
| a <- uniqSetToList virtuals
@@ -276,13 +287,14 @@ graphAddCoalesce (r1, r2) graph
| otherwise
= Color.addCoalesce (regWithClass r1) (regWithClass r2) graph
- where regWithClass r = (r, regClass r)
+ where regWithClass r = (r, targetRegClass r)
-- | Patch registers in code using the reg -> reg mapping in this graph.
patchRegsFromGraph
- :: Color.Graph Reg RegClass Reg
- -> LiveCmmTop -> LiveCmmTop
+ :: (Outputable instr, Instruction instr)
+ => Color.Graph Reg RegClass Reg
+ -> LiveCmmTop instr -> LiveCmmTop instr
patchRegsFromGraph graph code
= let
@@ -303,7 +315,7 @@ patchRegsFromGraph graph code
= pprPanic "patchRegsFromGraph: register mapping failed."
( text "There is no node in the graph for register " <> ppr reg
$$ ppr code
- $$ Color.dotGraph (\_ -> text "white") trivColorable graph)
+ $$ Color.dotGraph (\_ -> text "white") (trivColorable targetRegClass) graph)
in patchEraseLive patchF code
diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
index b5a645188f..e6e5622a02 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
@@ -10,9 +10,8 @@ module RegAlloc.Graph.Spill (
where
import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
+import Instruction
+import Reg
import Cmm
import State
@@ -35,11 +34,12 @@ import Data.Maybe
-- address the spill slot directly.
--
regSpill
- :: [LiveCmmTop] -- ^ the code
+ :: Instruction instr
+ => [LiveCmmTop instr] -- ^ the code
-> UniqSet Int -- ^ available stack slots
-> UniqSet Reg -- ^ the regs to spill
-> UniqSM
+ ([LiveCmmTop instr] -- code with spill instructions
+ ([LiveCmmTop instr] -- code will spill instructions
, UniqSet Int -- left over slots
, SpillStats ) -- stats about what happened during spilling
@@ -75,6 +75,20 @@ regSpill_block regSlotMap (BasicBlock i instrs)
= do instrss' <- mapM (regSpill_instr regSlotMap) instrs
return $ BasicBlock i (concat instrss')
+
+regSpill_instr
+ :: Instruction instr
+ => UniqFM Int
+ -> LiveInstr instr -> SpillM [LiveInstr instr]
+
+-- | The thing we're spilling shouldn't already have spills or reloads in it.
+regSpill_instr _ SPILL{}
+ = panic "regSpill_instr: unexpected SPILL"
+
+regSpill_instr _ RELOAD{}
+ = panic "regSpill_instr: unexpected RELOAD"
+
+
regSpill_instr _ li@(Instr _ Nothing)
= do return [li]
@@ -82,7 +96,7 @@ regSpill_instr regSlotMap
(Instr instr (Just _))
= do
-- work out which regs are read and written in this instr
- let RU rlRead rlWritten = regUsage instr
+ let RU rlRead rlWritten = regUsageOfInstr instr
-- sometimes a register is listed as being read more than once,
-- nub this so we don't end up inserting two lots of spill code.
@@ -109,9 +123,9 @@ regSpill_instr regSlotMap
let postfixes = concat mPostfixes
-- final code
- let instrs' = map (\i -> Instr i Nothing) prefixes
- ++ [ Instr instr3 Nothing ]
- ++ map (\i -> Instr i Nothing) postfixes
+ let instrs' = prefixes
+ ++ [Instr instr3 Nothing]
+ ++ postfixes
return
{- $ pprTrace "* regSpill_instr spill"
@@ -139,6 +153,7 @@ spillRead regSlotMap instr reg
| otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
+
spillWrite regSlotMap instr reg
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -152,6 +167,7 @@ spillWrite regSlotMap instr reg
| otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
+
spillModify regSlotMap instr reg
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -168,19 +184,25 @@ spillModify regSlotMap instr reg
-- | rewrite uses of this virtual reg in an instr to use a different virtual reg
-patchInstr :: Reg -> Instr -> SpillM (Instr, Reg)
+patchInstr
+ :: Instruction instr
+ => Reg -> instr -> SpillM (instr, Reg)
+
patchInstr reg instr
= do nUnique <- newUnique
let nReg = renameVirtualReg nUnique reg
let instr' = patchReg1 reg nReg instr
return (instr', nReg)
-patchReg1 :: Reg -> Reg -> Instr -> Instr
+patchReg1
+ :: Instruction instr
+ => Reg -> Reg -> instr -> instr
+
patchReg1 old new instr
= let patchF r
| r == old = new
| otherwise = r
- in patchRegs instr patchF
+ in patchRegsOfInstr instr patchF
------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
index b68648bdaf..4f129c468a 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
@@ -29,13 +29,12 @@ module RegAlloc.Graph.SpillClean (
)
where
-import BlockId
import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import Reg
+import BlockId
+import Cmm
import UniqSet
import UniqFM
import Unique
@@ -51,12 +50,19 @@ type Slot = Int
-- | Clean out unneeded spill\/reloads from this top level thing.
-cleanSpills :: LiveCmmTop -> LiveCmmTop
+cleanSpills
+ :: Instruction instr
+ => LiveCmmTop instr -> LiveCmmTop instr
+
cleanSpills cmm
= evalState (cleanSpin 0 cmm) initCleanS
-- | do one pass of cleaning
-cleanSpin :: Int -> LiveCmmTop -> CleanM LiveCmmTop
+cleanSpin
+ :: Instruction instr
+ => Int
+ -> LiveCmmTop instr
+ -> CleanM (LiveCmmTop instr)
{-
cleanSpin spinCount code
@@ -103,7 +109,11 @@ cleanSpin spinCount code
-- | Clean one basic block
-cleanBlockForward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockForward
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
cleanBlockForward (BasicBlock blockId instrs)
= do
-- see if we have a valid association for the entry to this block
@@ -116,7 +126,11 @@ cleanBlockForward (BasicBlock blockId instrs)
return $ BasicBlock blockId instrs_reload
-cleanBlockBackward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockBackward
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
cleanBlockBackward (BasicBlock blockId instrs)
= do instrs_spill <- cleanBackward emptyUniqSet [] instrs
return $ BasicBlock blockId instrs_spill
@@ -130,11 +144,12 @@ cleanBlockBackward (BasicBlock blockId instrs)
-- then we don't need to do the reload.
--
cleanForward
- :: BlockId -- ^ the block that we're currently in
- -> Assoc Store -- ^ two store locations are associated if they have the same value
- -> [LiveInstr] -- ^ acc
- -> [LiveInstr] -- ^ instrs to clean (in backwards order)
- -> CleanM [LiveInstr] -- ^ cleaned instrs (in forward order)
+ :: Instruction instr
+ => BlockId -- ^ the block that we're currently in
+ -> Assoc Store -- ^ two store locations are associated if they have the same value
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ instrs to clean (in backwards order)
+ -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order)
cleanForward _ _ acc []
= return acc
@@ -142,19 +157,19 @@ cleanForward _ _ acc []
-- write out live range joins via spill slots to just a spill and a reg-reg move
-- hopefully the spill will also be cleaned in the next pass
--
-cleanForward blockId assoc acc (Instr i1 live1 : Instr i2 _ : instrs)
+cleanForward blockId assoc acc (li1 : li2 : instrs)
- | SPILL reg1 slot1 <- i1
- , RELOAD slot2 reg2 <- i2
+ | SPILL reg1 slot1 <- li1
+ , RELOAD slot2 reg2 <- li2
, slot1 == slot2
= do
modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
cleanForward blockId assoc acc
- (Instr i1 live1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
+ (li1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
- | Just (r1, r2) <- isRegRegMove i1
+ | Just (r1, r2) <- takeRegRegMoveInstr i1
= if r1 == r2
-- erase any left over nop reg reg moves while we're here
-- this will also catch any nop moves that the "write out live range joins" case above
@@ -170,38 +185,50 @@ cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
cleanForward blockId assoc' (li : acc) instrs
-cleanForward blockId assoc acc (li@(Instr instr _) : instrs)
+cleanForward blockId assoc acc (li : instrs)
-- update association due to the spill
- | SPILL reg slot <- instr
+ | SPILL reg slot <- li
= let assoc' = addAssoc (SReg reg) (SSlot slot)
$ delAssoc (SSlot slot)
$ assoc
in cleanForward blockId assoc' (li : acc) instrs
-- clean a reload instr
- | RELOAD{} <- instr
+ | RELOAD{} <- li
= do (assoc', mli) <- cleanReload blockId assoc li
case mli of
Nothing -> cleanForward blockId assoc' acc instrs
Just li' -> cleanForward blockId assoc' (li' : acc) instrs
-- remember the association over a jump
- | targets <- jumpDests instr []
+ | Instr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
, not $ null targets
= do mapM_ (accJumpValid assoc) targets
cleanForward blockId assoc (li : acc) instrs
-- writing to a reg changes its value.
- | RU _ written <- regUsage instr
+ | Instr instr _ <- li
+ , RU _ written <- regUsageOfInstr instr
= let assoc' = foldr delAssoc assoc (map SReg $ nub written)
in cleanForward blockId assoc' (li : acc) instrs
+-- bogus, to stop pattern match warning
+cleanForward _ _ _ _
+ = panic "RegAlloc.Graph.SpillClean.cleanForward: no match"
+
-- | Try and rewrite a reload instruction to something more pleasing
--
-cleanReload :: BlockId -> Assoc Store -> LiveInstr -> CleanM (Assoc Store, Maybe LiveInstr)
-cleanReload blockId assoc li@(Instr (RELOAD slot reg) _)
+cleanReload
+ :: Instruction instr
+ => BlockId
+ -> Assoc Store
+ -> LiveInstr instr
+ -> CleanM (Assoc Store, Maybe (LiveInstr instr))
+
+cleanReload blockId assoc li@(RELOAD slot reg)
-- if the reg we're reloading already has the same value as the slot
-- then we can erase the instruction outright
@@ -264,10 +291,10 @@ cleanReload _ _ _
-- we should really be updating the noReloads set as we cross jumps also.
--
cleanBackward
- :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from
- -> [LiveInstr] -- ^ acc
- -> [LiveInstr] -- ^ instrs to clean (in forwards order)
- -> CleanM [LiveInstr] -- ^ cleaned instrs (in backwards order)
+ :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ instrs to clean (in forwards order)
+ -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in backwards order)
cleanBackward noReloads acc lis
@@ -277,15 +304,15 @@ cleanBackward noReloads acc lis
cleanBackward' _ _ acc []
= return acc
-cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
+cleanBackward' reloadedBy noReloads acc (li : instrs)
-- if nothing ever reloads from this slot then we don't need the spill
- | SPILL _ slot <- instr
+ | SPILL _ slot <- li
, Nothing <- lookupUFM reloadedBy (SSlot slot)
= do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
cleanBackward noReloads acc instrs
- | SPILL _ slot <- instr
+ | SPILL _ slot <- li
= if elementOfUniqSet slot noReloads
-- we can erase this spill because the slot won't be read until after the next one
@@ -299,7 +326,7 @@ cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
cleanBackward noReloads' (li : acc) instrs
-- if we reload from a slot then it's no longer unused
- | RELOAD slot _ <- instr
+ | RELOAD slot _ <- li
, noReloads' <- delOneFromUniqSet noReloads slot
= cleanBackward noReloads' (li : acc) instrs
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
index 1d37cf71d6..d4dd75a4b7 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
@@ -16,14 +16,16 @@ module RegAlloc.Graph.SpillCost (
where
-import GraphBase
import RegAlloc.Liveness
-import RegAllocInfo
-import Instrs
-import Regs
+import Instruction
+import RegClass
+import Reg
+
+import GraphBase
+
+
import BlockId
import Cmm
-
import UniqFM
import UniqSet
import Outputable
@@ -62,7 +64,8 @@ plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2)
-- and the number of instructions it was live on entry to (lifetime)
--
slurpSpillCostInfo
- :: LiveCmmTop
+ :: (Outputable instr, Instruction instr)
+ => LiveCmmTop instr
-> SpillCostInfo
slurpSpillCostInfo cmm
@@ -89,11 +92,14 @@ slurpSpillCostInfo cmm
= return ()
-- skip over comment and delta pseudo instrs
- countLIs rsLive (Instr instr Nothing : lis)
- | COMMENT{} <- instr
+ countLIs rsLive (SPILL{} : lis)
+ = countLIs rsLive lis
+
+ countLIs rsLive (RELOAD{} : lis)
= countLIs rsLive lis
- | DELTA{} <- instr
+ countLIs rsLive (Instr instr Nothing : lis)
+ | isMetaInstr instr
= countLIs rsLive lis
| otherwise
@@ -106,7 +112,7 @@ slurpSpillCostInfo cmm
mapM_ incLifetime $ uniqSetToList rsLiveEntry
-- increment counts for what regs were read/written from
- let (RU read written) = regUsage instr
+ let (RU read written) = regUsageOfInstr instr
mapM_ incUses $ filter (not . isRealReg) $ nub read
mapM_ incDefs $ filter (not . isRealReg) $ nub written
@@ -226,8 +232,11 @@ lifeMapFromSpillCostInfo info
-- | Work out the degree of this node, that is, the number of its neighbors that have the same class.
-nodeDegree :: Graph Reg RegClass Reg -> Reg -> Int
-nodeDegree graph reg
+nodeDegree
+ :: (Reg -> RegClass)
+ -> Graph Reg RegClass Reg -> Reg -> Int
+
+nodeDegree regClass graph reg
| Just node <- lookupUFM (graphMap graph) reg
, virtConflicts <- length $ filter (\r -> regClass r == regClass reg)
$ uniqSetToList $ nodeConflicts node
@@ -238,12 +247,17 @@ nodeDegree graph reg
-- | Show a spill cost record, including the degree from the graph and the final calculated spill cost.
-pprSpillCostRecord :: Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
-pprSpillCostRecord graph (reg, uses, defs, life)
+pprSpillCostRecord
+ :: (Reg -> RegClass)
+ -> (Reg -> SDoc)
+ -> Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
+
+pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life)
= hsep
- [ ppr reg
+ [ pprReg reg
, ppr uses
, ppr defs
, ppr life
- , ppr $ nodeDegree graph reg
- , text $ show $ (fromIntegral (uses + defs) / fromIntegral (nodeDegree graph reg) :: Float) ]
+ , ppr $ nodeDegree regClass graph reg
+ , text $ show $ (fromIntegral (uses + defs)
+ / fromIntegral (nodeDegree regClass graph reg) :: Float) ]
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
index 8082f9e975..5e3dd3265b 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
@@ -5,7 +5,6 @@
module RegAlloc.Graph.Stats (
RegAllocStats (..),
- regDotColor,
pprStats,
pprStatsSpills,
@@ -22,13 +21,13 @@ where
import qualified GraphColor as Color
import RegAlloc.Liveness
-import RegAllocInfo
import RegAlloc.Graph.Spill
import RegAlloc.Graph.SpillCost
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import RegClass
+import Reg
+import Cmm
import Outputable
import UniqFM
import UniqSet
@@ -36,11 +35,11 @@ import State
import Data.List
-data RegAllocStats
+data RegAllocStats instr
-- initial graph
= RegAllocStatsStart
- { raLiveCmm :: [LiveCmmTop] -- ^ initial code, with liveness
+ { raLiveCmm :: [LiveCmmTop instr] -- ^ initial code, with liveness
, raGraph :: Color.Graph Reg RegClass Reg -- ^ the initial, uncolored graph
, raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill
@@ -50,35 +49,35 @@ data RegAllocStats
, raCoalesced :: UniqFM Reg -- ^ the regs that were coalesced
, raSpillStats :: SpillStats -- ^ spiller stats
, raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for
- , raSpilled :: [LiveCmmTop] } -- ^ code with spill instructions added
+ , raSpilled :: [LiveCmmTop instr] } -- ^ code with spill instructions added
-- a successful coloring
| RegAllocStatsColored
{ raGraph :: Color.Graph Reg RegClass Reg -- ^ the uncolored graph
, raGraphColored :: Color.Graph Reg RegClass Reg -- ^ the coalesced and colored graph
, raCoalesced :: UniqFM Reg -- ^ the regs that were coalesced
- , raPatched :: [LiveCmmTop] -- ^ code with vregs replaced by hregs
- , raSpillClean :: [LiveCmmTop] -- ^ code with unneeded spill\/reloads cleaned out
- , raFinal :: [NatCmmTop] -- ^ final code
+ , raPatched :: [LiveCmmTop instr] -- ^ code with vregs replaced by hregs
+ , raSpillClean :: [LiveCmmTop instr] -- ^ code with unneeded spill\/reloads cleaned out
+ , raFinal :: [NatCmmTop instr] -- ^ final code
, raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code
-instance Outputable RegAllocStats where
+instance Outputable instr => Outputable (RegAllocStats instr) where
ppr (s@RegAllocStatsStart{})
= text "# Start"
$$ text "# Native code with liveness information."
$$ ppr (raLiveCmm s)
$$ text ""
- $$ text "# Initial register conflict graph."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text "# Initial register conflict graph."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
ppr (s@RegAllocStatsSpill{})
= text "# Spill"
- $$ text "# Register conflict graph."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
- $$ text ""
+-- $$ text "# Register conflict graph."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text ""
$$ (if (not $ isNullUFM $ raCoalesced s)
then text "# Registers coalesced."
@@ -86,9 +85,9 @@ instance Outputable RegAllocStats where
$$ text ""
else empty)
- $$ text "# Spill costs. reg uses defs lifetime degree cost"
- $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
- $$ text ""
+-- $$ text "# Spill costs. reg uses defs lifetime degree cost"
+-- $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
+-- $$ text ""
$$ text "# Spills inserted."
$$ ppr (raSpillStats s)
@@ -101,13 +100,13 @@ instance Outputable RegAllocStats where
ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
= text "# Colored"
- $$ text "# Register conflict graph (initial)."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
- $$ text ""
+-- $$ text "# Register conflict graph (initial)."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text ""
- $$ text "# Register conflict graph (colored)."
- $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
- $$ text ""
+-- $$ text "# Register conflict graph (colored)."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
+-- $$ text ""
$$ (if (not $ isNullUFM $ raCoalesced s)
then text "# Registers coalesced."
@@ -133,7 +132,7 @@ instance Outputable RegAllocStats where
$$ text ""
-- | Do all the different analysis on this list of RegAllocStats
-pprStats :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc
+pprStats :: [RegAllocStats instr] -> Color.Graph Reg RegClass Reg -> SDoc
pprStats stats graph
= let outSpills = pprStatsSpills stats
outLife = pprStatsLifetimes stats
@@ -145,7 +144,7 @@ pprStats stats graph
-- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
pprStatsSpills
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsSpills stats
= let
@@ -163,7 +162,7 @@ pprStatsSpills stats
-- | Dump a table of how long vregs tend to live for in the initial code.
pprStatsLifetimes
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsLifetimes stats
= let info = foldl' plusSpillCostInfo zeroSpillCostInfo
@@ -191,7 +190,7 @@ binLifetimeCount fm
-- | Dump a table of how many conflicts vregs tend to have in the initial code.
pprStatsConflict
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsConflict stats
= let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
@@ -208,7 +207,7 @@ pprStatsConflict stats
-- | For every vreg, dump how many conflicts it has and its lifetime
-- good for making a scatter plot.
pprStatsLifeConflict
- :: [RegAllocStats]
+ :: [RegAllocStats instr]
-> Color.Graph Reg RegClass Reg -- ^ global register conflict graph
-> SDoc
@@ -238,7 +237,10 @@ pprStatsLifeConflict stats graph
-- | Count spill/reload/reg-reg moves.
-- Lets us see how well the register allocator has done.
--
-countSRMs :: LiveCmmTop -> (Int, Int, Int)
+countSRMs
+ :: Instruction instr
+ => LiveCmmTop instr -> (Int, Int, Int)
+
countSRMs cmm
= execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
@@ -246,16 +248,17 @@ countSRM_block (BasicBlock i instrs)
= do instrs' <- mapM countSRM_instr instrs
return $ BasicBlock i instrs'
-countSRM_instr li@(Instr instr _)
- | SPILL _ _ <- instr
+countSRM_instr li
+ | SPILL _ _ <- li
= do modify $ \(s, r, m) -> (s + 1, r, m)
return li
- | RELOAD _ _ <- instr
+ | RELOAD _ _ <- li
= do modify $ \(s, r, m) -> (s, r + 1, m)
return li
- | Just _ <- isRegRegMove instr
+ | Instr instr _ <- li
+ , Just _ <- takeRegRegMoveInstr instr
= do modify $ \(s, r, m) -> (s, r, m + 1)
return li
@@ -266,77 +269,9 @@ countSRM_instr li@(Instr instr _)
addSRM (s1, r1, m1) (s2, r2, m2)
= (s1+s2, r1+r2, m1+m2)
------
--- Register colors for drawing conflict graphs
--- Keep this out of MachRegs.hs because it's specific to the graph coloring allocator.
-
-
--- reg colors for x86
-#if i386_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just str = lookupUFM regColors reg
- in text str
-
-regColors
- = listToUFM
- $ [ (eax, "#00ff00")
- , (ebx, "#0000ff")
- , (ecx, "#00ffff")
- , (edx, "#0080ff")
-
- , (fake0, "#ff00ff")
- , (fake1, "#ff00aa")
- , (fake2, "#aa00ff")
- , (fake3, "#aa00aa")
- , (fake4, "#ff0055")
- , (fake5, "#5500ff") ]
-
-
--- reg colors for x86_64
-#elif x86_64_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just str = lookupUFM regColors reg
- in text str
-
-regColors
- = listToUFM
- $ [ (rax, "#00ff00"), (eax, "#00ff00")
- , (rbx, "#0000ff"), (ebx, "#0000ff")
- , (rcx, "#00ffff"), (ecx, "#00ffff")
- , (rdx, "#0080ff"), (edx, "#00ffff")
- , (r8, "#00ff80")
- , (r9, "#008080")
- , (r10, "#0040ff")
- , (r11, "#00ff40")
- , (r12, "#008040")
- , (r13, "#004080")
- , (r14, "#004040")
- , (r15, "#002080") ]
-
- ++ zip (map RealReg [16..31]) (repeat "red")
-
-
--- reg colors for ppc
-#elif powerpc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- RcDouble -> text "green"
-
-#elif sparc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- RcDouble -> text "green"
-#else
-#error ToDo: regDotColor
-#endif
+
+
+
{-
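The dotGraph dumps above are stubbed out because regDotColor has left this generic module (the per-arch colour tables deleted above presumably move into the arch-specific modules this patch introduces). A hedged sketch of how the dumps could come back once the colouring function is threaded in as a parameter, mirroring what was done for pprSpillCostRecord:

    -- Sketch only: accept the arch-specific pieces as arguments rather
    -- than importing them, then rebuild the dot dump as before.
    pprConflictGraphDot
            :: (Reg -> SDoc)                -- arch-specific regDotColor
            -> (Reg -> RegClass)            -- arch-specific register class
            -> Color.Graph Reg RegClass Reg -> SDoc

    pprConflictGraphDot regDotColor regClass graph
            = Color.dotGraph regDotColor (trivColorable regClass) graph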
diff --git a/compiler/nativeGen/Regs.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
index 5239520708..6a7211dd06 100644
--- a/compiler/nativeGen/Regs.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -1,164 +1,21 @@
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1994-2004
---
--- Machine-specific info about registers.
---
--- Also includes stuff about immediate operands, which are
--- often/usually quite entangled with registers.
---
--- -----------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-module Regs (
- --------------------------------
- -- Generic things, shared by all architectures.
- module RegsBase,
- getHiVRegFromLo,
- get_GlobalReg_reg_or_addr,
- allocatableRegs,
- allocatableRegsInClass,
- trivColorable,
-
- --------------------------------
- -- Things that are defined by the arch specific module.
- --
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
- -- immediates
- Imm(..),
- strImmLit,
- litToImm,
-
- -- addressing modes
- AddrMode(..),
- addrOffset,
-
- -- registers
- spRel,
- argRegs,
- allArgRegs,
- callClobberedRegs,
- allMachRegNos,
- regClass,
- showReg,
-
- -- machine specific things
-#if powerpc_TARGET_ARCH
- allFPArgRegs,
- fits16Bits,
- makeImmediate,
- fReg,
- sp, r3, r4, r27, r28, f1, f20, f21,
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
- EABase(..), EAIndex(..), addrModeRegs,
-
- eax, ebx, ecx, edx, esi, edi, ebp, esp,
- fake0, fake1, fake2, fake3, fake4, fake5,
- rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
- r8, r9, r10, r11, r12, r13, r14, r15,
- xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
- xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
- xmm,
-
- ripRel,
- allFPArgRegs,
-#elif sparc_TARGET_ARCH
- fpRel,
- fits13Bits,
- largeOffsetError,
- gReg, iReg, lReg, oReg, fReg,
- fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
- nCG_FirstFloatReg,
-#endif
- -- horror show
- freeReg,
- globalRegMaybe
-)
+module RegAlloc.Graph.TrivColorable (
+ trivColorable,
+)
where
#include "HsVersions.h"
-#include "../includes/MachRegs.h"
-
-import Cmm
-import CgUtils ( get_GlobalReg_addr )
-import Outputable ( Outputable(..), pprPanic )
-import qualified Outputable
-import Panic
-import Unique
-import UniqSet
-import FastTypes
-import FastBool
-import UniqFM
-
-
-import RegsBase
-
-#if alpha_TARGET_ARCH
-import Alpha.Regs
-#elif powerpc_TARGET_ARCH
-import PPC.Regs
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Regs
-#elif sparc_TARGET_ARCH
-import SPARC.Regs
-#else
-#error "Regs: not defined for this architecture"
-#endif
+import RegClass
+import Reg
+import GraphBase
-instance Show Reg where
- show (RealReg i) = showReg i
- show (VirtualRegI u) = "%vI_" ++ show u
- show (VirtualRegHi u) = "%vHi_" ++ show u
- show (VirtualRegF u) = "%vF_" ++ show u
- show (VirtualRegD u) = "%vD_" ++ show u
-
-instance Outputable Reg where
- ppr r = Outputable.text (show r)
-
-
--- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
--- when supplied with the vreg for the lower-half of the quantity.
--- (NB. Not reversible).
-getHiVRegFromLo :: Reg -> Reg
-getHiVRegFromLo (VirtualRegI u)
- = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
-
-getHiVRegFromLo other
- = pprPanic "getHiVRegFromLo" (ppr other)
-
--- -----------------------------------------------------------------------------
--- Global registers
-
--- We map STG registers onto appropriate CmmExprs. Either they map
--- to real machine registers or stored as offsets from BaseReg. Given
--- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
--- register it is in, on this platform, or a CmmExpr denoting the
--- address in the register table holding it.
--- (See also get_GlobalReg_addr in CgUtils.)
-
-get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
-get_GlobalReg_reg_or_addr mid
- = case globalRegMaybe mid of
- Just rr -> Left rr
- Nothing -> Right (get_GlobalReg_addr mid)
-
+import UniqFM
+import FastTypes
+{-
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs to which we are prepared to let the
-- register allocator attempt to map VRegs.
@@ -187,7 +44,7 @@ allocatableRegsDouble :: Int
allocatableRegsDouble
= length $ filter (\r -> regClass r == RcDouble)
$ map RealReg allocatableRegs
-
+-}
-- trivColorable ---------------------------------------------------------------
@@ -277,8 +134,11 @@ worst n classN classC
#error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE
#endif
-trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
-trivColorable _ conflicts exclusions
+trivColorable
+ :: (Reg -> RegClass)
+ -> Triv Reg RegClass Reg
+
+trivColorable regClass _ conflicts exclusions
= {-# SCC "trivColorable" #-}
let
isSqueesed cI cF ufm
@@ -314,5 +174,3 @@ trivColorable _ conflicts exclusions
(# True, _, _ #)
-> False
-
-
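With the class function abstracted out, a backend can close over its own mapping once and hand the resulting triviality test to the colourer. A one-line sketch, assuming the targetRegClass from the TargetReg module that the linear allocator below imports:

    -- Sketch only: the per-target triviality test is just a partial
    -- application of the generic one.
    targetTrivColorable :: Triv Reg RegClass Reg
    targetTrivColorable = trivColorable targetRegClass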
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs
index 60d0175c94..45fd640804 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs
@@ -21,7 +21,7 @@ where
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.StackMap
import RegAlloc.Liveness
-import Regs
+import Reg
import Outputable
import Unique
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
index bee8c98c61..b357160c96 100644
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -5,7 +5,8 @@ module RegAlloc.Linear.FreeRegs (
releaseReg,
initFreeRegs,
getFreeRegs,
- allocateReg
+ allocateReg,
+ maxSpillSlots
)
#include "HsVersions.h"
@@ -27,12 +28,15 @@ where
#if defined(powerpc_TARGET_ARCH)
import RegAlloc.Linear.PPC.FreeRegs
+import PPC.Instr (maxSpillSlots)
#elif defined(sparc_TARGET_ARCH)
import RegAlloc.Linear.SPARC.FreeRegs
+import SPARC.Instr (maxSpillSlots)
#elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
import RegAlloc.Linear.X86.FreeRegs
+import X86.Instr (maxSpillSlots)
#else
#error "RegAlloc.Linear.FreeRegs not defined for this architecture."
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
index d3f821b8aa..7d2cbcd7a7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
@@ -19,13 +19,11 @@ import RegAlloc.Linear.State
import RegAlloc.Linear.Base
import RegAlloc.Linear.FreeRegs
import RegAlloc.Liveness
+import Instruction
+import Reg
import BlockId
-import Instrs
-import Regs
-import RegAllocInfo
import Cmm hiding (RegSet)
-
import Digraph
import Outputable
import Unique
@@ -37,39 +35,41 @@ import UniqSet
-- vregs are in the correct regs for its destination.
--
joinToTargets
- :: BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ :: Instruction instr
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
-> BlockId -- ^ id of the current block
- -> Instr -- ^ branch instr on the end of the source block.
+ -> instr -- ^ branch instr on the end of the source block.
- -> RegM ([NatBasicBlock] -- fresh blocks of fixup code.
- , Instr) -- the original branch instruction, but maybe patched to jump
+ -> RegM ([NatBasicBlock instr] -- fresh blocks of fixup code.
+ , instr) -- the original branch instruction, but maybe patched to jump
-- to a fixup block first.
joinToTargets block_live id instr
-- we only need to worry about jump instructions.
- | not $ isJumpish instr
+ | not $ isJumpishInstr instr
= return ([], instr)
| otherwise
- = joinToTargets' block_live [] id instr (jumpDests instr [])
+ = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
-----
joinToTargets'
- :: BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ :: Instruction instr
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
- -> [NatBasicBlock] -- ^ acc blocks of fixup code.
+ -> [NatBasicBlock instr] -- ^ acc blocks of fixup code.
-> BlockId -- ^ id of the current block
- -> Instr -- ^ branch instr on the end of the source block.
+ -> instr -- ^ branch instr on the end of the source block.
-> [BlockId] -- ^ branch destinations still to consider.
- -> RegM ( [NatBasicBlock]
- , Instr)
+ -> RegM ( [NatBasicBlock instr]
+ , instr)
-- no more targets to consider. all done.
joinToTargets' _ new_blocks _ instr []
@@ -173,7 +173,7 @@ joinToTargets_again
-- then that will jump to our original destination.
fixup_block_id <- getUniqueR
let block = BasicBlock (BlockId fixup_block_id)
- $ fixUpInstrs ++ mkBranchInstr dest
+ $ fixUpInstrs ++ mkJumpInstr dest
{- pprTrace
("joinToTargets: fixup code is:")
@@ -187,7 +187,11 @@ joinToTargets_again
-- patch the original branch instruction so it goes to our
-- fixup block instead.
- _ -> let instr' = patchJump instr dest (BlockId fixup_block_id)
+ _ -> let instr' = patchJumpInstr instr
+ (\bid -> if bid == dest
+ then BlockId fixup_block_id
+ else bid)
+
in joinToTargets' block_live (block : new_blocks) block_id instr' dests
@@ -256,7 +260,9 @@ expandNode vreg src dst
-- destinations. We have eliminated any possibility of single-node
-- cycles in expandNode above.
--
-handleComponent :: Int -> Instr -> SCC (Unique, Loc, [Loc]) -> RegM [Instr]
+handleComponent
+ :: Instruction instr
+ => Int -> instr -> SCC (Unique, Loc, [Loc]) -> RegM [instr]
-- If the graph is acyclic then we won't get the swapping problem below.
-- In this case we can just do the moves directly, and avoid having to
@@ -305,11 +311,12 @@ handleComponent _ _ (CyclicSCC _)
-- | Move a vreg between these two locations.
--
makeMove
- :: Int -- ^ current C stack delta.
+ :: Instruction instr
+ => Int -- ^ current C stack delta.
-> Unique -- ^ unique of the vreg that we're moving.
-> Loc -- ^ source location.
-> Loc -- ^ destination location.
- -> RegM Instr -- ^ move instruction.
+ -> RegM instr -- ^ move instruction.
makeMove _ vreg (InReg src) (InReg dst)
= do recordSpill (SpillJoinRR vreg)
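For reference, the slice of the Instruction class that this module now leans on, reconstructed from the call sites above; the real signatures live in nativeGen/Instruction, so treat this as a sketch rather than the definitive class:

    -- Sketch, inferred from the call sites in this hunk.
    class Instruction instr where
            isJumpishInstr   :: instr -> Bool
            jumpDestsOfInstr :: instr -> [BlockId]
            patchJumpInstr   :: instr -> (BlockId -> BlockId) -> instr
            mkJumpInstr      :: BlockId -> [instr]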
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index bfd9ca543e..47529d2c96 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -96,14 +96,14 @@ import RegAlloc.Linear.StackMap
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.Stats
import RegAlloc.Linear.JoinToTargets
+import TargetReg
import RegAlloc.Liveness
+import Instruction
+import Reg
-- import PprMach
import BlockId
-import Regs
-import Instrs
-import RegAllocInfo
import Cmm hiding (RegSet)
import Digraph
@@ -112,7 +112,6 @@ import UniqSet
import UniqFM
import UniqSupply
import Outputable
-import FastString
import Data.Maybe
import Data.List
@@ -126,8 +125,9 @@ import Control.Monad
-- Allocate registers
regAlloc
- :: LiveCmmTop
- -> UniqSM (NatCmmTop, Maybe RegAllocStats)
+ :: (Outputable instr, Instruction instr)
+ => LiveCmmTop instr
+ -> UniqSM (NatCmmTop instr, Maybe RegAllocStats)
regAlloc (CmmData sec d)
= return
@@ -171,10 +171,11 @@ regAlloc (CmmProc _ _ _ _)
-- an entry in the block map or it is the first block.
--
linearRegAlloc
- :: BlockId -- ^ the first block
+ :: (Outputable instr, Instruction instr)
+ => BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
- -> [SCC LiveBasicBlock] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock], RegAllocStats)
+ -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats)
linearRegAlloc first_id block_live sccs
= do us <- getUs
@@ -234,9 +235,10 @@ process first_id block_live (b@(BasicBlock id _) : blocks) next_round accum
-- | Do register allocation on this basic block
--
processBlock
- :: BlockMap RegSet -- ^ live regs on entry to each basic block
- -> LiveBasicBlock -- ^ block to do register allocation on
- -> RegM [NatBasicBlock] -- ^ block with registers allocated
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ live regs on entry to each basic block
+ -> LiveBasicBlock instr -- ^ block to do register allocation on
+ -> RegM [NatBasicBlock instr] -- ^ block with registers allocated
processBlock block_live (BasicBlock id instrs)
= do initBlock id
@@ -265,20 +267,21 @@ initBlock id
-- | Do allocation for a sequence of instructions.
linearRA
- :: BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
- -> [Instr] -- ^ accumulator for instructions already processed.
- -> [NatBasicBlock] -- ^ accumulator for blocks of fixup code.
- -> BlockId -- ^ id of the current block, for debugging.
- -> [LiveInstr] -- ^ liveness annotated instructions in this block.
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
+ -> BlockId -- ^ id of the current block, for debugging.
+ -> [LiveInstr instr] -- ^ liveness annotated instructions in this block.
- -> RegM ( [Instr] -- instructions after register allocation
- , [NatBasicBlock]) -- fresh blocks of fixup code.
+ -> RegM ( [instr] -- instructions after register allocation
+ , [NatBasicBlock instr]) -- fresh blocks of fixup code.
linearRA _ accInstr accFixup _ []
= return
- ( reverse accInstr -- instrs need to be returned in the correct order.
- , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
+ ( reverse accInstr -- instrs need to be returned in the correct order.
+ , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
linearRA block_live accInstr accFixups id (instr:instrs)
@@ -291,21 +294,24 @@ linearRA block_live accInstr accFixups id (instr:instrs)
-- | Do allocation for a single instruction.
raInsn
- :: BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
- -> [Instr] -- ^ accumulator for instructions already processed.
- -> BlockId -- ^ the id of the current block, for debugging
- -> LiveInstr -- ^ the instr to have its regs allocated, with liveness info.
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> BlockId -- ^ the id of the current block, for debugging
+ -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info.
-> RegM
- ( [Instr] -- new instructions
- , [NatBasicBlock]) -- extra fixup blocks
+ ( [instr] -- new instructions
+ , [NatBasicBlock instr]) -- extra fixup blocks
-raInsn _ new_instrs _ (Instr (COMMENT _) Nothing)
- = return (new_instrs, [])
+raInsn _ new_instrs _ (Instr ii Nothing)
+ | Just n <- takeDeltaInstr ii
+ = do setDeltaR n
+ return (new_instrs, [])
+
+raInsn _ new_instrs _ (Instr ii Nothing)
+ | isMetaInstr ii
+ = return (new_instrs, [])
-raInsn _ new_instrs _ (Instr (DELTA n) Nothing)
- = do
- setDeltaR n
- return (new_instrs, [])
raInsn block_live new_instrs id (Instr instr (Just live))
= do
@@ -318,7 +324,7 @@ raInsn block_live new_instrs id (Instr instr (Just live))
-- then we can eliminate the instruction.
-- (we can't eliminate it if the source register is on the stack, because
-- we do not want to use one spill slot for different virtual registers)
- case isRegRegMove instr of
+ case takeRegRegMoveInstr instr of
Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
isVirtualReg dst,
not (dst `elemUFM` assig),
@@ -354,7 +360,7 @@ raInsn _ _ _ instr
genRaInsn block_live new_instrs block_id instr r_dying w_dying =
- case regUsage instr of { RU read written ->
+ case regUsageOfInstr instr of { RU read written ->
case partition isRealReg written of { (real_written1,virt_written) ->
do
let
@@ -410,7 +416,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
(t,r) <- zip virt_read r_allocd
++ zip virt_written w_allocd ]
- patched_instr = patchRegs adjusted_instr patchLookup
+ patched_instr = patchRegsOfInstr adjusted_instr patchLookup
patchLookup x = case lookupUFM patch_map x of
Nothing -> x
Just y -> y
@@ -424,7 +430,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
-- erase reg->reg moves where the source and destination are the same.
-- If the src temp didn't die in this instr but happened to be allocated
-- to the same real reg as the destination, then we can erase the move anyway.
- let squashed_instr = case isRegRegMove patched_instr of
+ let squashed_instr = case takeRegRegMoveInstr patched_instr of
Just (src, dst)
| src == dst -> []
_ -> [patched_instr]
@@ -473,10 +479,11 @@ for allocateRegs on the temps *written*,
-}
saveClobberedTemps
- :: [RegNo] -- real registers clobbered by this instruction
- -> [Reg] -- registers which are no longer live after this insn
- -> RegM [Instr] -- return: instructions to spill any temps that will
- -- be clobbered.
+ :: Instruction instr
+ => [RegNo] -- real registers clobbered by this instruction
+ -> [Reg] -- registers which are no longer live after this insn
+ -> RegM [instr] -- return: instructions to spill any temps that will
+ -- be clobbered.
saveClobberedTemps [] _ = return [] -- common case
saveClobberedTemps clobbered dying = do
@@ -498,7 +505,7 @@ saveClobberedTemps clobbered dying = do
recordSpill (SpillClobber temp)
let new_assign = addToUFM assig temp (InBoth reg slot)
- clobber new_assign (spill : COMMENT (fsLit "spill clobber") : instrs) rest
+ clobber new_assign (spill : {- COMMENT (fsLit "spill clobber") : -} instrs) rest
clobberRegs :: [RegNo] -> RegM ()
clobberRegs [] = return () -- common case
@@ -533,12 +540,13 @@ clobberRegs clobbered = do
-- the list of free registers and free stack slots.
allocateRegsAndSpill
- :: Bool -- True <=> reading (load up spilled regs)
+ :: Instruction instr
+ => Bool -- True <=> reading (load up spilled regs)
-> [Reg] -- don't push these out
- -> [Instr] -- spill insns
+ -> [instr] -- spill insns
-> [RegNo] -- real registers allocated (accum.)
-> [Reg] -- temps to allocate
- -> RegM ([Instr], [RegNo])
+ -> RegM ([instr], [RegNo])
allocateRegsAndSpill _ _ spills alloc []
= return (spills,reverse alloc)
@@ -563,7 +571,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
loc -> do
freeregs <- getFreeRegsR
- case getFreeRegs (regClass r) freeregs of
+ case getFreeRegs (targetRegClass r) freeregs of
-- case (2): we have a free register
my_reg:_ -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
@@ -582,10 +590,10 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
keep' = map getUnique keep
candidates1 = [ (temp,reg,mem)
| (temp, InBoth reg mem) <- ufmToList assig,
- temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ]
candidates2 = [ (temp,reg)
| (temp, InReg reg) <- ufmToList assig,
- temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ]
-- in
ASSERT2(not (null candidates1 && null candidates2),
text (show freeregs) <+> ppr r <+> ppr assig) do
@@ -622,8 +630,8 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
(spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out
let spill_store = (if reading then id else reverse)
- [ COMMENT (fsLit "spill alloc")
- , spill_insn ]
+ [ -- COMMENT (fsLit "spill alloc")
+ spill_insn ]
-- record that this temp was spilled
recordSpill (SpillAlloc temp_to_push_out)
@@ -643,18 +651,19 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
-- | Load up a spilled temporary if we need to.
loadTemp
- :: Bool
+ :: Instruction instr
+ => Bool
-> Reg -- the temp being loaded
-> Maybe Loc -- the current location of this temp
-> RegNo -- the hreg to load the temp into
- -> [Instr]
- -> RegM [Instr]
+ -> [instr]
+ -> RegM [instr]
loadTemp True vreg (Just (InMem slot)) hreg spills
= do
insn <- loadR (RealReg hreg) slot
recordSpill (SpillLoad $ getUnique vreg)
- return $ COMMENT (fsLit "spill load") : insn : spills
+ return $ {- COMMENT (fsLit "spill load") : -} insn : spills
loadTemp _ _ _ _ spills =
return spills
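The allocator no longer matches on arch constructors like COMMENT and DELTA; it queries the instruction through takeDeltaInstr and isMetaInstr instead. A sketch of how a backend whose instruction type keeps the old constructors (as the deleted Instrs code did) might implement the two methods; the constructor names are carried over from the removed code, not confirmed for every port:

    -- Sketch only: plausible method bodies for a backend that keeps the
    -- COMMENT/DELTA constructors of the old Instrs module.
    takeDeltaInstr :: Instr -> Maybe Int
    takeDeltaInstr (DELTA i) = Just i
    takeDeltaInstr _         = Nothing

    isMetaInstr :: Instr -> Bool
    isMetaInstr COMMENT{} = True
    isMetaInstr DELTA{}   = True
    isMetaInstr _         = False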
diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
index 6d8809df52..878bfe313a 100644
--- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.PPC.FreeRegs
where
-import Regs
+import PPC.Regs
+import RegClass
+import Reg
import Outputable
diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
index aa716b585f..5514056318 100644
--- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.SPARC.FreeRegs
where
-import Regs
+import SPARC.Regs
+import RegClass
+import Reg
import Outputable
import FastBool
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index 56569415bb..62bf6adb2a 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -19,7 +19,7 @@ module RegAlloc.Linear.StackMap (
where
-import RegAllocInfo (maxSpillSlots)
+import RegAlloc.Linear.FreeRegs
import Outputable
import UniqFM
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs
index 94a8f7b52e..b9f7049844 100644
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/State.hs
@@ -34,11 +34,8 @@ import RegAlloc.Linear.StackMap
import RegAlloc.Linear.Base
import RegAlloc.Linear.FreeRegs
import RegAlloc.Liveness
-
-
-import Instrs
-import Regs
-import RegAllocInfo
+import Instruction
+import Reg
import Unique
import UniqSupply
@@ -85,14 +82,19 @@ makeRAStats state
{ ra_spillInstrs = binSpillReasons (ra_spills state) }
-spillR :: Reg -> Unique -> RegM (Instr, Int)
+spillR :: Instruction instr
+ => Reg -> Unique -> RegM (instr, Int)
+
spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
let (stack',slot) = getStackSlotFor stack temp
instr = mkSpillInstr reg delta slot
in
(# s{ra_stack=stack'}, (instr,slot) #)
-loadR :: Reg -> Int -> RegM Instr
+
+loadR :: Instruction instr
+ => Reg -> Int -> RegM instr
+
loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
(# s, mkLoadInstr reg delta slot #)
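spillR and loadR now build their instructions through the class methods mkSpillInstr and mkLoadInstr rather than arch-specific code. From the call sites here and in stripLiveBlock below, the argument order appears to be register, current stack delta, then slot; a sketch of the implied signatures (an assumption, not the definitive class):

    -- Sketch, inferred from the call sites: reg, delta, slot.
    mkSpillInstr :: Reg -> Int -> Int -> instr
    mkLoadInstr  :: Reg -> Int -> Int -> instr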
diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
index 95bf8ede82..137168e942 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
@@ -8,9 +8,8 @@ where
import RegAlloc.Linear.Base
import RegAlloc.Liveness
+import Instruction
-import RegAllocInfo
-import Instrs
import Cmm (GenBasicBlock(..))
import UniqFM
@@ -36,7 +35,10 @@ binSpillReasons reasons
-- | Count reg-reg moves remaining in this code.
-countRegRegMovesNat :: NatCmmTop -> Int
+countRegRegMovesNat
+ :: Instruction instr
+ => NatCmmTop instr -> Int
+
countRegRegMovesNat cmm
= execState (mapGenBlockTopM countBlock cmm) 0
where
@@ -45,7 +47,7 @@ countRegRegMovesNat cmm
return b
countInstr instr
- | Just _ <- isRegRegMove instr
+ | Just _ <- takeRegRegMoveInstr instr
= do modify (+ 1)
return instr
@@ -54,7 +56,10 @@ countRegRegMovesNat cmm
-- | Pretty print some RegAllocStats
-pprStats :: [NatCmmTop] -> [RegAllocStats] -> SDoc
+pprStats
+ :: Instruction instr
+ => [NatCmmTop instr] -> [RegAllocStats] -> SDoc
+
pprStats code statss
= let -- sum up all the instrs inserted by the spiller
spills = foldl' (plusUFM_C (zipWith (+)))
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
index 1306deb159..eedaca8cc0 100644
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.X86.FreeRegs
where
-import Regs
+import X86.Regs
+import RegClass
+import Reg
import Data.Word
import Data.Bits
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 8445034ab9..8faab5af92 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -20,7 +20,7 @@ module RegAlloc.Liveness (
mapBlockTop, mapBlockTopM,
mapGenBlockTop, mapGenBlockTopM,
stripLive,
- spillNatBlock,
+ stripLiveBlock,
slurpConflicts,
slurpReloadCoalesce,
eraseDeltasLive,
@@ -30,12 +30,13 @@ module RegAlloc.Liveness (
) where
+
+import Reg
+import Instruction
+
import BlockId
-import Regs
-import Instrs
-import PprMach
-import RegAllocInfo
import Cmm hiding (RegSet)
+import PprCmm()
import Digraph
import Outputable
@@ -65,18 +66,25 @@ emptyBlockMap = emptyBlockEnv
-- | A top level thing which carries liveness information.
-type LiveCmmTop
+type LiveCmmTop instr
= GenCmmTop
CmmStatic
LiveInfo
- (ListGraph (GenBasicBlock LiveInstr))
+ (ListGraph (GenBasicBlock (LiveInstr instr)))
-- the "instructions" here are actually more blocks,
-- single blocks are acyclic
-- multiple blocks are taken to be cyclic.
-- | An instruction with liveness information.
-data LiveInstr
- = Instr Instr (Maybe Liveness)
+data LiveInstr instr
+ = Instr instr (Maybe Liveness)
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
-- | Liveness information.
-- The regs which die are ones which are no longer live in the *next* instruction
@@ -100,11 +108,28 @@ data LiveInfo
(BlockMap RegSet) -- argument locals live on entry to this block
-- | A basic block with liveness information.
-type LiveBasicBlock
- = GenBasicBlock LiveInstr
+type LiveBasicBlock instr
+ = GenBasicBlock (LiveInstr instr)
+
+
+instance Outputable instr
+ => Outputable (LiveInstr instr) where
+ ppr (SPILL reg slot)
+ = hcat [
+ ptext (sLit "\tSPILL"),
+ char ' ',
+ ppr reg,
+ comma,
+ ptext (sLit "SLOT") <> parens (int slot)]
+
+ ppr (RELOAD slot reg)
+ = hcat [
+ ptext (sLit "\tRELOAD"),
+ char ' ',
+ ptext (sLit "SLOT") <> parens (int slot),
+ comma,
+ ppr reg]
-
-instance Outputable LiveInstr where
ppr (Instr instr Nothing)
= ppr instr
@@ -120,8 +145,7 @@ instance Outputable LiveInstr where
where pprRegs :: SDoc -> RegSet -> SDoc
pprRegs name regs
| isEmptyUniqSet regs = empty
- | otherwise = name <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg) $ uniqSetToList regs)
-
+ | otherwise = name <> (hcat $ punctuate space $ map ppr $ uniqSetToList regs)
instance Outputable LiveInfo where
ppr (LiveInfo static firstId liveOnEntry)
@@ -130,11 +154,12 @@ instance Outputable LiveInfo where
$$ text "# liveOnEntry = " <> ppr liveOnEntry
+
-- | map a function across all the basic blocks in this code
--
mapBlockTop
- :: (LiveBasicBlock -> LiveBasicBlock)
- -> LiveCmmTop -> LiveCmmTop
+ :: (LiveBasicBlock instr -> LiveBasicBlock instr)
+ -> LiveCmmTop instr -> LiveCmmTop instr
mapBlockTop f cmm
= evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
@@ -144,8 +169,8 @@ mapBlockTop f cmm
--
mapBlockTopM
:: Monad m
- => (LiveBasicBlock -> m LiveBasicBlock)
- -> LiveCmmTop -> m LiveCmmTop
+ => (LiveBasicBlock instr -> m (LiveBasicBlock instr))
+ -> LiveCmmTop instr -> m (LiveCmmTop instr)
mapBlockTopM _ cmm@(CmmData{})
= return cmm
@@ -187,7 +212,11 @@ mapGenBlockTopM f (CmmProc header label params (ListGraph blocks))
-- Slurping of conflicts and moves is wrapped up together so we don't have
-- to make two passes over the same code when we want to build the graph.
--
-slurpConflicts :: LiveCmmTop -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+slurpConflicts
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+
slurpConflicts live
= slurpCmm (emptyBag, emptyBag) live
@@ -205,12 +234,20 @@ slurpConflicts live
= (consBag rsLiveEntry conflicts, moves)
| otherwise
- = panic "RegLiveness.slurpBlock: bad block"
+ = panic "Liveness.slurpConflicts: bad block"
slurpLIs rsLive (conflicts, moves) []
= (consBag rsLive conflicts, moves)
- slurpLIs rsLive rs (Instr _ Nothing : lis) = slurpLIs rsLive rs lis
+ slurpLIs rsLive rs (Instr _ Nothing : lis)
+ = slurpLIs rsLive rs lis
+
+ -- we're not expecting to be slurping conflicts from spilled code
+ slurpLIs _ _ (SPILL _ _ : _)
+ = panic "Liveness.slurpConflicts: unexpected SPILL"
+
+ slurpLIs _ _ (RELOAD _ _ : _)
+ = panic "Liveness.slurpConflicts: unexpected RELOAD"
slurpLIs rsLiveEntry (conflicts, moves) (Instr instr (Just live) : lis)
= let
@@ -234,7 +271,7 @@ slurpConflicts live
--
rsConflicts = unionUniqSets rsLiveNext rsOrphans
- in case isRegRegMove instr of
+ in case takeRegRegMoveInstr instr of
Just rr -> slurpLIs rsLiveNext
( consBag rsConflicts conflicts
, consBag rr moves) lis
@@ -254,7 +291,11 @@ slurpConflicts live
-- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
--
--
-slurpReloadCoalesce :: LiveCmmTop -> Bag (Reg, Reg)
+slurpReloadCoalesce
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> Bag (Reg, Reg)
+
slurpReloadCoalesce live
= slurpCmm emptyBag live
@@ -285,23 +326,24 @@ slurpReloadCoalesce live
(_, mMoves) <- mapAccumLM slurpLI slotMap instrs
return $ listToBag $ catMaybes mMoves
- slurpLI :: UniqFM Reg -- current slotMap
- -> LiveInstr
+ slurpLI :: Instruction instr
+ => UniqFM Reg -- current slotMap
+ -> LiveInstr instr
-> State (UniqFM [UniqFM Reg]) -- blockId -> [slot -> reg]
-- for tracking slotMaps across jumps
( UniqFM Reg -- new slotMap
, Maybe (Reg, Reg)) -- maybe a new coalesce edge
- slurpLI slotMap (Instr instr _)
+ slurpLI slotMap li
-- remember what reg was stored into the slot
- | SPILL reg slot <- instr
+ | SPILL reg slot <- li
, slotMap' <- addToUFM slotMap slot reg
= return (slotMap', Nothing)
-- add an edge between this reg and the last one stored into the slot
- | RELOAD slot reg <- instr
+ | RELOAD slot reg <- li
= case lookupUFM slotMap slot of
Just reg2
| reg /= reg2 -> return (slotMap, Just (reg, reg2))
@@ -310,7 +352,8 @@ slurpReloadCoalesce live
Nothing -> return (slotMap, Nothing)
-- if we hit a jump, remember the current slotMap
- | targets <- jumpDests instr []
+ | Instr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
, not $ null targets
= do mapM_ (accSlotMap slotMap) targets
return (slotMap, Nothing)
@@ -340,7 +383,11 @@ slurpReloadCoalesce live
-- | Strip away liveness information, yielding NatCmmTop
-stripLive :: LiveCmmTop -> NatCmmTop
+stripLive
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> NatCmmTop instr
+
stripLive live
= stripCmm live
@@ -349,26 +396,26 @@ stripLive live
= CmmProc info label params
(ListGraph $ concatMap stripComp comps)
- stripComp (BasicBlock _ blocks) = map stripBlock blocks
- stripBlock (BasicBlock i instrs) = BasicBlock i (map stripLI instrs)
- stripLI (Instr instr _) = instr
+ stripComp (BasicBlock _ blocks) = map stripLiveBlock blocks
--- | Make real spill instructions out of SPILL, RELOAD pseudos
+-- | Strip away liveness information from a basic block,
+-- and make real spill instructions out of SPILL, RELOAD pseudos along the way.
-spillNatBlock :: NatBasicBlock -> NatBasicBlock
-spillNatBlock (BasicBlock i is)
+stripLiveBlock
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> NatBasicBlock instr
+
+stripLiveBlock (BasicBlock i lis)
= BasicBlock i instrs'
+
where (instrs', _)
- = runState (spillNat [] is) 0
+ = runState (spillNat [] lis) 0
spillNat acc []
= return (reverse acc)
- spillNat acc (DELTA i : instrs)
- = do put i
- spillNat acc instrs
-
spillNat acc (SPILL reg slot : instrs)
= do delta <- get
spillNat (mkSpillInstr reg delta slot : acc) instrs
@@ -377,22 +424,28 @@ spillNatBlock (BasicBlock i is)
= do delta <- get
spillNat (mkLoadInstr reg delta slot : acc) instrs
- spillNat acc (instr : instrs)
+ spillNat acc (Instr instr _ : instrs)
+ | Just i <- takeDeltaInstr instr
+ = do put i
+ spillNat acc instrs
+
+ spillNat acc (Instr instr _ : instrs)
= spillNat (instr : acc) instrs
-- | Erase Delta instructions.
-eraseDeltasLive :: LiveCmmTop -> LiveCmmTop
+eraseDeltasLive
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> LiveCmmTop instr
+
eraseDeltasLive cmm
= mapBlockTop eraseBlock cmm
where
- isDelta (DELTA _) = True
- isDelta _ = False
-
eraseBlock (BasicBlock id lis)
= BasicBlock id
- $ filter (\(Instr i _) -> not $ isDelta i)
+ $ filter (\(Instr i _) -> not $ isJust $ takeDeltaInstr i)
$ lis
@@ -401,8 +454,9 @@ eraseDeltasLive cmm
-- also erase reg -> reg moves when the destination dies in this instr.
patchEraseLive
- :: (Reg -> Reg)
- -> LiveCmmTop -> LiveCmmTop
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveCmmTop instr -> LiveCmmTop instr
patchEraseLive patchF cmm
= patchCmm cmm
@@ -427,7 +481,7 @@ patchEraseLive patchF cmm
patchInstrs (li : lis)
| Instr i (Just live) <- li'
- , Just (r1, r2) <- isRegRegMove i
+ , Just (r1, r2) <- takeRegRegMoveInstr i
, eatMe r1 r2 live
= patchInstrs lis
@@ -451,30 +505,38 @@ patchEraseLive patchF cmm
-- | Patch registers in this LiveInstr, including the liveness information.
--
patchRegsLiveInstr
- :: (Reg -> Reg)
- -> LiveInstr -> LiveInstr
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveInstr instr -> LiveInstr instr
patchRegsLiveInstr patchF li
= case li of
Instr instr Nothing
- -> Instr (patchRegs instr patchF) Nothing
+ -> Instr (patchRegsOfInstr instr patchF) Nothing
Instr instr (Just live)
-> Instr
- (patchRegs instr patchF)
+ (patchRegsOfInstr instr patchF)
(Just live
{ -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
liveBorn = mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live
, liveDieRead = mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live
, liveDieWrite = mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live })
+ SPILL reg slot
+ -> SPILL (patchF reg) slot
+
+ RELOAD slot reg
+ -> RELOAD slot (patchF reg)
+
---------------------------------------------------------------------------------
-- Annotate code with register liveness information
--
regLiveness
- :: NatCmmTop
- -> UniqSM LiveCmmTop
+ :: Instruction instr
+ => NatCmmTop instr
+ -> UniqSM (LiveCmmTop instr)
regLiveness (CmmData i d)
= returnUs $ CmmData i d
@@ -501,11 +563,15 @@ regLiveness (CmmProc info lbl params (ListGraph blocks@(first : _)))
lbl params (ListGraph liveBlocks)
-sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock]
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC (NatBasicBlock instr)]
+
sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
where
- getOutEdges :: [Instr] -> [BlockId]
- getOutEdges instrs = foldl' (\a x -> jumpDests x a) [] instrs
+ getOutEdges :: Instruction instr => [instr] -> [BlockId]
+ getOutEdges instrs = concat $ map jumpDestsOfInstr instrs
graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
| block@(BasicBlock id instrs) <- blocks ]
@@ -515,12 +581,13 @@ sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
-- Computing liveness
computeLiveness
- :: [SCC NatBasicBlock]
- -> ([SCC LiveBasicBlock], -- instructions annotated with list of registers
- -- which are "dead after this instruction".
- BlockMap RegSet) -- blocks annontated with set of live registers
- -- on entry to the block.
-
+ :: Instruction instr
+ => [SCC (NatBasicBlock instr)]
+ -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers
+ -- which are "dead after this instruction".
+ BlockMap RegSet) -- blocks annotated with set of live registers
+ -- on entry to the block.
+
-- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer
-- control to earlier ones only. The SCCs returned are in the *opposite*
-- order, which is exactly what we want for the next pass.
@@ -530,10 +597,12 @@ computeLiveness sccs
livenessSCCs
- :: BlockMap RegSet
- -> [SCC LiveBasicBlock] -- accum
- -> [SCC NatBasicBlock]
- -> ([SCC LiveBasicBlock], BlockMap RegSet)
+ :: Instruction instr
+ => BlockMap RegSet
+ -> [SCC (LiveBasicBlock instr)] -- accum
+ -> [SCC (NatBasicBlock instr)]
+ -> ( [SCC (LiveBasicBlock instr)]
+ , BlockMap RegSet)
livenessSCCs blockmap done [] = (done, blockmap)
@@ -561,8 +630,11 @@ livenessSCCs blockmap done
(a, panic "RegLiveness.livenessSCCs")
- linearLiveness :: BlockMap RegSet -> [NatBasicBlock]
- -> (BlockMap RegSet, [LiveBasicBlock])
+ linearLiveness
+ :: Instruction instr
+ => BlockMap RegSet -> [NatBasicBlock instr]
+ -> (BlockMap RegSet, [LiveBasicBlock instr])
+
linearLiveness = mapAccumL livenessBlock
-- probably the least efficient way to compare two
@@ -578,9 +650,10 @@ livenessSCCs blockmap done
-- | Annotate a basic block with register liveness information.
--
livenessBlock
- :: BlockMap RegSet
- -> NatBasicBlock
- -> (BlockMap RegSet, LiveBasicBlock)
+ :: Instruction instr
+ => BlockMap RegSet
+ -> NatBasicBlock instr
+ -> (BlockMap RegSet, LiveBasicBlock instr)
livenessBlock blockmap (BasicBlock block_id instrs)
= let
@@ -598,8 +671,9 @@ livenessBlock blockmap (BasicBlock block_id instrs)
-- filling in when regs are born
livenessForward
- :: RegSet -- regs live on this instr
- -> [LiveInstr] -> [LiveInstr]
+ :: Instruction instr
+ => RegSet -- regs live on this instr
+ -> [LiveInstr instr] -> [LiveInstr instr]
livenessForward _ [] = []
livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
@@ -607,7 +681,7 @@ livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
= li : livenessForward rsLiveEntry lis
| Just live <- mLive
- , RU _ written <- regUsage instr
+ , RU _ written <- regUsageOfInstr instr
= let
-- Regs that are written to but weren't live on entry to this instruction
-- are recorded as being born here.
@@ -628,11 +702,12 @@ livenessForward _ _ = panic "RegLiveness.livenessForward: no match"
-- filling in when regs die, and what regs are live across each instruction
livenessBack
- :: RegSet -- regs live on this instr
+ :: Instruction instr
+ => RegSet -- regs live on this instr
-> BlockMap RegSet -- regs live on entry to other BBs
- -> [LiveInstr] -- instructions (accum)
- -> [Instr] -- instructions
- -> (RegSet, [LiveInstr])
+ -> [LiveInstr instr] -- instructions (accum)
+ -> [instr] -- instructions
+ -> (RegSet, [LiveInstr instr])
livenessBack liveregs _ done [] = (liveregs, done)
@@ -640,32 +715,37 @@ livenessBack liveregs blockmap acc (instr : instrs)
= let (liveregs', instr') = liveness1 liveregs blockmap instr
in livenessBack liveregs' blockmap (instr' : acc) instrs
--- don't bother tagging comments or deltas with liveness
-liveness1 :: RegSet -> BlockMap RegSet -> Instr -> (RegSet, LiveInstr)
-liveness1 liveregs _ (instr@COMMENT{})
- = (liveregs, Instr instr Nothing)
-liveness1 liveregs _ (instr@DELTA{})
+-- don't bother tagging comments or deltas with liveness
+liveness1
+ :: Instruction instr
+ => RegSet
+ -> BlockMap RegSet
+ -> instr
+ -> (RegSet, LiveInstr instr)
+
+liveness1 liveregs _ instr
+ | isMetaInstr instr
= (liveregs, Instr instr Nothing)
liveness1 liveregs blockmap instr
- | not_a_branch
- = (liveregs1, Instr instr
+ | not_a_branch
+ = (liveregs1, Instr instr
(Just $ Liveness
{ liveBorn = emptyUniqSet
, liveDieRead = mkUniqSet r_dying
, liveDieWrite = mkUniqSet w_dying }))
- | otherwise
- = (liveregs_br, Instr instr
+ | otherwise
+ = (liveregs_br, Instr instr
(Just $ Liveness
{ liveBorn = emptyUniqSet
, liveDieRead = mkUniqSet r_dying_br
, liveDieWrite = mkUniqSet w_dying }))
- where
- RU read written = regUsage instr
+ where
+ RU read written = regUsageOfInstr instr
-- registers that were written here are dead going backwards.
-- registers that were read here are live going backwards.
@@ -682,7 +762,7 @@ liveness1 liveregs blockmap instr
-- union in the live regs from all the jump destinations of this
-- instruction.
- targets = jumpDests instr [] -- where we go from here
+ targets = jumpDestsOfInstr instr -- where we go from here
not_a_branch = null targets
targetLiveRegs target
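Since SPILL and RELOAD are now constructors of LiveInstr itself, the pseudos can be inspected without knowing anything about the underlying instruction type. A small sketch (hypothetical helper, not in the patch; foldl' is from Data.List) that counts them in a block:

    -- Hypothetical: tally spill/reload pseudos in a live block. Works for
    -- any instr because the constructors belong to LiveInstr.
    countSpillPseudos :: LiveBasicBlock instr -> (Int, Int)
    countSpillPseudos (BasicBlock _ lis)
            = foldl' count (0, 0) lis
            where   count (s, r) (SPILL  _ _) = (s + 1, r)
                    count (s, r) (RELOAD _ _) = (s, r + 1)
                    count acc    _            = acc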
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
deleted file mode 100644
index f0cb8b5c12..0000000000
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ /dev/null
@@ -1,94 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-specific parts of the register allocator
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
-
-module RegAllocInfo (
- -- shared code
- shortcutStatic,
-
- -- machine specific
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
-
- JumpDest,
- canShortcut,
- shortcutJump,
-
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- maxSpillSlots,
- spillSlotToOffset
- ) where
-
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
-import BlockId
-import Cmm
-import CLabel
-import Instrs
-import Regs
-import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-#if alpha_TARGET_ARCH
-import Alpha.RegInfo
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.RegInfo
-
-#elif powerpc_TARGET_ARCH
-import PPC.RegInfo
-
-#elif sparc_TARGET_ARCH
-import SPARC.RegInfo
-
-#endif
-
-
--- Here because it knows about JumpDest
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- | Just uq <- maybeAsmTemp lab
- = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- | Just uq <- maybeAsmTemp lbl1
- = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
- -- slightly dodgy, we're ignoring the second label, but this
- -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic fn other_static
- = other_static
-
-shortBlockId fn blockid@(BlockId uq) =
- case fn blockid of
- Nothing -> mkAsmTempLabel uq
- Just (DestBlockId blockid') -> shortBlockId fn blockid'
- Just (DestImm (ImmCLbl lbl)) -> lbl
- _other -> panic "shortBlockId"
-
-
-
-
-
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
new file mode 100644
index 0000000000..8b6b2d4160
--- /dev/null
+++ b/compiler/nativeGen/RegClass.hs
@@ -0,0 +1,31 @@
+
+-- | An architecture-independent description of a register's class.
+module RegClass
+ ( RegClass (..) )
+
+where
+
+import Outputable
+import Unique
+
+
+-- | The class of a register.
+-- Used in the register allocator.
+-- We treat all registers in a class as being interchangeable.
+--
+data RegClass
+ = RcInteger
+ | RcFloat
+ | RcDouble
+ deriving Eq
+
+
+instance Uniquable RegClass where
+ getUnique RcInteger = mkUnique 'L' 0
+ getUnique RcFloat = mkUnique 'L' 1
+ getUnique RcDouble = mkUnique 'L' 2
+
+instance Outputable RegClass where
+ ppr RcInteger = Outputable.text "I"
+ ppr RcFloat = Outputable.text "F"
+ ppr RcDouble = Outputable.text "D"
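The Uniquable instance is what lets the allocators key UniqFM and UniqSet structures by class. A throwaway illustration (hypothetical helper, not in the patch):

    -- Hypothetical: tally registers per class, keyed via the RegClass
    -- Uniquable instance above (addToUFM_C and emptyUFM are from UniqFM,
    -- foldl' from Data.List).
    classCounts :: (Reg -> RegClass) -> [Reg] -> UniqFM Int
    classCounts regClass regs
            = foldl' (\fm r -> addToUFM_C (+) fm (regClass r) 1) emptyUFM regs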
diff --git a/compiler/nativeGen/RegsBase.hs b/compiler/nativeGen/RegsBase.hs
deleted file mode 100644
index 00c87cb917..0000000000
--- a/compiler/nativeGen/RegsBase.hs
+++ /dev/null
@@ -1,105 +0,0 @@
-
-module RegsBase (
- RegNo,
- Reg(..),
- isRealReg,
- unRealReg,
- isVirtualReg,
- renameVirtualReg,
-
- RegClass(..)
-)
-
-where
-
-import Outputable ( Outputable(..) )
-import qualified Outputable
-import Panic
-import Unique
-
--- ---------------------------------------------------------------------------
--- Registers
-
--- RealRegs are machine regs which are available for allocation, in
--- the usual way. We know what class they are, because that's part of
--- the processor's architecture.
-
--- VirtualRegs are virtual registers. The register allocator will
--- eventually have to map them into RealRegs, or into spill slots.
--- VirtualRegs are allocated on the fly, usually to represent a single
--- value in the abstract assembly code (i.e. dynamic registers are
--- usually single assignment). With the new register allocator, the
--- single assignment restriction isn't necessary to get correct code,
--- although a better register allocation will result if single
--- assignment is used -- because the allocator maps a VirtualReg into
--- a single RealReg, even if the VirtualReg has multiple live ranges.
-
--- Virtual regs can be of either class, so that info is attached.
-
-type RegNo
- = Int
-
-data Reg
- = RealReg {-# UNPACK #-} !RegNo
- | VirtualRegI {-# UNPACK #-} !Unique
- | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
- | VirtualRegF {-# UNPACK #-} !Unique
- | VirtualRegD {-# UNPACK #-} !Unique
- deriving (Eq, Ord)
-
-
--- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
--- in the register allocator.
-instance Uniquable Reg where
- getUnique (RealReg i) = mkUnique 'C' i
- getUnique (VirtualRegI u) = u
- getUnique (VirtualRegHi u) = u
- getUnique (VirtualRegF u) = u
- getUnique (VirtualRegD u) = u
-
-
-isRealReg :: Reg -> Bool
-isRealReg = not . isVirtualReg
-
--- | Take the RegNo from a real reg
-unRealReg :: Reg -> RegNo
-unRealReg (RealReg i) = i
-unRealReg _ = panic "unRealReg on VirtualReg"
-
-isVirtualReg :: Reg -> Bool
-isVirtualReg (RealReg _) = False
-isVirtualReg (VirtualRegI _) = True
-isVirtualReg (VirtualRegHi _) = True
-isVirtualReg (VirtualRegF _) = True
-isVirtualReg (VirtualRegD _) = True
-
-
-renameVirtualReg :: Unique -> Reg -> Reg
-renameVirtualReg u r
- = case r of
- RealReg _ -> error "renameVirtualReg: can't change unique on a real reg"
- VirtualRegI _ -> VirtualRegI u
- VirtualRegHi _ -> VirtualRegHi u
- VirtualRegF _ -> VirtualRegF u
- VirtualRegD _ -> VirtualRegD u
-
-
--- RegClass --------------------------------------------------------------------
-data RegClass
- = RcInteger
- | RcFloat
- | RcDouble
- deriving Eq
-
-instance Uniquable RegClass where
- getUnique RcInteger = mkUnique 'L' 0
- getUnique RcFloat = mkUnique 'L' 1
- getUnique RcDouble = mkUnique 'L' 2
-
-instance Outputable RegClass where
- ppr RcInteger = Outputable.text "I"
- ppr RcFloat = Outputable.text "F"
- ppr RcDouble = Outputable.text "D"
-
-
-
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
new file mode 100644
index 0000000000..d921c12e7a
--- /dev/null
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -0,0 +1,1545 @@
+{-# OPTIONS -w #-}
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+module SPARC.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import SPARC.Instr
+import SPARC.Cond
+import SPARC.Regs
+import SPARC.RegInfo
+import Instruction
+import Size
+import Reg
+import PIC
+import NCGMonad
+
+-- Our intermediate code:
+import BlockId
+import Cmm
+import CLabel
+
+-- The rest:
+import BasicTypes
+import StaticFlags ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import FastString
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Int
+import DynFlags
+
+-- | Top level code generation
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen _
+ (CmmProc info lab params (ListGraph blocks))
+ = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+
+-- picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ let tops = proc : concat statics
+
+-- case picBaseMb of
+-- Just picBase -> initializePicBase picBase tops
+-- Nothing -> return tops
+
+ return tops
+
+
+cmmTopCodeGen _ (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+ | isWord64 ty -> assignReg_I64Code reg src
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+ | isWord64 ty -> assignMem_I64Code addr src
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg _ -> genJump arg
+
+ CmmReturn _
+ -> panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+-- Reg is the lower 32-bit temporary which contains the result.
+-- Use getHiVRegFromLo to find the other VRegUnique.
+--
+-- The rules of this simplified insn selection game are therefore that
+-- the returned Reg may be modified.
+--
+data ChildCode64
+ = ChildCode64
+ InstrBlock
+ Reg
+
+
+-- | Registers passed up the tree. If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
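+--
+-- For example, is32BitInteger 18446744073709551615 is True: truncated
+-- to Int64 the argument reads as -1, which certainly fits in 32 bits.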
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+ = panic "SPARC.CodeGen.mangleIndexTree: no match"
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ (src, code) <- getSomeReg addrTree
+ let
+ rhi = getHiVRegFromLo rlo
+ -- Big-endian store
+ mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
+ mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
+
+ return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code _ _
+ = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- | Produce code to compute a 64 bit value, returning the vreg holding
+-- the low 32 bits; use getHiVRegFromLo to find its partner.
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty)
+ | isWord64 ty
+ = do Amode amode addr_code <- getAmode addrTree
+ let result
+
+ | AddrRegReg r1 r2 <- amode
+ = do rlo <- getNewRegNat II32
+ tmp <- getNewRegNat II32
+ let rhi = getHiVRegFromLo rlo
+
+ return $ ChildCode64
+ ( addr_code
+ `appOL` toOL
+ [ ADD False False r1 (RIReg r2) tmp
+ , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+ , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+ rlo
+
+ | AddrRegImm r1 (ImmInt i) <- amode
+ = do rlo <- getNewRegNat II32
+ let rhi = getHiVRegFromLo rlo
+
+ return $ ChildCode64
+ ( addr_code
+ `appOL` toOL
+ [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+ , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+ rlo
+
+ result
+
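+-- Note that SPARC is big-endian, so in the loads above the high word of
+-- a 64-bit value lives at offset 0 and the low word at offset 4, matching
+-- the big-endian store in assignMem_I64Code.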
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
+ = do ChildCode64 code1 r1_lo <- iselExpr64 e1
+ let r1_hi = getHiVRegFromLo r1_lo
+
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ return $ ChildCode64
+ ( code1
+ `appOL` toOL
+ [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+ , ADD True False r1_hi (RIReg g0) r_dst_hi ])
+ r_dst_lo
+
+
+-- Addition of II64
+iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
+ = do ChildCode64 code1 r1_lo <- iselExpr64 e1
+ let r1_hi = getHiVRegFromLo r1_lo
+
+ ChildCode64 code2 r2_lo <- iselExpr64 e2
+ let r2_hi = getHiVRegFromLo r2_lo
+
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ let code = code1
+ `appOL` code2
+ `appOL` toOL
+ [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
+ , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
+
+ return $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_lo = mkVReg uq II32
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (
+ ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+ )
+
+-- Convert something into II64
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
+ = do
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ -- compute expr and load it into r_dst_lo
+ (a_reg, a_code) <- getSomeReg expr
+
+ let code = a_code
+ `appOL` toOL
+ [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
+ , mkRegRegMoveInstr a_reg r_dst_lo ]
+
+ return $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+
+-- | Compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+
+--
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+
+
+-- Load a literal float into a float register.
+-- The actual literal is stored in a new data area, and we load it
+-- at runtime.
+getRegister (CmmLit (CmmFloat f W32)) = do
+
+ -- a label for the new data area
+ lbl <- getNewLabelNat
+ tmp <- getNewRegNat II32
+
+ let code dst = toOL [
+ -- the data area
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f W32)],
+
+ -- load the literal
+ SETHI (HI (ImmCLbl lbl)) tmp,
+ LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
+
+ return (Any FF32 code)
+
+getRegister (CmmLit (CmmFloat d W64)) = do
+ lbl <- getNewLabelNat
+ tmp <- getNewRegNat II32
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d W64)],
+ SETHI (HI (ImmCLbl lbl)) tmp,
+ LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
+ return (Any FF64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
+ MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
+
+ MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
+ MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
+
+ MO_FF_Conv W64 W32 -> coerceDbl2Flt x
+ MO_FF_Conv W32 W64 -> coerceFlt2Dbl x
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ -- Conversions which are a nop on sparc
+ MO_UU_Conv from to
+ | from == to -> conversionNop (intSize to) x
+ MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
+ MO_UU_Conv W32 to -> conversionNop (intSize to) x
+ MO_SS_Conv W32 to -> conversionNop (intSize to) x
+
+ MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
+ MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
+ MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
+
+ -- sign extension
+ MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
+ MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
+ MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
+
+ other_op -> panic ("Unknown unary mach op: " ++ show mop)
+ where
+
+ -- | sign extend and widen
+ integerExtend
+ :: Width -- ^ width of source expression
+ -> Width -- ^ width of result
+ -> CmmExpr -- ^ source expression
+ -> NatM Register
+
+ integerExtend from to expr
+ = do -- load the expr into some register
+ (reg, e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ let bitCount
+ = case (from, to) of
+ (W8, W32) -> 24
+ (W16, W32) -> 16
+ (W8, W16) -> 24
+ let code dst
+ = e_code
+
+ -- local shift word left to load the sign bit
+ `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
+
+ -- arithmetic shift right to sign extend
+ `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
+
+ return (Any (intSize to) code)
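+ -- For example, extending W8 to W32: the byte 0x80 becomes 0x80000000
+ -- after the SLL by 24, and the SRA by 24 then yields 0xffffff80 = -128.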
+
+
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt W32 -> condIntReg GTT x y
+ MO_U_Ge W32 -> condIntReg GE x y
+ MO_U_Lt W32 -> condIntReg LTT x y
+ MO_U_Le W32 -> condIntReg LE x y
+
+ MO_U_Gt W16 -> condIntReg GU x y
+ MO_U_Ge W16 -> condIntReg GEU x y
+ MO_U_Lt W16 -> condIntReg LU x y
+ MO_U_Le W16 -> condIntReg LEU x y
+
+ MO_Add W32 -> trivialCode W32 (ADD False False) x y
+ MO_Sub W32 -> trivialCode W32 (SUB False False) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_S_Quot W32 -> idiv True False x y
+ MO_U_Quot W32 -> idiv False False x y
+
+ MO_S_Rem W32 -> irem True x y
+ MO_U_Rem W32 -> irem False x y
+
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_F_Add w -> trivialFCode w FADD x y
+ MO_F_Sub w -> trivialFCode w FSUB x y
+ MO_F_Mul w -> trivialFCode w FMUL x y
+ MO_F_Quot w -> trivialFCode w FDIV x y
+
+ MO_And rep -> trivialCode rep (AND False) x y
+ MO_Or rep -> trivialCode rep (OR False) x y
+ MO_Xor rep -> trivialCode rep (XOR False) x y
+
+ MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+ MO_Shl rep -> trivialCode rep SLL x y
+ MO_U_Shr rep -> trivialCode rep SRL x y
+ MO_S_Shr rep -> trivialCode rep SRA x y
+
+{-
+ MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
+ [promote x, promote y])
+ where promote x = CmmMachOp MO_F32_to_Dbl [x]
+ MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
+ [x, y])
+-}
+ other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+ -- | Generate an integer division instruction.
+ idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned division with a 32 bit numerator,
+ -- we can just clear the Y register.
+ idiv False cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- For _signed_ division with a 32 bit numerator,
+ -- we have to sign extend the numerator into the Y register.
+ idiv True cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
+ , SRA tmp (RIImm (ImmInt 16)) tmp
+
+ , WRY tmp g0
+ , SDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- | Do an integer remainder.
+ --
+ -- NOTE: The SPARC v8 architecture manual says that integer division
+ -- instructions _may_ generate a remainder, depending on the implementation.
+ -- If so it is _recommended_ that the remainder is placed in the Y register.
+ --
+ -- The UltraSparc 2007 manual says Y is _undefined_ after division.
+ --
+ -- The SPARC T2 doesn't store the remainder, not sure about the others.
+ -- It's probably best not to worry about it, and just generate our own
+ -- remainders.
+ --
+ irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned operands:
+ -- Division is between a 64 bit numerator and a 32 bit denominator,
+ -- so we still have to clear the Y register.
+ irem False x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV False a_reg (RIReg b_reg) tmp_reg
+ , UMUL False tmp_reg (RIReg b_reg) tmp_reg
+ , SUB False False a_reg (RIReg tmp_reg) dst]
+
+ return (Any II32 code)
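+ -- In other words dst = a - (a `quot` b) * b: the quotient and multiply
+ -- are done explicitly rather than trusting Y to hold the remainder.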
+
+
+ -- For signed operands:
+ -- Make sure to sign extend into the Y register, or the remainder
+ -- will have the wrong sign when the numerator is negative.
+ --
+ -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
+ -- not the full 32. Not sure why this is, something to do with overflow?
+ -- If anyone cares enough about the speed of signed remainder they
+ -- can work it out themselves (then tell me). -- BL 2009/01/20
+
+ irem True x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp1_reg <- getNewRegNat II32
+ tmp2_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , WRY tmp1_reg g0
+
+ , SDIV False a_reg (RIReg b_reg) tmp2_reg
+ , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
+ , SUB False False a_reg (RIReg tmp2_reg) dst]
+
+ return (Any II32 code)
+
+
+ imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getSomeReg a
+ (b_reg, b_code) <- getSomeReg b
+ res_lo <- getNewRegNat II32
+ res_hi <- getNewRegNat II32
+ let
+ shift_amt = case rep of
+ W32 -> 31
+ W64 -> 63
+ _ -> panic "shift_amt"
+ code dst = a_code `appOL` b_code `appOL`
+ toOL [
+ SMUL False a_reg (RIReg b_reg) res_lo,
+ RDY res_hi,
+ SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+ SUB False False res_lo (RIReg res_hi) dst
+ ]
+ return (Any II32 code)
+
+getRegister (CmmLoad mem pk) = do
+ Amode src code <- getAmode mem
+ let
+ code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
+ return (Any (cmmTypeSize pk) code__2)
+
+getRegister (CmmLit (CmmInt i _))
+ | fits13Bits i
+ = let
+ src = ImmInt (fromInteger i)
+ code dst = unitOL (OR False g0 (RIImm src) dst)
+ in
+ return (Any II32 code)
+
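+-- A general 32 bit literal: SETHI fills in the high 22 bits of the
+-- destination, and the OR with the %lo part supplies the remaining
+-- 10 low bits.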
+getRegister (CmmLit lit)
+ = let imm = litToImm lit
+ code dst = toOL [
+ SETHI (HI imm) dst,
+ OR False dst (RIImm (LO imm)) dst]
+ in return (Any II32 code)
+
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (-(fromInteger i))
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits i
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (fromInteger i)
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ let
+ code = codeX `appOL` codeY
+ return (Amode (AddrRegReg regX regY) code)
+
+getAmode (CmmLit lit)
+ = do
+ let imm__2 = litToImm lit
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+
+ let code = toOL [ SETHI (HI imm__2) tmp1
+ , OR False tmp1 (RIImm (LO imm__2)) tmp2]
+
+ return (Amode (AddrRegReg tmp2 g0) code)
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+
+getCondCode :: CmmExpr -> NatM CondCode
+getCondCode (CmmMachOp mop [x, y])
+ =
+ case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+ other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other = pprPanic "getCondCode(sparc)" (ppr other)
+
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
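+-- SPARC has no separate compare instruction: the synthetic 'cmp x, y'
+-- is really 'subcc x, y, %g0', a subtract that discards its result and
+-- only sets the condition codes. That is what the 'SUB False True .. g0'
+-- forms below generate.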
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ src2 = ImmInt (fromInteger y)
+ code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code__2 = code1 `appOL` code2 `snocOL`
+ SUB False True src1 (RIReg src2) g0
+ return (CondCode False cond code__2)
+
+
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp <- getNewRegNat FF64
+ let
+ promote x = FxTOy FF32 FF64 x tmp
+
+ pk1 = cmmExprType x
+ pk2 = cmmExprType y
+
+ code__2 =
+ if pk1 `cmmEqType` pk2 then
+ code1 `appOL` code2 `snocOL`
+ FCMP True (cmmTypeSize pk1) src1 src2
+ else if typeWidth pk1 == W32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ FCMP True FF64 tmp src2
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ FCMP True FF64 src1 tmp
+ return (CondCode True cond code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode pk reg src = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ where
+ dst = getRegisterReg reg
+
+
+
+-- Floating point assignment to memory
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_FltCode pk addr src = do
+ Amode dst__2 code1 <- getAmode addr
+ (src__2, code2) <- getSomeReg src
+ tmp1 <- getNewRegNat pk
+ let
+ pk__2 = cmmExprType src
+ code__2 = code1 `appOL` code2 `appOL`
+ if sizeToWidth pk == typeWidth pk__2
+ then unitOL (ST pk src__2 dst__2)
+ else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
+ , ST pk tmp1 dst__2]
+ return code__2
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+ srcRegister <- getRegister srcCmmExpr
+ let dstReg = getRegisterReg dstCmmReg
+
+ return $ case srcRegister of
+ Any _ code -> code dstReg
+ Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
+
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (toOL [CALL (Left target) 0 True, NOP])
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = do
+ (target, code) <- getSomeReg tree
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+
+
+genCondJump bid bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (
+ code `appOL`
+ toOL (
+ if is_float
+ then [NOP, BF cond False bid, NOP]
+ else [BI cond False bid, NOP]
+ )
+ )
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+{-
+ The SPARC calling convention is an absolute
+ nightmare. The first 6x32 bits of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+ 16 words above the stack pointer is a word for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+
+-}
+
+
+-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
+-- are guaranteed to take place before writes afterwards (unlike on PowerPC).
+-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
+--
+-- In the SPARC case we don't need a barrier.
+--
+genCCall (CmmPrim (MO_WriteBarrier)) _ _
+ = return nilOL
+
+genCCall target dest_regs argsAndHints
+ = do
+ -- strip hints from the arg regs
+ let args :: [CmmExpr]
+ args = map hintlessCmm argsAndHints
+
+
+ -- work out the arguments, and assign them to integer regs
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let (argcodes, vregss) = unzip argcode_and_vregs
+ let vregs = concat vregss
+
+ let n_argRegs = length allArgRegs
+ let n_argRegs_used = min (length vregs) n_argRegs
+
+
+ -- deal with static vs dynamic call targets
+ callinsns <- case target of
+ CmmCallee (CmmLit (CmmLabel lbl)) conv ->
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ CmmCallee expr conv
+ -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ CmmPrim mop
+ -> do res <- outOfLineFloatOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ return lblOrMopExpr
+
+ let argcode = concatOL argcodes
+
+ let (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
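+ -- For example, nine argument words with six arg regs leaves diff = 3,
+ -- which is rounded up to nn = 4 words (16 bytes), keeping %sp 8-byte
+ -- aligned.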
+
+ let transfer_code
+ = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+
+ return
+ $ argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up `appOL`
+ assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+-- or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+ -- If the expr produces a 64 bit int, then we can just use iselExpr64
+ | isWord64 (cmmExprType arg)
+ = do (ChildCode64 code r_lo) <- iselExpr64 arg
+ let r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+
+ | otherwise
+ = do (src, code) <- getSomeReg arg
+ let pk = cmmExprType arg
+
+ case cmmTypeSize pk of
+
+ -- Move a 64 bit float argument into two integer regs.
+ FF64 -> do
+ v1 <- getNewRegNat II32
+ v2 <- getNewRegNat II32
+
+ let Just f0_high = fPair f0
+
+ let code2 =
+ code `snocOL`
+ FMOV FF64 src f0 `snocOL`
+ ST FF32 f0 (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1 `snocOL`
+ ST FF32 f0_high (spRel 16) `snocOL`
+ LD II32 (spRel 16) v2
+
+ return (code2, [v1,v2])
+
+ -- Move a 32 bit float argument into an integer reg.
+ FF32 -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ ST FF32 src (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1
+
+ return (code2, [v1])
+
+ -- Move an integer argument into an integer reg.
+ other -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ OR False g0 (RIReg src) v1
+
+ return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been
+-- marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset
+ = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset
+ = ST II32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
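+-- For example, with eight argument words the first six moves target
+-- %o0..%o5, and the last two are stored at word offsets 23 and 24,
+-- i.e. [%sp+92] and [%sp+96], matching the calling convention comment
+-- in genCCall.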
+
+
+-- | Assign results returned from the call into their
+-- destination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]
+ = let rep = localRegType dest
+ width = typeWidth rep
+ r_dest = getRegisterReg (CmmLocal dest)
+
+ result
+ | isFloatType rep
+ , W32 <- width
+ = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+ | isFloatType rep
+ , W64 <- width
+ = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W32 <- width
+ = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W64 <- width
+ , r_dest_hi <- getHiVRegFromLo r_dest
+ = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+ , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+ in result
+
+assign_code _
+ = panic "SPARC.CodeGen.assign_code: many return values not supported"
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp
+ :: CallishMachOp
+ -> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop
+ = do let functionName
+ = outOfLineFloatOp_table mop
+
+ dflags <- getDynFlagsNat
+ mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
+ $ mkForeignLabel functionName Nothing True IsFunction
+
+ let mopLabelOrExpr
+ = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+
+ return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table
+ :: CallishMachOp
+ -> FastString
+
+outOfLineFloatOp_table mop
+ = case mop of
+ MO_F32_Exp -> fsLit "expf"
+ MO_F32_Log -> fsLit "logf"
+ MO_F32_Sqrt -> fsLit "sqrtf"
+ MO_F32_Pwr -> fsLit "powf"
+
+ MO_F32_Sin -> fsLit "sinf"
+ MO_F32_Cos -> fsLit "cosf"
+ MO_F32_Tan -> fsLit "tanf"
+
+ MO_F32_Asin -> fsLit "asinf"
+ MO_F32_Acos -> fsLit "acosf"
+ MO_F32_Atan -> fsLit "atanf"
+
+ MO_F32_Sinh -> fsLit "sinhf"
+ MO_F32_Cosh -> fsLit "coshf"
+ MO_F32_Tanh -> fsLit "tanhf"
+
+ MO_F64_Exp -> fsLit "exp"
+ MO_F64_Log -> fsLit "log"
+ MO_F64_Sqrt -> fsLit "sqrt"
+ MO_F64_Pwr -> fsLit "pow"
+
+ MO_F64_Sin -> fsLit "sin"
+ MO_F64_Cos -> fsLit "cos"
+ MO_F64_Tan -> fsLit "tan"
+
+ MO_F64_Asin -> fsLit "asin"
+ MO_F64_Acos -> fsLit "acos"
+ MO_F64_Atan -> fsLit "atan"
+
+ MO_F64_Sinh -> fsLit "sinh"
+ MO_F64_Cosh -> fsLit "cosh"
+ MO_F64_Tanh -> fsLit "tanh"
+
+ other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+ (pprCallishMachOp mop)
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+ | opt_PIC
+ = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+
+ | otherwise
+ = do (e_reg, e_code) <- getSomeReg expr
+
+ base_reg <- getNewRegNat II32
+ offset_reg <- getNewRegNat II32
+ dst <- getNewRegNat II32
+
+ label <- getNewLabelNat
+ let jumpTable = map jumpTableEntry ids
+
+ return $ e_code `appOL`
+ toOL
+ -- the jump table
+ [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+ -- load base of jump table
+ , SETHI (HI (ImmCLbl label)) base_reg
+ , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+
+ -- the addrs in the table are 32 bits wide.
+ , SLL e_reg (RIImm $ ImmInt 2) offset_reg
+
+ -- load and jump to the destination
+ , LD II32 (AddrRegReg base_reg offset_reg) dst
+ , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
+ , NOP ]
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
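+-- The EQQ and NE cases below use the carry flag: 'SUB False True .. g0'
+-- is subcc, which sets the carry iff the compared value is non-zero.
+-- 'SUB True False g0 (-1) dst' is then subx, computing 0 - (-1) - carry
+-- = 1 - carry, so dst gets 1 exactly when the value was zero. The NE
+-- cases instead read the carry back directly with addx
+-- ('ADD True False g0 0 dst').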
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any II32 code__2)
+
+condIntReg EQQ x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any II32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any II32 code__2)
+
+condIntReg NE x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any II32 code__2)
+
+condIntReg cond x y = do
+ bid1@(BlockId lbl1) <- getBlockIdNat
+ bid2@(BlockId lbl2) <- getBlockIdNat
+ CondCode _ cond cond_code <- condIntCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ BI cond False bid1, NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False bid2, NOP,
+ NEWBLOCK bid1,
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK bid2]
+ return (Any II32 code__2)
+
+condFltReg cond x y = do
+ bid1@(BlockId lbl1) <- getBlockIdNat
+ bid2@(BlockId lbl2) <- getBlockIdNat
+ CondCode _ cond cond_code <- condFltCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ NOP,
+ BF cond False bid1, NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False bid2, NOP,
+ NEWBLOCK bid1,
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK bid2]
+ return (Any II32 code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching a CmmInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode
+ :: Width -> (Reg -> RI -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+ return (Any II32 code__2)
+
+trivialCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `snocOL`
+ instr src1 (RIReg src2) dst
+ return (Any II32 code__2)
+
+------------
+trivialFCode
+ :: Width -> (Size -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+
+trivialFCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
+ tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
+ tmp <- getNewRegNat FF64
+ let
+ promote x = FxTOy FF32 FF64 x tmp
+
+ pk1 = cmmExprType x
+ pk2 = cmmExprType y
+
+ code__2 dst =
+ if pk1 `cmmEqType` pk2 then
+ code1 `appOL` code2 `snocOL`
+ instr (floatSize pk) src1 src2 dst
+ else if typeWidth pk1 == W32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ instr FF64 tmp src2 dst
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ instr FF64 src1 tmp dst
+ return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
+ code__2)
+
+------------
+trivialUCode
+ :: Size -> (RI -> Reg -> Instr)
+ -> CmmExpr -> NatM Register
+
+trivialUCode size instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat size
+ let
+ code__2 dst = code `snocOL` instr (RIReg src) dst
+ return (Any size code__2)
+
+-------------
+trivialUFCode
+ :: Size -> (Reg -> Reg -> Instr)
+ -> CmmExpr -> NatM Register
+
+trivialUFCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr src dst
+ return (Any pk code__2)
+
+
+
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+
+
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP width1 width2 x = do
+ (src, code) <- getSomeReg x
+ let
+ code__2 dst = code `appOL` toOL [
+ ST (intSize width1) src (spRel (-2)),
+ LD (intSize width1) (spRel (-2)) dst,
+ FxTOy (intSize width1) (floatSize width2) dst dst]
+ return (Any (floatSize width2) code__2)
+
+
+-- | Coerce a floating point value to integer
+--
+-- NOTE: On sparc v9 there are no instructions to move a value from an
+-- FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int width1 width2 x
+ = do let fsize1 = floatSize width1
+ fsize2 = floatSize width2
+
+ isize2 = intSize width2
+
+ (fsrc, code) <- getSomeReg x
+ fdst <- getNewRegNat fsize2
+
+ let code2 dst
+ = code
+ `appOL` toOL
+ -- convert float to int format, leaving it in a float reg.
+ [ FxTOy fsize1 isize2 fsrc fdst
+
+ -- store the int into mem, then load it back to move
+ -- it into an actual int reg.
+ , ST fsize2 fdst (spRel (-2))
+ , LD isize2 (spRel (-2)) dst]
+
+ return (Any isize2 code2)
+
+------------
+coerceDbl2Flt x = do
+ (src, code) <- getSomeReg x
+ return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
+
+------------
+coerceFlt2Dbl x = do
+ (src, code) <- getSomeReg x
+ return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
+
+
+
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from Instrs (SDM):
+
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+ = 23
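+-- (23 words * 4 bytes = 92 bytes, matching the [%sp+92] starting offset
+-- for stack arguments described in the genCCall comment above.)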
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs
new file mode 100644
index 0000000000..d0f12efcf5
--- /dev/null
+++ b/compiler/nativeGen/SPARC/Cond.hs
@@ -0,0 +1,53 @@
+
+module SPARC.Cond (
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned
+)
+
+where
+
+-- | Branch condition codes.
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | NEVER
+ | POS
+ | VC
+ | VS
+ deriving Eq
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9c332317ea..6dc6477f9c 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -10,56 +10,55 @@
#include "nativeGen/NCG.h"
module SPARC.Instr (
- Cond(..),
RI(..),
Instr(..),
- riZero,
- fpRelEA,
- moveSp,
- fPair,
+ maxSpillSlots
)
where
-import BlockId
-import RegsBase
import SPARC.Regs
+import SPARC.Cond
+import Instruction
+import RegClass
+import Reg
+import Size
+
+import BlockId
import Cmm
import Outputable
-import Constants ( wORD_SIZE )
+import Constants (rESERVED_C_STACK_BYTES)
import FastString
+import FastBool
import GHC.Exts
--- | Branch condition codes.
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | NEVER
- | POS
- | VC
- | VS
- deriving Eq
-
-
-- | Register or immediate
data RI
= RIReg Reg
| RIImm Imm
--- | SPARC isntruction set.
+-- | 'Instruction' instance for the SPARC instruction set.
+instance Instruction Instr where
+ regUsageOfInstr = sparc_regUsageOfInstr
+ patchRegsOfInstr = sparc_patchRegsOfInstr
+ isJumpishInstr = sparc_isJumpishInstr
+ jumpDestsOfInstr = sparc_jumpDestsOfInstr
+ patchJumpInstr = sparc_patchJumpInstr
+ mkSpillInstr = sparc_mkSpillInstr
+ mkLoadInstr = sparc_mkLoadInstr
+ takeDeltaInstr = sparc_takeDeltaInstr
+ isMetaInstr = sparc_isMetaInstr
+ mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
+ takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
+ mkJumpInstr = sparc_mkJumpInstr
+
+
+-- | SPARC instruction set.
+-- Not complete. This is only the ones we need.
+--
data Instr
-- meta ops --------------------------------------------------
@@ -78,12 +77,6 @@ data Instr
-- specify current stack offset for benefit of subsequent passes.
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-- real instrs -----------------------------------------------
-- Loads and stores.
| LD Size AddrMode Reg -- size, src, dst
@@ -157,39 +150,290 @@ data Instr
| CALL (Either Imm Reg) Int Bool -- target, args, terminal
--- | Check if a RI represents a zero value.
--- - a literal zero
--- - register %g0, which is always zero.
+-- | regUsage returns the sets of src and destination registers used
+-- by a particular instruction. Machine registers that are
+-- pre-allocated to stgRegs are filtered out, because they are
+-- uninteresting from a register allocation standpoint. (We wouldn't
+-- want them to end up on the free list!) As far as we are concerned,
+-- the fixed registers simply don't exist (for allocation purposes,
+-- anyway).
+
+-- regUsage doesn't need to do any trickery for jumps and such. Just
+-- state precisely the regs read and written by that insn. The
+-- consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+sparc_regUsageOfInstr :: Instr -> RegUsage
+sparc_regUsageOfInstr instr
+ = case instr of
+ LD _ addr reg -> usage (regAddr addr, [reg])
+ ST _ reg addr -> usage (reg : regAddr addr, [])
+ ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ RDY rd -> usage ([], [rd])
+ WRY r1 r2 -> usage ([r1, r2], [])
+ AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SETHI _ reg -> usage ([], [reg])
+ FABS _ r1 r2 -> usage ([r1], [r2])
+ FADD _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FCMP _ _ r1 r2 -> usage ([r1, r2], [])
+ FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FMOV _ r1 r2 -> usage ([r1], [r2])
+ FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FNEG _ r1 r2 -> usage ([r1], [r2])
+ FSQRT _ r1 r2 -> usage ([r1], [r2])
+ FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FxTOy _ _ r1 r2 -> usage ([r1], [r2])
+
+ JMP addr -> usage (regAddr addr, [])
+ JMP_TBL addr _ -> usage (regAddr addr, [])
+
+ CALL (Left _ ) _ True -> noUsage
+ CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs)
+ CALL (Right reg) _ True -> usage ([reg], [])
+ CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
+ _ -> noUsage
+
+ where
+ usage (src, dst)
+ = RU (filter interesting src) (filter interesting dst)
+
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
+
+-- | Interesting regs are virtuals, or ones that are allocatable
+-- by the register allocator.
+interesting :: Reg -> Bool
+interesting reg
+ = case reg of
+ VirtualRegI _ -> True
+ VirtualRegHi _ -> True
+ VirtualRegF _ -> True
+ VirtualRegD _ -> True
+ RealReg i -> isFastTrue (freeReg i)
+
+
+
+-- | Apply a given mapping to all the register references in this instruction.
+sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+sparc_patchRegsOfInstr instr env = case instr of
+ LD sz addr reg -> LD sz (fixAddr addr) (env reg)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+
+ ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
+ SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
+ UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
+ SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
+ UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
+ SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
+ RDY rd -> RDY (env rd)
+ WRY r1 r2 -> WRY (env r1) (env r2)
+ AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
+ ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
+ OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
+ ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
+ XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
+ XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
+ SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
+ SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
+ SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
+
+ SETHI imm reg -> SETHI imm (env reg)
+
+ FABS s r1 r2 -> FABS s (env r1) (env r2)
+ FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
+ FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
+ FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
+ FMOV s r1 r2 -> FMOV s (env r1) (env r2)
+ FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
+ FNEG s r1 r2 -> FNEG s (env r1) (env r2)
+ FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
+ FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
+ FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
+
+ JMP addr -> JMP (fixAddr addr)
+ JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids
+
+ CALL (Left i) n t -> CALL (Left i) n t
+ CALL (Right r) n t -> CALL (Right (env r)) n t
+ _ -> instr
+
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+
+--------------------------------------------------------------------------------
+sparc_isJumpishInstr :: Instr -> Bool
+sparc_isJumpishInstr instr
+ = case instr of
+ BI{} -> True
+ BF{} -> True
+ JMP{} -> True
+ JMP_TBL{} -> True
+ CALL{} -> True
+ _ -> False
+
+sparc_jumpDestsOfInstr :: Instr -> [BlockId]
+sparc_jumpDestsOfInstr insn
+ = case insn of
+ BI _ _ id -> [id]
+ BF _ _ id -> [id]
+ JMP_TBL _ ids -> ids
+ _ -> []
+
+
+sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+sparc_patchJumpInstr insn patchF
+ = case insn of
+ BI cc annul id -> BI cc annul (patchF id)
+ BF cc annul id -> BF cc annul (patchF id)
+ _ -> insn
+
+
+--------------------------------------------------------------------------------
+-- | Make a spill instruction.
+-- On SPARC we spill below the frame pointer, leaving 2 words per spill slot.
+sparc_mkSpillInstr
+ :: Reg -- ^ register to spill
+ -> Int -- ^ current stack delta
+ -> Int -- ^ spill slot to use
+ -> Instr
+
+sparc_mkSpillInstr reg _ slot
+ = let off = spillSlotToOffset slot
+ off_w = 1 + (off `div` 4)
+ sz = case regClass reg of
+ RcInteger -> II32
+ RcFloat -> FF32
+ RcDouble -> FF64
+
+ in ST sz reg (fpRel (negate off_w))
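+-- For example, spill slot 0 is at byte offset 64, so off_w = 17 and the
+-- store lands at [%fp - 68].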
+
+
+-- | Make a spill reload instruction.
+sparc_mkLoadInstr
+ :: Reg -- ^ register to load
+ -> Int -- ^ current stack delta
+ -> Int -- ^ spill slot to use
+ -> Instr
+
+sparc_mkLoadInstr reg _ slot
+ = let off = spillSlotToOffset slot
+ off_w = 1 + (off `div` 4)
+ sz = case regClass reg of
+ RcInteger -> II32
+ RcFloat -> FF32
+ RcDouble -> FF64
+
+ in LD sz (fpRel (- off_w)) reg
+
+-- | Convert a spill slot number to a *byte* offset, with no sign.
--
-riZero :: RI -> Bool
-riZero (RIImm (ImmInt 0)) = True
-riZero (RIImm (ImmInteger 0)) = True
-riZero (RIReg (RealReg 0)) = True
-riZero _ = False
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + spillSlotSize * slot
+
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ ( text "invalid spill location: " <> int slot
+ $$ text "maxSpillSlots: " <> int maxSpillSlots)
+
+
+-- | We need 8 bytes because our largest registers are 64 bit.
+spillSlotSize :: Int
+spillSlotSize = 8
--- | Calculate the effective address which would be used by the
--- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
--- alas -- can't have fpRelEA here because of module dependencies.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
- = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+-- | The maximum number of spill slots available on the C stack.
+-- If we use up all of the slots, then we're screwed.
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
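+--
+-- For instance, if rESERVED_C_STACK_BYTES were 16k (a value assumed here
+-- purely for illustration), this would give (16384 - 64) / 8 - 1 = 2039
+-- slots.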
--- | Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
- = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+sparc_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+sparc_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
+sparc_isMetaInstr
+ :: Instr
+ -> Bool
+
+sparc_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+-- | Make a reg-reg move instruction.
+-- On SPARC v8 there are no instructions to move directly between
+-- floating point and integer regs. If we need to do that then we
+-- have to go via memory.
+--
+sparc_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+sparc_mkRegRegMoveInstr src dst
+ = case regClass src of
+ RcInteger -> ADD False False src (RIReg g0) dst
+ RcDouble -> FMOV FF64 src dst
+ RcFloat -> FMOV FF32 src dst
+
+
+-- | Check whether an instruction represents a reg-reg move.
+-- The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- by assigning the src and dest temporaries to the same real register.
+--
+sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+sparc_takeRegRegMoveInstr instr
+ = case instr of
+ ADD False False src (RIReg src2) dst
+ | g0 == src2 -> Just (src, dst)
+
+ FMOV FF64 src dst -> Just (src, dst)
+ FMOV FF32 src dst -> Just (src, dst)
+ _ -> Nothing
--- | Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n)
- | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
+-- | Make an unconditional branch instruction.
+sparc_mkJumpInstr
+ :: BlockId
+ -> [Instr]
-fPair (VirtualRegD u)
- = Just (VirtualRegHi u)
+sparc_mkJumpInstr id
+ = [BI ALWAYS False id
+ , NOP] -- fill the branch delay slot.
-fPair _
- = trace ("MachInstrs.fPair: can't get high half of supposed double reg ")
- Nothing
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 7d64df1b15..a0d5fffce1 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module SPARC.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
- pprDataItem,
- pprInstr
+ pprDataItem
)
where
@@ -20,20 +23,119 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import PprBase
-import RegsBase
import SPARC.Regs
+import SPARC.RegInfo
import SPARC.Instr
+import SPARC.Cond
+import Instruction
+import Reg
+import Size
+import PprBase
import BlockId
import Cmm
-
import CLabel
-import Panic ( panic )
import Unique ( pprUnique )
+import qualified Outputable
+import Outputable (Outputable, panic)
import Pretty
import FastString
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+ -- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+ ptext (sLit ".align ") <> int bytes
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
-- | Pretty print a register.
@@ -101,12 +203,13 @@ pprSize :: Size -> Doc
pprSize x
= ptext
(case x of
- II8 -> sLit "ub"
- II16 -> sLit "uh"
- II32 -> sLit ""
- II64 -> sLit "d"
- FF32 -> sLit ""
- FF64 -> sLit "d")
+ II8 -> sLit "ub"
+ II16 -> sLit "uh"
+ II32 -> sLit ""
+ II64 -> sLit "d"
+ FF32 -> sLit ""
+ FF64 -> sLit "d"
+ _ -> panic "SPARC.Ppr.pprSize: no match")
-- | Pretty print a size for an instruction suffix.
@@ -120,7 +223,8 @@ pprStSize x
II32 -> sLit ""
II64 -> sLit "x"
FF32 -> sLit ""
- FF64 -> sLit "d")
+ FF64 -> sLit "d"
+ _ -> panic "SPARC.Ppr.pprSize: no match")
-- | Pretty print a condition code.
@@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprReg reg]
-
+-}
-- a clumsy hack for now, to handle possible double alignment problems
-- even clumsier, to allow for RegReg regs that show when doing indexed
diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs
index 8f8a977ac7..025e302556 100644
--- a/compiler/nativeGen/SPARC/RegInfo.hs
+++ b/compiler/nativeGen/SPARC/RegInfo.hs
@@ -8,241 +8,115 @@
-----------------------------------------------------------------------------
module SPARC.RegInfo (
- -- machine specific
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
+ mkVReg,
+
+ riZero,
+ fpRelEA,
+ moveSp,
+ fPair,
+
+ shortcutStatic,
+ regDotColor,
JumpDest(..),
canShortcut,
- shortcutJump,
-
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- spillSlotSize,
- maxSpillSlots,
- spillSlotToOffset
+ shortcutJump,
)
where
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
import SPARC.Instr
import SPARC.Regs
-import RegsBase
+import RegClass
+import Reg
+import Size
+import Constants (wORD_SIZE)
+import Cmm
+import CLabel
import BlockId
import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- | Represents what regs are read and written to in an instruction.
---
-data RegUsage
- = RU [Reg] -- regs read from
- [Reg] -- regs written to
-
-
--- | No regs read or written to.
-noUsage :: RegUsage
-noUsage = RU [] []
-
-
--- | regUsage returns the sets of src and destination registers used
--- by a particular instruction. Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint. (We wouldn't
--- want them to end up on the free list!) As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such. Just
--- state precisely the regs read and written by that insn. The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
---
-regUsage :: Instr -> RegUsage
-regUsage instr
- = case instr of
- SPILL reg _ -> usage ([reg], [])
- RELOAD _ reg -> usage ([], [reg])
-
- LD _ addr reg -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- RDY rd -> usage ([], [rd])
- WRY r1 r2 -> usage ([r1, r2], [])
- AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SETHI _ reg -> usage ([], [reg])
- FABS _ r1 r2 -> usage ([r1], [r2])
- FADD _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FCMP _ _ r1 r2 -> usage ([r1, r2], [])
- FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FMOV _ r1 r2 -> usage ([r1], [r2])
- FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FNEG _ r1 r2 -> usage ([r1], [r2])
- FSQRT _ r1 r2 -> usage ([r1], [r2])
- FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FxTOy _ _ r1 r2 -> usage ([r1], [r2])
-
- JMP addr -> usage (regAddr addr, [])
- JMP_TBL addr _ -> usage (regAddr addr, [])
-
- CALL (Left _ ) _ True -> noUsage
- CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs)
- CALL (Right reg) _ True -> usage ([reg], [])
- CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
- _ -> noUsage
-
- where
- usage (src, dst)
- = RU (filter interesting src) (filter interesting dst)
-
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
+import Unique
- regRI (RIReg r) = [r]
- regRI _ = []
+-- | Make a virtual reg with this size.
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size)
+ = VirtualRegI u
--- | Interesting regs are virtuals, or ones that are allocatable
--- by the register allocator.
-interesting :: Reg -> Bool
-interesting reg
- = case reg of
- VirtualRegI _ -> True
- VirtualRegHi _ -> True
- VirtualRegF _ -> True
- VirtualRegD _ -> True
- RealReg i -> isFastTrue (freeReg i)
-
-
-
--- | Apply a given mapping to tall the register references in this instruction.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- LD sz addr reg -> LD sz (fixAddr addr) (env reg)
- ST sz reg addr -> ST sz (env reg) (fixAddr addr)
-
- ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
- SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
- UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
- SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
- UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
- SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
- RDY rd -> RDY (env rd)
- WRY r1 r2 -> WRY (env r1) (env r2)
- AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
- ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
- OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
- ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
- XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
- XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
- SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
- SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
- SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
-
- SETHI imm reg -> SETHI imm (env reg)
-
- FABS s r1 r2 -> FABS s (env r1) (env r2)
- FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
- FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
- FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
- FMOV s r1 r2 -> FMOV s (env r1) (env r2)
- FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
- FNEG s r1 r2 -> FNEG s (env r1) (env r2)
- FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
- FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
- FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
-
- JMP addr -> JMP (fixAddr addr)
- JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids
-
- CALL (Left i) n t -> CALL (Left i) n t
- CALL (Right r) n t -> CALL (Right (env r)) n t
- _ -> instr
-
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- BI _ _ id -> id : acc
- BF _ _ id -> id : acc
- JMP_TBL _ ids -> ids ++ acc
- _other -> acc
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegF u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
--- We can't just use jumpDests above because the jump might take its arg,
--- so the instr won't contain a blockid.
+-- | Check if an RI represents a zero value.
+-- - a literal zero
+-- - register %g0, which is always zero.
--
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- BI{} -> True
- BF{} -> True
- JMP{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- BI cc annul id
- | id == old -> BI cc annul new
-
- BF cc annul id
- | id == old -> BF cc annul new
-
- _other -> insn
-
+riZero :: RI -> Bool
+riZero (RIImm (ImmInt 0)) = True
+riZero (RIImm (ImmInteger 0)) = True
+riZero (RIReg (RealReg 0)) = True
+riZero _ = False
+
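riZero's clauses can be exercised directly; a minimal sketch, assuming only the constructors above (the helper name is ours, not part of the patch):

    -- Hedged sanity checks for riZero; RealReg 0 is %g0 on SPARC.
    riZeroChecks :: Bool
    riZeroChecks = and
        [ riZero (RIImm (ImmInt 0))         -- literal zero, Int form
        , riZero (RIImm (ImmInteger 0))     -- literal zero, Integer form
        , riZero (RIReg (RealReg 0))        -- %g0 always reads as zero
        , not (riZero (RIImm (ImmInt 1)))
        ]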
+
+-- | Calculate the effective address which would be used by the
+-- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
+-- alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+ = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+ = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
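Both helpers expand to plain ADD forms; with wORD_SIZE = 4 on 32-bit SPARC, a hedged sketch of what they produce:

    -- moveSp (-1) == ADD False False sp (RIImm (ImmInt (-4))) sp   -- %sp down one word
    -- fpRelEA 2 r == ADD False False fp (RIImm (ImmInt 8)) r       -- r := %fp + 8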
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n)
+ | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+ = Just (VirtualRegHi u)
+
+fPair _
+ = trace ("MachInstrs.fPair: can't get high half of supposed double reg ")
+ Nothing
+
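Doubles occupy even/odd float-register pairs on SPARC, so fPair is only defined on even float regs (numbered from 32 here) and on double virtuals; a quick hedged sketch, assuming Reg's Eq instance:

    fPairChecks :: Bool
    fPairChecks = and
        [ fPair (RealReg 32) == Just (RealReg 33)   -- %f0 pairs with %f1
        , fPair (RealReg 33) == Nothing             -- odd half: falls through (with a trace)
        ]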
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+ = other_static
+
+shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
+
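A worked sketch of what shortcutStatic buys jump tables: if block uq is an empty block that only jumps on to lbl', the table entry is rewritten to target lbl' directly (fn stands for whatever shortcut map the allocator built):

    -- Given: fn (BlockId uq) == Just (DestImm (ImmCLbl lbl'))
    -- Then:  shortcutStatic fn (CmmStaticLit (CmmLabel (mkAsmTempLabel uq)))
    --          == CmmStaticLit (CmmLabel lbl')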
+
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
@@ -253,108 +127,3 @@ canShortcut _ = Nothing
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump _ other = other
-
-
-
--- | Make a spill instruction.
--- On SPARC we spill below frame pointer leaving 2 words/spill
-mkSpillInstr
- :: Reg -- ^ register to spill
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-mkSpillInstr reg _ slot
- = let off = spillSlotToOffset slot
- off_w = 1 + (off `div` 4)
- sz = case regClass reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in ST sz reg (fpRel (negate off_w))
-
-
--- | Make a spill reload instruction.
-mkLoadInstr
- :: Reg -- ^ register to load
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-mkLoadInstr reg _ slot
- = let off = spillSlotToOffset slot
- off_w = 1 + (off `div` 4)
- sz = case regClass reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in LD sz (fpRel (- off_w)) reg
-
-
--- | Make a reg-reg move instruction.
--- On SPARC v8 there are no instructions to move directly between
--- floating point and integer regs. If we need to do that then we
--- have to go via memory.
---
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-
-mkRegRegMoveInstr src dst
- = case regClass src of
- RcInteger -> ADD False False src (RIReg g0) dst
- RcDouble -> FMOV FF64 src dst
- RcFloat -> FMOV FF32 src dst
-
-
--- | Check whether an instruction represents a reg-reg move.
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
---
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove instr
- = case instr of
- ADD False False src (RIReg src2) dst
- | g0 == src2 -> Just (src, dst)
-
- FMOV FF64 src dst -> Just (src, dst)
- FMOV FF32 src dst -> Just (src, dst)
- _ -> Nothing
-
-
--- | Make an unconditional branch instruction.
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id
- = [BI ALWAYS False id
- , NOP] -- fill the branch delay slot.
-
-
--- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02
-spillSlotSize :: Int
-spillSlotSize = 8
-
-
--- | The maximum number of spill slots available on the C stack.
--- If we use up all of the slots, then we're screwed.
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
---
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
- | slot >= 0 && slot < maxSpillSlots
- = 64 + spillSlotSize * slot
-
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int maxSpillSlots)
-
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index 987fc2da14..1fb6a01b87 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -5,17 +5,6 @@
-- -----------------------------------------------------------------------------
module SPARC.Regs (
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediate values
Imm(..),
strImmLit,
@@ -39,113 +28,33 @@ module SPARC.Regs (
fits13Bits,
largeOffsetError,
gReg, iReg, lReg, oReg, fReg,
- fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
+ fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27,
nCG_FirstFloatReg,
- -- horror show
+ -- allocatable
freeReg,
- globalRegMaybe
+ allocatableRegs,
+ globalRegMaybe,
+
+ get_GlobalReg_reg_or_addr
)
where
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
-import Outputable ( Outputable(..), pprPanic, panic )
+import Outputable ( panic )
import qualified Outputable
-import Unique
import Constants
import FastBool
--- sizes -----------------------------------------------------------------------
-
--- | A 'Size' also includes format information, such as whether
--- the word is signed or unsigned.
---
-data Size
- = II8 -- byte (signed)
- | II16 -- halfword (signed, 2 bytes)
- | II32 -- word (4 bytes)
- | II64 -- word (8 bytes)
- | FF32 -- IEEE single-precision floating pt
- | FF64 -- IEEE single-precision floating pt
- deriving Eq
-
-
--- | Get the integer size of this width.
-intSize :: Width -> Size
-intSize width
- = case width of
- W8 -> II8
- W16 -> II16
- W32 -> II32
- W64 -> II64
- other -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Get the float size of this width.
-floatSize :: Width -> Size
-floatSize width
- = case width of
- W32 -> FF32
- W64 -> FF64
- other -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Check if a size represents a floating point value.
-isFloatSize :: Size -> Bool
-isFloatSize size
- = case size of
- FF32 -> True
- FF64 -> True
- _ -> False
-
-
--- | Size of a machine word.
--- This is big enough to hold a pointer.
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
--- | Convert a Cmm type to a Size.
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty
- | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
--- | Get the Width of a Size.
-sizeToWidth :: Size -> Width
-sizeToWidth size
- = case size of
- II8 -> W8
- II16 -> W16
- II32 -> W32
- II64 -> W64
- FF32 -> W32
- FF64 -> W64
-
-
--- | Make a virtual reg with this size.
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size)
- = VirtualRegI u
-
- | otherwise
- = case size of
- FF32 -> VirtualRegF u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-- immediates ------------------------------------------------------------------
@@ -390,48 +299,13 @@ o1 = RealReg (oReg 1)
f0 = RealReg (fReg 0)
+-- | We use the first few float regs as double precision.
+--	This is the RegNo of the first float reg we use as single precision.
+--
nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
-#else
-nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg f22
-#endif
-
-
--- horror show -----------------------------------------------------------------
-#if sparc_TARGET_ARCH
-#define g0 0
-#define g1 1
-#define g2 2
-#define g3 3
-#define g4 4
-#define g5 5
-#define g6 6
-#define g7 7
-#define o0 8
-#define o1 9
-#define o2 10
-#define o3 11
-#define o4 12
-#define o5 13
-#define o6 14
-#define o7 15
-#define l0 16
-#define l1 17
-#define l2 18
-#define l3 19
-#define l4 20
-#define l5 21
-#define l6 22
-#define l7 23
-#define i0 24
-#define i1 25
-#define i2 26
-#define i3 27
-#define i4 28
-#define i5 29
-#define i6 30
-#define i7 31
+nCG_FirstFloatReg = 54
+
+
-- | Check whether a machine register is free for allocation.
-- This needs to match the info in includes/MachRegs.h otherwise modules
@@ -445,7 +319,11 @@ freeReg regno
-- %g1(r1) - %g4(r4) are allocable -----------------
-freeReg :: RegNo -> FastBool
+ -- %g5(r5) - %g7(r7)
+ -- are reserved for the OS
+ 5 -> fastBool False
+ 6 -> fastBool False
+ 7 -> fastBool False
-- %o0(r8) - %o5(r13) are allocable ----------------
@@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool
-- regs not matched above are allocable.
_ -> fastBool True
-
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs to which we are prepared to let the
+-- register allocator map VRegs.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
-- | Returns Just the real register that a global register is stored in.
@@ -539,15 +425,20 @@ globalRegMaybe gg
Hp -> Just (RealReg 27) -- %i3
HpLim -> Just (RealReg 28) -- %i4
-globalRegMaybe :: GlobalReg -> Maybe Reg
-
-
-
-#else
-
-freeReg _ = 0#
-globalRegMaybe = panic "SPARC.Regs.globalRegMaybe: not defined"
+ BaseReg -> Just (RealReg 25) -- %i1
+
+ _ -> Nothing
-#endif
+-- We map STG registers onto appropriate CmmExprs. Either they map
+-- to real machine registers or are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
diff --git a/compiler/nativeGen/Size.hs b/compiler/nativeGen/Size.hs
new file mode 100644
index 0000000000..3be5430e82
--- /dev/null
+++ b/compiler/nativeGen/Size.hs
@@ -0,0 +1,103 @@
+-- | Sizes on this architecture
+-- A Size is a combination of width and class
+--
+-- TODO: Rename this to "Format" instead of "Size" to reflect
+-- the fact that it represents floating point vs integer.
+--
+-- TODO: Signed vs unsigned?
+--
+-- TODO: This module is currently shared by all architectures because
+--	NCGMonad needs to know about it to make a VReg. It would be better
+--	to have architecture-specific formats, and do the overloading
+-- properly. eg SPARC doesn't care about FF80.
+--
+module Size (
+ Size(..),
+ intSize,
+ floatSize,
+ isFloatSize,
+ cmmTypeSize,
+ sizeToWidth
+)
+
+where
+
+import Cmm
+import Outputable
+
+-- It looks very like the old MachRep, but it's now of purely local
+-- significance, here in the native code generator. You can change it
+-- without global consequences.
+--
+-- A major use is as an opcode qualifier; thus the opcode
+-- mov.l a b
+-- might be encoded
+-- MOV II32 a b
+-- where the Size field encodes the ".l" part.
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+-- here. I've removed them from the x86 version, we'll see what happens --SDM
+
+-- ToDo: quite a few occurrences of Size could usefully be replaced by Width
+
+data Size
+ = II8
+ | II16
+ | II32
+ | II64
+ | FF32
+ | FF64
+ | FF80
+ deriving (Show, Eq)
+
+
+-- | Get the integer size of this width.
+intSize :: Width -> Size
+intSize width
+ = case width of
+ W8 -> II8
+ W16 -> II16
+ W32 -> II32
+ W64 -> II64
+ other -> pprPanic "Size.intSize" (ppr other)
+
+
+-- | Get the float size of this width.
+floatSize :: Width -> Size
+floatSize width
+ = case width of
+ W32 -> FF32
+ W64 -> FF64
+ other -> pprPanic "Size.floatSize" (ppr other)
+
+
+-- | Check if a size represents a floating point value.
+isFloatSize :: Size -> Bool
+isFloatSize size
+ = case size of
+ FF32 -> True
+ FF64 -> True
+ FF80 -> True
+ _ -> False
+
+
+-- | Convert a Cmm type to a Size.
+cmmTypeSize :: CmmType -> Size
+cmmTypeSize ty
+ | isFloatType ty = floatSize (typeWidth ty)
+ | otherwise = intSize (typeWidth ty)
+
+
+-- | Get the Width of a Size.
+sizeToWidth :: Size -> Width
+sizeToWidth size
+ = case size of
+ II8 -> W8
+ II16 -> W16
+ II32 -> W32
+ II64 -> W64
+ FF32 -> W32
+ FF64 -> W64
+ FF80 -> W80
+
+
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs
new file mode 100644
index 0000000000..2643b00d87
--- /dev/null
+++ b/compiler/nativeGen/TargetReg.hs
@@ -0,0 +1,101 @@
+
+-- | Hard wired things related to registers.
+--	This module is what prevents the native code generator from being able to
+-- emit code for non-host architectures.
+--
+-- TODO: Do a better job of the overloading, and eliminate this module.
+-- We'd probably do better with a Register type class, and hook this to
+-- Instruction somehow.
+--
+-- TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable
+
+module TargetReg (
+ targetRegClass,
+ targetMkVReg,
+ targetWordSize,
+ targetRegDotColor
+)
+
+where
+
+#include "HsVersions.h"
+
+import Reg
+import RegClass
+import Size
+
+import CmmExpr (wordWidth)
+import Outputable
+import Unique
+
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import qualified X86.Regs as X86
+import qualified X86.RegInfo as X86
+
+#elif powerpc_TARGET_ARCH
+import qualified PPC.Regs as PPC
+import qualified PPC.RegInfo as PPC
+
+#elif sparc_TARGET_ARCH
+import qualified SPARC.Regs as SPARC
+import qualified SPARC.RegInfo as SPARC
+
+
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+-- x86 -------------------------------------------------------------------------
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = X86.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = X86.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = X86.regDotColor
+
+
+-- ppc -------------------------------------------------------------------------
+#elif powerpc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = PPC.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = PPC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = PPC.regDotColor
+
+
+-- sparc -----------------------------------------------------------------------
+#elif sparc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = SPARC.regClass
+
+-- | Size of a machine word.
+-- This is big enough to hold a pointer.
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = SPARC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = SPARC.regDotColor
+
+--------------------------------------------------------------------------------
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
new file mode 100644
index 0000000000..43495a45a5
--- /dev/null
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -0,0 +1,2313 @@
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module X86.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import X86.Instr
+import X86.Cond
+import X86.Regs
+import X86.RegInfo
+import X86.Ppr
+import Instruction
+import PIC
+import NCGMonad
+import Size
+import Reg
+import RegClass
+import Platform
+
+-- Our intermediate code:
+import BasicTypes
+import BlockId
+import PprCmm ( pprExpr )
+import Cmm
+import CLabel
+import ClosureInfo ( C_SRT(..) )
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import ForeignCall ( CCallConv(..) )
+import OrdList
+import Pretty
+import qualified Outputable as O
+import Outputable
+import FastString
+import FastBool ( isFastTrue )
+import Constants ( wORD_SIZE )
+import DynFlags
+
+import Debug.Trace ( trace )
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Maybe ( fromJust )
+import Data.Bits
+import Data.Word
+import Data.Int
+
+
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dynflags
+ (CmmProc info lab params (ListGraph blocks)) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+ os = platformOS $ targetPlatform dynflags
+
+ case picBaseMb of
+ Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen _ (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+ CmmReturn params ->
+ panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
+
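Since an InstrBlock is an OrdList, appends are O(1) and the tree is only flattened at the end; the stitching idiom used throughout this module, as a one-line sketch (the helper name is ours):

    -- Join two code trees, then tack one instruction onto the right end.
    stitch :: InstrBlock -> InstrBlock -> Instr -> InstrBlock
    stitch code1 code2 instr = code1 `appOL` code2 `snocOL` instr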
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+-- Reg is the lower 32-bit temporary which contains the result.
+-- Use getHiVRegFromLo to find the other VRegUnique.
+--
+-- The rule of this simplified insn-selection game is therefore that
+-- the returned Reg may be modified.
+--
+data ChildCode64
+ = ChildCode64
+ InstrBlock
+ Reg
+
+
+-- | Registers passed up the tree. If the stix code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
+
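When the rule above cannot be honoured, the escape hatch is exactly the LEA-into-a-temporary trick; a minimal sketch using only functions from this module (the helper name is ours, not part of the patch):

    -- Pin the effective address in a fresh register so the amode
    -- survives arbitrary intervening computation.
    pinAmode :: CmmExpr -> NatM (Reg, InstrBlock)
    pinAmode e = do
        Amode addr code <- getAmode e
        tmp <- getNewRegNat archWordSize
        return (tmp, code `snocOL` LEA archWordSize (OpAddr addr) (OpReg tmp))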
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
+
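The Int64 truncation is what makes the negative cases come out right; hedged checks of the edge cases (the helper name is ours):

    is32BitChecks :: Bool
    is32BitChecks = and
        [ is32BitInteger (-1)                   -- fits, trivially
        , is32BitInteger 18446744073709551615   -- same 64-bit pattern as -1, so it fits too
        , not (is32BitInteger 0x80000000)       -- 2^31 does not fit in a signed 32-bit word
        ]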
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
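For instance (a sketch of the rewrite, not new behaviour): mangleIndexTree (CmmRegOff reg 8) yields CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt 8 w)] with w = typeWidth (cmmRegType reg); the getAmode rules below then fold the addition back into an addressing mode.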
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+
+
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Little-endian store
+ mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
+ mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst II32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
+ mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(i386): invalid lvalue"
+
+
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ code = toOL [
+ MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+ Amode addr addr_code <- getAmode addrTree
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
+ mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+ -- in
+ return (
+ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+ = return (ChildCode64 nilOL (mkVReg vu II32))
+
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ r1hi = getHiVRegFromLo r1lo
+ code = code1 `appOL`
+ toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+ ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV II32 (OpReg r1hi) (OpReg rhi),
+ ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+ ADD II32 (OpReg r2lo) (OpReg rlo),
+ MOV II32 (OpReg r1hi) (OpReg rhi),
+ ADC II32 (OpReg r2hi) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
+ fn <- getAnyReg expr
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ code = fn r_dst_lo
+ return (
+ ChildCode64 (code `snocOL`
+ MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+ r_dst_lo
+ )
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(i386)" (ppr expr)
+
+
+
+--------------------------------------------------------------------------------
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+ -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+ -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat archWordSize
+ return (Fixed archWordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+
+
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f W32)) = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f W32)]
+ `consOL` (addr_code `snocOL`
+ GLD FF32 addr dst)
+ -- in
+ return (Any FF32 code)
+
+
+getRegister (CmmLit (CmmFloat d W64))
+ | d == 0.0
+ = let code dst = unitOL (GLDZ dst)
+ in return (Any FF64 code)
+
+ | d == 1.0
+ = let code dst = unitOL (GLD1 dst)
+ in return (Any FF64 code)
+
+ | otherwise = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d W64)]
+ `consOL` (addr_code `snocOL`
+ GLD FF64 addr dst)
+ -- in
+ return (Any FF64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+
+
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmLit (CmmFloat 0.0 w)) = do
+ let size = floatSize w
+ code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any size code)
+
+getRegister (CmmLit (CmmFloat f w)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f w)],
+ MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ ]
+ -- in
+ return (Any size code)
+ where size = floatSize w
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II8) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II8) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II16) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II16) addr
+ return (Any II32 code)
+
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II8) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II8) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II16) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II16) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II32) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+ CmmLit displacement])
+ = return $ Any II64 (\dst -> unitOL $
+ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
+
+getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ code dst = x_code dst `appOL` toOL [
+ -- This is how gcc does it, so it can't be that bad:
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x80000000 W32),
+ CmmStaticLit (CmmInt 0 W32),
+ CmmStaticLit (CmmInt 0 W32),
+ CmmStaticLit (CmmInt 0 W32)
+ ],
+ XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorps, so we need the 128-bit constant
+ -- ToDo: rip-relative
+ ]
+ --
+ return (Any FF32 code)
+
+getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ -- This is how gcc does it, so it can't be that bad:
+ code dst = x_code dst `appOL` toOL [
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x8000000000000000 W64),
+ CmmStaticLit (CmmInt 0 W64)
+ ],
+ -- gcc puts an unpck here. Wonder if we need it.
+ XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorpd, so we need the 128-bit constant
+ ]
+ --
+ return (Any FF64 code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+#if i386_TARGET_ARCH
+ MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
+ MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
+#endif
+
+ MO_S_Neg w -> triv_ucode NEGI (intSize w)
+ MO_F_Neg w -> triv_ucode NEGI (floatSize w)
+ MO_Not w -> triv_ucode NOT (intSize w)
+
+ -- Nop conversions
+ MO_UU_Conv W32 W8 -> toI8Reg W32 x
+ MO_SS_Conv W32 W8 -> toI8Reg W32 x
+ MO_UU_Conv W16 W8 -> toI8Reg W16 x
+ MO_SS_Conv W16 W8 -> toI8Reg W16 x
+ MO_UU_Conv W32 W16 -> toI16Reg W32 x
+ MO_SS_Conv W32 W16 -> toI16Reg W32 x
+
+#if x86_64_TARGET_ARCH
+ MO_UU_Conv W64 W32 -> conversionNop II64 x
+ MO_SS_Conv W64 W32 -> conversionNop II64 x
+ MO_UU_Conv W64 W16 -> toI16Reg W64 x
+ MO_SS_Conv W64 W16 -> toI16Reg W64 x
+ MO_UU_Conv W64 W8 -> toI8Reg W64 x
+ MO_SS_Conv W64 W8 -> toI8Reg W64 x
+#endif
+
+ MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+ MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+
+ -- widenings
+ MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
+ MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
+ MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
+
+ MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
+ MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
+ MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+ MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
+ MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
+ MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
+ MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
+ MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
+ MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+ MO_FF_Conv W32 W64 -> conversionNop FF64 x
+ MO_FF_Conv W64 W32 -> conversionNop FF32 x
+#else
+ MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
+ MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
+#endif
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ other -> pprPanic "getRegister" (pprMachOp mop)
+ where
+ triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
+ triv_ucode instr size = trivialUCode size (instr size) x
+
+ -- signed or unsigned extension.
+ integerExtend :: Width -> Width
+ -> (Size -> Operand -> Operand -> Instr)
+ -> CmmExpr -> NatM Register
+ integerExtend from to instr expr = do
+ (reg,e_code) <- if from == W8 then getByteReg expr
+ else getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ instr (intSize from) (OpReg reg) (OpReg dst)
+ return (Any (intSize to) code)
+
+ toI8Reg :: Width -> CmmExpr -> NatM Register
+ toI8Reg new_rep expr
+ = do codefn <- getAnyReg expr
+ return (Any (intSize new_rep) codefn)
+ -- HACK: use getAnyReg to get a byte-addressable register.
+ -- If the source was a Fixed register, this will add the
+ -- mov instruction to put it into the desired destination.
+ -- We're assuming that the destination won't be a fixed
+ -- non-byte-addressable register; it won't be, because all
+ -- fixed registers are word-sized.
+
+ toI16Reg = toI8Reg -- for now
+
+ conversionNop :: Size -> CmmExpr -> NatM Register
+ conversionNop new_size expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_size)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+ = case mop of
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt rep -> condIntReg GU x y
+ MO_U_Ge rep -> condIntReg GEU x y
+ MO_U_Lt rep -> condIntReg LU x y
+ MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+ MO_F_Add w -> trivialFCode w GADD x y
+ MO_F_Sub w -> trivialFCode w GSUB x y
+ MO_F_Quot w -> trivialFCode w GDIV x y
+ MO_F_Mul w -> trivialFCode w GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_F_Add w -> trivialFCode w ADD x y
+ MO_F_Sub w -> trivialFCode w SUB x y
+ MO_F_Quot w -> trivialFCode w FDIV x y
+ MO_F_Mul w -> trivialFCode w MUL x y
+#endif
+
+ MO_Add rep -> add_code rep x y
+ MO_Sub rep -> sub_code rep x y
+
+ MO_S_Quot rep -> div_code rep True True x y
+ MO_S_Rem rep -> div_code rep True False x y
+ MO_U_Quot rep -> div_code rep False True x y
+ MO_U_Rem rep -> div_code rep False False x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_Mul rep -> triv_op rep IMUL
+ MO_And rep -> triv_op rep AND
+ MO_Or rep -> triv_op rep OR
+ MO_Xor rep -> triv_op rep XOR
+
+ {- Shift ops on x86s have constraints on their source, it
+ either has to be Imm, CL or 1
+ => trivialCode is not restrictive enough (sigh.)
+ -}
+ MO_Shl rep -> shift_code rep SHL x y {-False-}
+ MO_U_Shr rep -> shift_code rep SHR x y {-False-}
+ MO_S_Shr rep -> shift_code rep SAR x y {-False-}
+
+ other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --------------------
+ triv_op width instr = trivialCode width op (Just op) x y
+ where op = instr (intSize width)
+
+ imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getNonClobberedReg a
+ b_code <- getAnyReg b
+ let
+ shift_amt = case rep of
+ W32 -> 31
+ W64 -> 63
+ _ -> panic "shift_amt"
+
+ size = intSize rep
+ code = a_code `appOL` b_code eax `appOL`
+ toOL [
+ IMUL2 size (OpReg a_reg), -- result in %edx:%eax
+ SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB size (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
+ ]
+ -- in
+ return (Fixed size eax code)
+
+ --------------------
+ shift_code :: Width
+ -> (Size -> Operand -> Operand -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+ {- Case1: shift length as immediate -}
+ shift_code width instr x y@(CmmLit lit) = do
+ x_code <- getAnyReg x
+ let
+ size = intSize width
+ code dst
+ = x_code dst `snocOL`
+ instr size (OpImm (litToImm lit)) (OpReg dst)
+ -- in
+ return (Any size code)
+
+ {- Case2: shift length is complex (non-immediate)
+ * y must go in %ecx.
+ * we cannot do y first *and* put its result in %ecx, because
+ %ecx might be clobbered by x.
+ * if we do y second, then x cannot be
+ in a clobbered reg. Also, we cannot clobber x's reg
+ with the instruction itself.
+ * so we can either:
+ - do y first, put its result in a fresh tmp, then copy it to %ecx later
+ - do y second and put its result into %ecx. x gets placed in a fresh
+	  tmp. This is likely to be better, because the reg alloc can
+ eliminate this reg->reg move here (it won't eliminate the other one,
+ because the move is into the fixed %ecx).
+ -}
+ shift_code width instr x y{-amount-} = do
+ x_code <- getAnyReg x
+ let size = intSize width
+ tmp <- getNewRegNat size
+ y_code <- getAnyReg y
+ let
+ code = x_code tmp `appOL`
+ y_code ecx `snocOL`
+ instr size (OpReg ecx) (OpReg tmp)
+ -- in
+ return (Fixed size tmp code)
+
+ --------------------
+ add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ add_code rep x (CmmLit (CmmInt y _))
+ | is32BitInteger y = add_int rep x y
+ add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
+ where size = intSize rep
+
+ --------------------
+ sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ sub_code rep x (CmmLit (CmmInt y _))
+ | is32BitInteger (-y) = add_int rep x (-y)
+ sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
+
+ -- our three-operand add instruction:
+ add_int width x y = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ size = intSize width
+ imm = ImmInt (fromInteger y)
+ code dst
+ = x_code `snocOL`
+ LEA size
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+ (OpReg dst)
+ --
+ return (Any size code)
+
+ ----------------------
+ div_code width signed quotient x y = do
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+ x_code <- getAnyReg x
+ let
+ size = intSize width
+ widen | signed = CLTD size
+ | otherwise = XOR size (OpReg edx) (OpReg edx)
+
+ instr | signed = IDIV
+ | otherwise = DIV
+
+ code = y_code `appOL`
+ x_code eax `appOL`
+ toOL [widen, instr size y_op]
+
+ result | quotient = eax
+ | otherwise = edx
+
+ -- in
+ return (Fixed size result code)
+
+
+getRegister (CmmLoad mem pk)
+ | isFloatType pk
+ = do
+ Amode src mem_code <- getAmode mem
+ let
+ size = cmmTypeSize pk
+ code dst = mem_code `snocOL`
+ IF_ARCH_i386(GLD size src dst,
+ MOV size (OpAddr src) (OpReg dst))
+ return (Any size code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | not (isWord64 pk)
+ = do
+ code <- intLoadCode instr mem
+ return (Any size code)
+ where
+ width = typeWidth pk
+ size = intSize width
+ instr = case width of
+ W8 -> MOVZxL II8
+ _other -> MOV size
+ -- We always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV size) mem
+ return (Any size code)
+ where size = intSize $ typeWidth pk
+#endif
+
+getRegister (CmmLit (CmmInt 0 width))
+ = let
+ size = intSize width
+
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_size = case size of II64 -> II32; _ -> size
+ size1 = IF_ARCH_i386( size, adj_size )
+ code dst
+ = unitOL (XOR size1 (OpReg dst) (OpReg dst))
+ in
+ return (Any size code)
+
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | isWord64 (cmmLitType lit), not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
+ in
+ return (Any II64 code)
+ where
+ isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as (not.is32BitLit), because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
+getRegister (CmmLit lit)
+ = let
+ size = cmmTypeSize (cmmLitType lit)
+ imm = litToImm lit
+ code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
+ in
+ return (Any size code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+ -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+ Amode src mem_code <- getAmode mem
+ return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+ r <- getRegister expr
+ anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code) = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ | isVirtualReg reg -> return (reg,code)
+ | otherwise -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ -- ToDo: could optimise slightly by checking for byte-addressable
+ -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ -- only free regs can be clobbered
+ | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ | otherwise ->
+ return (reg, code)
+
+reg2reg :: Size -> Reg -> Reg -> Instr
+reg2reg size src dst
+#if i386_TARGET_ARCH
+ | isFloatSize size = GMOV src dst
+#endif
+ | otherwise = MOV size (OpReg src) (OpReg dst)
+
+
+
+--------------------------------------------------------------------------------
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+#if x86_64_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+ CmmLit displacement])
+ = return $ Amode (ripRel (litToImm displacement)) nilOL
+
+#endif
+
+
+-- This is all just ridiculous, since it carefully undoes
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | is32BitLit lit
+ -- ASSERT(rep == II32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (-(fromInteger i))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | is32BitLit lit
+ -- ASSERT(rep == II32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (fromInteger i)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+ b@(CmmLit _)])
+ = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+ [y, CmmLit (CmmInt shift _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep)
+ [x, CmmMachOp (MO_Add _)
+ [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+ CmmLit (CmmInt offset _)]])
+  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
+	&& is32BitInteger offset
+ = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+ = x86_complex_amode x y 0 0
+
+getAmode (CmmLit lit) | is32BitLit lit
+ = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+ (reg,code) <- getSomeReg expr
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+ = do (x_reg, x_code) <- getNonClobberedReg base
+ -- x must be in a temp, because it has to stay live over y_code
+	-- we could compare x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg index
+ let
+ code = x_code `appOL` y_code
+	    scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8  -- renamed from 'base' to avoid shadowing the argument
+	return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
+ code)
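+-- A worked example (illustrative only): an addition such as
+--     CmmMachOp (MO_Add w) [x, CmmMachOp (MO_Shl w) [y, CmmLit (CmmInt 3 w)]]
+-- reaches here as 'x86_complex_amode x y 3 0' and is rendered as the
+-- addressing mode 0(x_reg,y_reg,8), i.e. base + index*8.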
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat archWordSize
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+                               unitOL (LEA archWordSize (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered :: Reg -> Bool
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
+ return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getOperand e = do
+ (reg, code) <- getSomeReg e
+ return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit) = is32BitLit lit
+ || isSuitableFloatingPointLit lit
+isOperand _ = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit :: CmmLit -> Bool
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getRegOrMem e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+is32BitLit :: CmmLit -> Bool
+#if x86_64_TARGET_ARCH
+is32BitLit (CmmInt i W64) = is32BitInteger i
+   -- assume that labels are in the range 0-2^31-1: this assumes the
+   -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is32BitLit _ = True
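+-- For example, (CmmInt 0x7fffffff W64) fits in a signed 32-bit
+-- immediate field, while (CmmInt 0x80000000 W64) does not and must be
+-- loaded into a register first.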
+
+
+
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+ =
+ case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+        other -> pprPanic "X86.getCondCode" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other = pprPanic "X86.getCondCode(2)" (ppr other)
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
+ Amode x_addr x_code <- getAmode x
+ let
+ imm = litToImm lit
+ code = x_code `snocOL`
+ CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero, using a mask
+-- TODO: Add some sanity checking!!!!
+condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
+ | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
+ = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
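+-- (TEST r r sets the zero and sign flags exactly as a comparison of r
+-- against zero would, with a shorter encoding than CMP $0, r.)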
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
+ -- in
+ return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
+ let
+ code = y_code `appOL`
+ x_code `snocOL`
+ CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
+ -- in
+ return (CondCode False cond code)
+
+
+
+--------------------------------------------------------------------------------
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if i386_TARGET_ARCH
+condFltCode cond x y
+  = ASSERT(cond `elem` [EQQ, NE, LE, LTT, GE, GTT]) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+
+#elif x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+
+#else
+condFltCode = panic "X86.condFltCode: not defined"
+
+#endif
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+
+-- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+ CmmLit (CmmInt i _)])
+ | addr == addr2, pk /= II64 || is32BitInteger i,
+ Just instr <- check op
+ = do Amode amode code_addr <- getAmode addr
+ let code = code_addr `snocOL`
+ instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+ return code
+ where
+ check (MO_Add _) = Just ADD
+ check (MO_Sub _) = Just SUB
+ check _ = Nothing
+ -- ToDo: more?
+
+-- general case
+assignMem_IntCode pk addr src = do
+ Amode addr code_addr <- getAmode addr
+ (code_src, op_src) <- get_op_RI src
+ let
+ code = code_src `appOL`
+ code_addr `snocOL`
+ MOV pk op_src (OpAddr addr)
+ -- NOTE: op_src is stable, so it will still be valid
+ -- after code_addr. This may involve the introduction
+ -- of an extra MOV to a temporary register, but we hope
+ -- the register allocator will get rid of it.
+ --
+ return code
+ where
+        get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operand
+ get_op_RI (CmmLit lit) | is32BitLit lit
+ = return (nilOL, OpImm (litToImm lit))
+ get_op_RI op
+ = do (reg,code) <- getNonClobberedReg op
+ return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+ load_code <- intLoadCode (MOV pk) src
+ return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+ code <- getAnyReg src
+ return (code (getRegisterReg reg))
+
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ (src_reg, src_code) <- getNonClobberedReg src
+ Amode addr addr_code <- getAmode addr
+ let
+ code = src_code `appOL`
+ addr_code `snocOL`
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
+ return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+ src_code <- getAnyReg src
+ return (src_code (getRegisterReg reg))
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLoad mem pk) = do
+ Amode target code <- getAmode mem
+ return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+ return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+ (reg,code) <- getSomeReg expr
+ return (code `snocOL` JMP (OpReg reg))
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+-}
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+#if i386_TARGET_ARCH
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` JXX cond id)
+
+#elif x86_64_TARGET_ARCH
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
+ return (cond_code `appOL` code)
+
+#else
+genCondJump = panic "X86.genCondJump: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+        -- write barrier compiles to no code on x86/x86-64;
+        -- we keep it in the Cmm this late only to stop earlier
+        -- optimisations from moving memory operations across it.
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [CmmHinted r _] args = do
+ l1 <- getNewLabelNat
+ l2 <- getNewLabelNat
+ case op of
+ MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
+ MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
+
+ MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
+ MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
+
+ MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
+ MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
+
+ MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
+ MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
+
+ other_op -> outOfLineFloatOp op r args
+ where
+ actuallyInlineFloatOp instr size [CmmHinted x _]
+ = do res <- trivialUFCode size (instr size) x
+ any <- anyReg res
+ return (any (getRegisterReg (CmmLocal r)))
+
+genCCall target dest_regs args = do
+ let
+ sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
+#if !darwin_TARGET_OS
+ tot_arg_size = sum sizes
+#else
+ raw_arg_size = sum sizes
+ tot_arg_size = roundTo 16 raw_arg_size
+ arg_pad_size = tot_arg_size - raw_arg_size
+ delta0 <- getDeltaNat
+ setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+ push_codes <- mapM push_arg (reverse args)
+ delta <- getDeltaNat
+
+ -- in
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmCallee (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) []), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmCallee expr conv
+ -> do { (dyn_c, dyn_r) <- get_op expr
+ ; ASSERT( isWord32 (cmmExprType expr) )
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
+
+ let push_code
+#if darwin_TARGET_OS
+ | arg_pad_size /= 0
+ = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+ DELTA (delta0 - arg_pad_size)]
+ `appOL` concatOL push_codes
+ | otherwise
+#endif
+ = concatOL push_codes
+ call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
+ [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + tot_arg_size)]
+ )
+ -- in
+ setDeltaNat (delta + tot_arg_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [CmmHinted dest _hint]
+ | isFloatType ty = unitOL (GMOV fake0 r_dest)
+ | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
+ MOV II32 (OpReg edx) (OpReg r_dest_hi)]
+ | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
+ where
+ ty = localRegType dest
+ w = typeWidth ty
+ r_dest_hi = getHiVRegFromLo r_dest
+ r_dest = getRegisterReg (CmmLocal dest)
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (push_code `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size :: CmmType -> Int -- Width in bytes
+ arg_size ty = widthInBytes (typeWidth ty)
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
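+        -- e.g. roundTo 16 20 == 32 and roundTo 16 16 == 16, preserving
+        -- the 16-byte stack alignment Darwin requires.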
+
+
+ push_arg :: HintedCmmActual {-current argument-}
+ -> NatM InstrBlock -- code
+
+ push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
+ | isWord64 arg_ty = do
+ ChildCode64 code r_lo <- iselExpr64 arg
+ delta <- getDeltaNat
+ setDeltaNat (delta - 8)
+ let
+ r_hi = getHiVRegFromLo r_lo
+ -- in
+ return ( code `appOL`
+ toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
+ PUSH II32 (OpReg r_lo), DELTA (delta - 8),
+ DELTA (delta-8)]
+ )
+
+ | otherwise = do
+ (code, reg) <- get_op arg
+ delta <- getDeltaNat
+ let size = arg_size arg_ty -- Byte size
+ setDeltaNat (delta-size)
+ if (isFloatType arg_ty)
+ then return (code `appOL`
+ toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
+ DELTA (delta-size),
+ GST (floatSize (typeWidth arg_ty))
+ reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0))]
+ )
+ else return (code `snocOL`
+ PUSH II32 (OpReg reg) `snocOL`
+ DELTA (delta-size)
+ )
+ where
+ arg_ty = cmmExprType arg
+
+ ------------
+ get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
+ get_op op = do
+ (reg,code) <- getSomeReg op
+ return (code, reg)
+
+#elif x86_64_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+        -- write barrier compiles to no code on x86/x86-64;
+        -- we keep it in the Cmm this late only to stop earlier
+        -- optimisations from moving memory operations across it.
+
+
+genCCall (CmmPrim op) [CmmHinted r _] args =
+ outOfLineFloatOp op r args
+
+genCCall target dest_regs args = do
+
+ -- load up the register arguments
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
+
+ let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
+            -- for annotating the call instruction with the argument
+            -- registers it reads
+
+ sse_regs = length fp_regs_used
+
+ tot_arg_size = arg_size * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+ -- need to make sure we push a multiple of 16-bytes of args,
+ -- plus the return address, to get the correct alignment.
+ -- Urg, this is hard. We need to feed the delta back into
+ -- the arg pushing code.
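+    -- Worked example: a single stack arg gives tot_arg_size == 8, so we
+    -- subtract 8 first; 8 bytes of padding plus the 8-byte argument make
+    -- 16, and the call's pushed return address then leaves %rsp on a
+    -- 16-byte boundary +8, as required.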
+ (real_size, adjust_rsp) <-
+ if tot_arg_size `rem` 16 == 0
+ then return (tot_arg_size, nilOL)
+ else do -- we need to adjust...
+ delta <- getDeltaNat
+ setDeltaNat (delta-8)
+ return (tot_arg_size+8, toOL [
+ SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
+ DELTA (delta-8)
+ ])
+
+ -- push the stack args, right to left
+ push_code <- push_args (reverse stack_args) nilOL
+ delta <- getDeltaNat
+
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmCallee (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmCallee expr conv
+ -> do (dyn_r, dyn_c) <- getSomeReg expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+ let
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
+ assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
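+        -- e.g. a varargs call such as printf with two Double arguments
+        -- would have %al set to 2 here.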
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [CmmHinted dest _hint] =
+ case typeWidth rep of
+ W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+          W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
+ _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
+ where
+ rep = localRegType dest
+ r_dest = getRegisterReg (CmmLocal dest)
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [CmmHinted CmmExpr]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((CmmHinted arg hint) : rest) aregs fregs code
+ | isFloatType arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprType arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((CmmHinted arg hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((CmmHinted arg hint):rest) code
+ | isFloatType arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` arg_code `appOL` toOL [
+ SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size),
+ MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
+ ASSERT(width == W64) return ()
+ (arg_op, arg_code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` arg_code `appOL` toOL [
+ PUSH II64 arg_op,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+ where
+ arg_rep = cmmExprType arg
+ width = typeWidth arg_rep
+
+#else
+genCCall = panic "X86.genCCall: not defined"
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
+outOfLineFloatOp mop res args
+ = do
+ dflags <- getDynFlagsNat
+ targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
+ let target = CmmCallee targetExpr CCallConv
+
+ if isFloat64 (localRegType res)
+ then
+ stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
+ else do
+ uq <- getUniqueNat
+ let
+ tmp = LocalReg uq f64
+ -- in
+ code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
+ code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+ return (code1 `appOL` code2)
+ where
+ lbl = mkForeignLabel fn Nothing False IsFunction
+
+ fn = case mop of
+ MO_F32_Sqrt -> fsLit "sqrtf"
+ MO_F32_Sin -> fsLit "sinf"
+ MO_F32_Cos -> fsLit "cosf"
+ MO_F32_Tan -> fsLit "tanf"
+ MO_F32_Exp -> fsLit "expf"
+ MO_F32_Log -> fsLit "logf"
+
+ MO_F32_Asin -> fsLit "asinf"
+ MO_F32_Acos -> fsLit "acosf"
+ MO_F32_Atan -> fsLit "atanf"
+
+ MO_F32_Sinh -> fsLit "sinhf"
+ MO_F32_Cosh -> fsLit "coshf"
+ MO_F32_Tanh -> fsLit "tanhf"
+ MO_F32_Pwr -> fsLit "powf"
+
+ MO_F64_Sqrt -> fsLit "sqrt"
+ MO_F64_Sin -> fsLit "sin"
+ MO_F64_Cos -> fsLit "cos"
+ MO_F64_Tan -> fsLit "tan"
+ MO_F64_Exp -> fsLit "exp"
+ MO_F64_Log -> fsLit "log"
+
+ MO_F64_Asin -> fsLit "asin"
+ MO_F64_Acos -> fsLit "acos"
+ MO_F64_Atan -> fsLit "atan"
+
+ MO_F64_Sinh -> fsLit "sinh"
+ MO_F64_Cosh -> fsLit "cosh"
+ MO_F64_Tanh -> fsLit "tanh"
+ MO_F64_Pwr -> fsLit "pow"
+
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    (tableReg,t_code) <- getSomeReg dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordWidth)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
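+            -- Each entry is thus the offset (blockLabel - lbl) from the
+            -- start of the table, so the table contains no absolute
+            -- addresses and needs no load-time relocation.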
+
+ op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+#if x86_64_TARGET_ARCH
+#if darwin_TARGET_OS
+ -- on Mac OS X/x86_64, put the jump table in the text section
+ -- to work around a limitation of the linker.
+ -- ld64 is unable to handle the relocations for
+ -- .quad L1 - L0
+ -- if L0 is not preceded by a non-anonymous label in its section.
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ ADD (intSize wordWidth) op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
+ LDATA Text (CmmDataLabel lbl : jumpTable)
+ ]
+#else
+ -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
+ -- relocations, hence we only get 32-bit offsets in the jump
+ -- table. As these offsets are always negative we need to properly
+ -- sign extend them to 64-bit. This hack should be removed in
+ -- conjunction with the hack in PprMach.hs/pprDataItem once
+ -- binutils 2.17 is standard.
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ MOVSxL II32
+ (OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0)))
+ (OpReg reg),
+ ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+#endif
+#else
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ ADD (intSize wordWidth) op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+#endif
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ JMP_TBL op [ id | Just id <- ids ]
+ ]
+ -- in
+ return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condIntReg cond x y = do
+ CondCode _ cond cond_code <- condIntCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
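+-- e.g. (x < y) compiles roughly to (AT&T operand order):
+--     CMP y x ; SETCC LTT tmp ; MOVZxL tmp dst
+-- leaving 0 or 1 in dst.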
+
+
+
+condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+#elif x86_64_TARGET_ARCH
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+  tmp1 <- getNewRegNat archWordSize
+  tmp2 <- getNewRegNat archWordSize
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
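+            -- For example: if either operand is NaN the comparison sets
+            -- PF, so and_ordered yields 0 (correct for <, <=, ==, etc.)
+            -- while or_unordered yields 1 (correct for /=).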
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL II8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+#else
+condFltReg = panic "X86.condFltReg: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching a CmmInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+ it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not
+ subsequently change the contents of that fixed reg. If you
+ want to do so, first copy the value either to a temporary
+ or into dst. You are free to modify dst even if it happens
+ to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+ arbitrary computation. The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from
+ each other and from all other regs, and stay live over
+ arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+ code which places the result in any register, period. Even if that
+ register happens to be read during the computation.
+
+ Corollary #1: this means that if you are generating code for an
+ operation with two arbitrary operands, you cannot assign the result
+ of the first operand into the destination register before computing
+ the second operand. The second operand might require the old value
+ of the destination register.
+
+ Corollary #2: A function might be able to generate more efficient
+ code if it knows the destination register is a new temporary (and
+ therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ (c) known registers (eg. %ecx is used by shifts)
+ In particular, it may *not* modify global registers, unless the global
+ register happens to be the destination register.
+-}
+
+trivialCode width instr (Just revinstr) (CmmLit lit_a) b
+ | is32BitLit lit_a = do
+ b_code <- getAnyReg b
+ let
+ code dst
+ = b_code dst `snocOL`
+ revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+ -- in
+ return (Any (intSize width) code)
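+-- e.g. for (lit + b), ADD serves as its own reversed instruction, so we
+-- can compute b straight into dst and then issue 'ADD $lit, dst' rather
+-- than first materialising the literal in a register.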
+
+trivialCode width instr maybe_revinstr a b
+ = genTrivialCode (intSize width) instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
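+        -- For example (hypothetical): in 'dst := a - b', if b happened
+        -- to be computed into the register chosen as dst, computing a
+        -- into dst would destroy b; hence the copy into tmp first.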
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+regClashesWithOp :: Reg -> Operand -> Bool
+reg `regClashesWithOp` OpReg reg2   = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode width instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ size = floatSize width
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr size x_reg y_reg dst
+ return (Any size code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+trivialFCode pk instr x y
+ = genTrivialCode size (instr size) x y
+ where size = floatSize pk
+#endif
+
+trivialUFCode size instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any size code)
+
+
+--------------------------------------------------------------------------------
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> GITOF; W64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ return (Any (floatSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (floatSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceInt2FP = panic "X86.coerceInt2FP: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of W32 -> GFTOI; W64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ -- in
+ return (Any (intSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (intSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceFP2Int = panic "X86.coerceFP2Int: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2FP :: Width -> CmmExpr -> NatM Register
+
+#if x86_64_TARGET_ARCH
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any (floatSize to) code)
+
+#else
+coerceFP2FP = panic "X86.coerceFP2FP: not defined"
+
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
new file mode 100644
index 0000000000..60e40b9654
--- /dev/null
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -0,0 +1,52 @@
+
+module X86.Cond (
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned
+)
+
+where
+
+data Cond
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+        deriving (Eq)
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 0dea1dd66d..b4b6fb5f4b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -12,34 +12,46 @@
module X86.Instr
where
-import BlockId
+import X86.Cond
import X86.Regs
-import RegsBase
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import BlockId
import Cmm
import FastString
+import FastBool
import CLabel
import Panic
-data Cond
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
- deriving (Eq)
+-- Size of an x86 memory address, in bytes.
+--
+archWordSize :: Size
+#if i386_TARGET_ARCH
+archWordSize = II32
+#elif x86_64_TARGET_ARCH
+archWordSize = II64
+#else
+archWordSize = panic "X86.Instr.archWordSize: not defined"
+#endif
+
+-- | Instruction instance for x86 instruction set.
+instance Instruction Instr where
+ regUsageOfInstr = x86_regUsageOfInstr
+ patchRegsOfInstr = x86_patchRegsOfInstr
+ isJumpishInstr = x86_isJumpishInstr
+ jumpDestsOfInstr = x86_jumpDestsOfInstr
+ patchJumpInstr = x86_patchJumpInstr
+ mkSpillInstr = x86_mkSpillInstr
+ mkLoadInstr = x86_mkLoadInstr
+ takeDeltaInstr = x86_takeDeltaInstr
+ isMetaInstr = x86_isMetaInstr
+ mkRegRegMoveInstr = x86_mkRegRegMoveInstr
+ takeRegRegMoveInstr = x86_takeRegRegMoveInstr
+ mkJumpInstr = x86_mkJumpInstr
-- -----------------------------------------------------------------------------
@@ -154,13 +166,6 @@ data Instr
-- benefit of subsequent passes
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-
-- Moves.
| MOV Size Operand Operand
| MOVZxL Size Operand Operand -- size is the size of operand 1
@@ -301,7 +306,436 @@ data Operand
-i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+x86_regUsageOfInstr :: Instr -> RegUsage
+x86_regUsageOfInstr instr
+ = case instr of
+ MOV _ src dst -> usageRW src dst
+ MOVZxL _ src dst -> usageRW src dst
+ MOVSxL _ src dst -> usageRW src dst
+ LEA _ src dst -> usageRW src dst
+ ADD _ src dst -> usageRM src dst
+ ADC _ src dst -> usageRM src dst
+ SUB _ src dst -> usageRM src dst
+ IMUL _ src dst -> usageRM src dst
+ IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx]
+ MUL _ src dst -> usageRM src dst
+ DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
+ IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
+ AND _ src dst -> usageRM src dst
+ OR _ src dst -> usageRM src dst
+
+ XOR _ (OpReg src) (OpReg dst)
+ | src == dst -> mkRU [] [dst]
+
+ XOR _ src dst -> usageRM src dst
+ NOT _ op -> usageM op
+ NEGI _ op -> usageM op
+ SHL _ imm dst -> usageRM imm dst
+ SAR _ imm dst -> usageRM imm dst
+ SHR _ imm dst -> usageRM imm dst
+ BT _ _ src -> mkRUR (use_R src)
+
+ PUSH _ op -> mkRUR (use_R op)
+ POP _ op -> mkRU [] (def_W op)
+ TEST _ src dst -> mkRUR (use_R src ++ use_R dst)
+ CMP _ src dst -> mkRUR (use_R src ++ use_R dst)
+ SETCC _ op -> mkRU [] (def_W op)
+ JXX _ _ -> mkRU [] []
+ JXX_GBL _ _ -> mkRU [] []
+ JMP op -> mkRUR (use_R op)
+ JMP_TBL op _ -> mkRUR (use_R op)
+ CALL (Left _) params -> mkRU params callClobberedRegs
+ CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
+ CLTD _ -> mkRU [eax] [edx]
+ NOP -> mkRU [] []
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> mkRU [src] [dst]
+ GLD _ src dst -> mkRU (use_EA src) [dst]
+ GST _ src dst -> mkRUR (src : use_EA dst)
+
+ GLDZ dst -> mkRU [] [dst]
+ GLD1 dst -> mkRU [] [dst]
+
+ GFTOI src dst -> mkRU [src] [dst]
+ GDTOI src dst -> mkRU [src] [dst]
+
+ GITOF src dst -> mkRU [src] [dst]
+ GITOD src dst -> mkRU [src] [dst]
+
+ GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
+
+ GCMP _ src1 src2 -> mkRUR [src1,src2]
+ GABS _ src dst -> mkRU [src] [dst]
+ GNEG _ src dst -> mkRU [src] [dst]
+ GSQRT _ src dst -> mkRU [src] [dst]
+ GSIN _ _ _ src dst -> mkRU [src] [dst]
+ GCOS _ _ _ src dst -> mkRU [src] [dst]
+ GTAN _ _ _ src dst -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> mkRU [src] [dst]
+ CVTSD2SS src dst -> mkRU [src] [dst]
+ CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
+ CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
+ CVTSI2SS src dst -> mkRU (use_R src) [dst]
+ CVTSI2SD src dst -> mkRU (use_R src) [dst]
+ FDIV _ src dst -> usageRM src dst
+#endif
+
+ FETCHGOT reg -> mkRU [] [reg]
+ FETCHPC reg -> mkRU [] [reg]
+
+ COMMENT _ -> noUsage
+ DELTA _ -> noUsage
+
+ _other -> panic "regUsage: unrecognised instr"
+
+ where
+ -- 2 operand form; first operand Read; second Written
+ usageRW :: Operand -> Operand -> RegUsage
+ usageRW op (OpReg reg) = mkRU (use_R op) [reg]
+ usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+ usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
+
+ -- 2 operand form; first operand Read; second Modified
+ usageRM :: Operand -> Operand -> RegUsage
+ usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
+ usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+ usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
+
+ -- 1 operand form; operand Modified
+ usageM :: Operand -> RegUsage
+ usageM (OpReg reg) = mkRU [reg] [reg]
+ usageM (OpAddr ea) = mkRUR (use_EA ea)
+ usageM _ = panic "X86.RegInfo.usageM: no match"
+
+ -- Registers defd when an operand is written.
+ def_W (OpReg reg) = [reg]
+ def_W (OpAddr _ ) = []
+ def_W _ = panic "X86.RegInfo.def_W: no match"
+
+ -- Registers used when an operand is read.
+ use_R (OpReg reg) = [reg]
+ use_R (OpImm _) = []
+ use_R (OpAddr ea) = use_EA ea
+
+ -- Registers used to compute an effective address.
+ use_EA (ImmAddr _ _) = []
+ use_EA (AddrBaseIndex base index _) =
+ use_base base $! use_index index
+ where use_base (EABaseReg r) x = r : x
+ use_base _ x = x
+ use_index EAIndexNone = []
+ use_index (EAIndex i _) = [i]
+
+ mkRUR src = src' `seq` RU src' []
+ where src' = filter interesting src
+
+ mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+ where src' = filter interesting src
+ dst' = filter interesting dst
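+-- For example, 'ADD sz (OpReg a) (OpReg b)' goes via usageRM and yields
+-- RU [a,b] [b], after uninteresting (pre-allocated) registers have been
+-- filtered out.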
+
+interesting :: Reg -> Bool
+interesting (VirtualRegI _) = True
+interesting (VirtualRegHi _) = True
+interesting (VirtualRegF _) = True
+interesting (VirtualRegD _) = True
+interesting (RealReg i) = isFastTrue (freeReg i)
+
+
+
+
+x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+x86_patchRegsOfInstr instr env
+ = case instr of
+ MOV sz src dst -> patch2 (MOV sz) src dst
+ MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
+ MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
+ LEA sz src dst -> patch2 (LEA sz) src dst
+ ADD sz src dst -> patch2 (ADD sz) src dst
+ ADC sz src dst -> patch2 (ADC sz) src dst
+ SUB sz src dst -> patch2 (SUB sz) src dst
+ IMUL sz src dst -> patch2 (IMUL sz) src dst
+ IMUL2 sz src -> patch1 (IMUL2 sz) src
+ MUL sz src dst -> patch2 (MUL sz) src dst
+ IDIV sz op -> patch1 (IDIV sz) op
+ DIV sz op -> patch1 (DIV sz) op
+ AND sz src dst -> patch2 (AND sz) src dst
+ OR sz src dst -> patch2 (OR sz) src dst
+ XOR sz src dst -> patch2 (XOR sz) src dst
+ NOT sz op -> patch1 (NOT sz) op
+ NEGI sz op -> patch1 (NEGI sz) op
+ SHL sz imm dst -> patch1 (SHL sz imm) dst
+ SAR sz imm dst -> patch1 (SAR sz imm) dst
+ SHR sz imm dst -> patch1 (SHR sz imm) dst
+ BT sz imm src -> patch1 (BT sz imm) src
+ TEST sz src dst -> patch2 (TEST sz) src dst
+ CMP sz src dst -> patch2 (CMP sz) src dst
+ PUSH sz op -> patch1 (PUSH sz) op
+ POP sz op -> patch1 (POP sz) op
+ SETCC cond op -> patch1 (SETCC cond) op
+ JMP op -> patch1 JMP op
+ JMP_TBL op ids -> patch1 JMP_TBL op $ ids
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> GMOV (env src) (env dst)
+ GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
+ GST sz src dst -> GST sz (env src) (lookupAddr dst)
+
+ GLDZ dst -> GLDZ (env dst)
+ GLD1 dst -> GLD1 (env dst)
+
+ GFTOI src dst -> GFTOI (env src) (env dst)
+ GDTOI src dst -> GDTOI (env src) (env dst)
+
+ GITOF src dst -> GITOF (env src) (env dst)
+ GITOD src dst -> GITOD (env src) (env dst)
+
+ GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
+ GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
+ GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
+ GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
+
+ GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
+ GABS sz src dst -> GABS sz (env src) (env dst)
+ GNEG sz src dst -> GNEG sz (env src) (env dst)
+ GSQRT sz src dst -> GSQRT sz (env src) (env dst)
+ GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
+ GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
+ GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
+ CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
+ CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
+ CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
+ CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
+ CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
+ FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
+#endif
+
+ CALL (Left _) _ -> instr
+ CALL (Right reg) p -> CALL (Right (env reg)) p
+
+ FETCHGOT reg -> FETCHGOT (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+
+ NOP -> instr
+ COMMENT _ -> instr
+ DELTA _ -> instr
+
+ JXX _ _ -> instr
+ JXX_GBL _ _ -> instr
+ CLTD _ -> instr
+
+ _other -> panic "patchRegs: unrecognised instr"
+
+ where
+ patch1 insn op = insn $! patchOp op
+ patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
+
+ patchOp (OpReg reg) = OpReg $! env reg
+ patchOp (OpImm imm) = OpImm imm
+ patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
+
+ lookupAddr (ImmAddr imm off) = ImmAddr imm off
+ lookupAddr (AddrBaseIndex base index disp)
+ = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
+ where
+ lookupBase EABaseNone = EABaseNone
+ lookupBase EABaseRip = EABaseRip
+ lookupBase (EABaseReg r) = EABaseReg (env r)
+
+ lookupIndex EAIndexNone = EAIndexNone
+ lookupIndex (EAIndex r i) = EAIndex (env r) i
+
+
+--------------------------------------------------------------------------------
+x86_isJumpishInstr
+ :: Instr -> Bool
+
+x86_isJumpishInstr instr
+ = case instr of
+ JMP{} -> True
+ JXX{} -> True
+ JXX_GBL{} -> True
+ JMP_TBL{} -> True
+ CALL{} -> True
+ _ -> False
+
+
+x86_jumpDestsOfInstr
+ :: Instr
+ -> [BlockId]
+
+x86_jumpDestsOfInstr insn
+ = case insn of
+ JXX _ id -> [id]
+ JMP_TBL _ ids -> ids
+ _ -> []
+
+
+x86_patchJumpInstr
+ :: Instr -> (BlockId -> BlockId) -> Instr
+
+x86_patchJumpInstr insn patchF
+ = case insn of
+ JXX cc id -> JXX cc (patchF id)
+ JMP_TBL _ _ -> error "Cannot patch JMP_TBL"
+ _ -> insn
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- | Make a spill instruction.
+x86_mkSpillInstr
+ :: Reg -- register to spill
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+#if i386_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of
+ RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+
+#elif x86_64_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
+ RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> panic "X86.mkSpillInstr: no match"
+ -- ToDo: will it work to always spill as a double?
+ -- does that cause a stall if the data was a float?
+#else
+x86_mkSpillInstr _ _ _
+ = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
+#endif
+
+
+-- | Make a spill reload instruction.
+x86_mkLoadInstr
+ :: Reg -- register to load
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+#if i386_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of {
+ RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
+ _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
+#elif x86_64_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
+ _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+#else
+x86_mkLoadInstr _ _ _
+ = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
+#endif
+
+
+--------------------------------------------------------------------------------
+
+-- | See if this instruction is telling us the current C stack delta
+x86_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+x86_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
+x86_isMetaInstr
+ :: Instr
+ -> Bool
+
+x86_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+
+-- | Make a reg-reg move instruction.
+-- Integer registers move with MOV; on i386, floating-point values live
+-- in the x87 register stack, so reg-reg moves between them use GMOV
+-- instead.
+--
+x86_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+x86_mkRegRegMoveInstr src dst
+ = case regClass src of
+#if i386_TARGET_ARCH
+ RcInteger -> MOV II32 (OpReg src) (OpReg dst)
+ RcDouble -> GMOV src dst
+ RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#else
+ RcInteger -> MOV II64 (OpReg src) (OpReg dst)
+ RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
+ RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#endif
+
+
+-- | Check whether an instruction represents a reg-reg move.
+-- The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- by assigning the src and dest temporaries to the same real register.
+--
+x86_takeRegRegMoveInstr
+ :: Instr
+ -> Maybe (Reg,Reg)
+
+x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
+ = Just (r1,r2)
+
+x86_takeRegRegMoveInstr _ = Nothing
+
+
+-- | Make an unconditional branch instruction.
+x86_mkJumpInstr
+ :: BlockId
+ -> [Instr]
+
+x86_mkJumpInstr id
+ = [JXX ALWAYS id]
+
+
+
+
+
+i386_insert_ffrees
+ :: [GenBasicBlock Instr]
+ -> [GenBasicBlock Instr]
+
i386_insert_ffrees blocks
| or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
= map ffree_before_nonlocal_transfers blocks
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index c0ad496d78..3f181fc056 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module X86.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
pprDataItem,
- pprInstr
)
where
@@ -20,24 +23,145 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import PprBase
-import RegsBase
import X86.Regs
import X86.Instr
+import X86.Cond
+import Instruction
+import Size
+import Reg
+import PprBase
+
import BlockId
import Cmm
-
-import CLabel ( CLabel, mkAsmTempLabel )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel ( mkDeadStripPreventer )
-#endif
-
+import CLabel
import Unique ( pprUnique )
import Pretty
import FastString
import qualified Outputable
-import Outputable (panic)
+import Outputable (panic, Outputable)
+
+import Data.Word
+
+
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+ -- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+
+
+pprAlign bytes
+ = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
+ where
+
+#if darwin_TARGET_OS
+ pow2 = log2 bytes
+
+ log2 :: Int -> Int -- cache the common ones
+ log2 1 = 0
+ log2 2 = 1
+ log2 4 = 2
+ log2 8 = 3
+ log2 n = 1 + log2 (n `quot` 2)
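+        -- e.g. a request for 8-byte alignment is emitted as '.align 8'
+        -- on most targets, but as '.align 3' (a power of two) on Darwin.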
+#endif
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
+
+
+
+
+
+
+
+
+
+
+
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
pprUserReg :: Reg -> Doc
@@ -49,7 +173,6 @@ pprUserReg = panic "X86.Ppr.pprUserReg: not defined"
#endif
-
pprReg :: Size -> Reg -> Doc
pprReg s r
@@ -228,7 +351,7 @@ pprAddr (AddrBaseIndex base index displacement)
= let
pp_disp = ppr_disp displacement
pp_off p = pp_disp <> char '(' <> p <> char ')'
- pp_reg r = pprReg wordSize r
+ pp_reg r = pprReg archWordSize r
in
case (base, index) of
(EABaseNone, EAIndexNone) -> pp_disp
@@ -384,6 +507,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -399,6 +523,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprUserReg reg]
+-}
pprInstr (MOV size src dst)
= pprSizeOpOp (sLit "mov") size src dst
@@ -414,7 +539,7 @@ pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src
-- the remaining zero-extension to 64 bits is automatic, and the 32-bit
-- instruction is shorter.
-pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
-- here we do some patching, since the physical registers are only set late
-- in the code generation.
@@ -497,10 +622,10 @@ pprInstr (JXX cond (BlockId id))
pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
-pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op)
+pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
pprInstr (JMP_TBL op _) = pprInstr (JMP op)
pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
-pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg)
+pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
@@ -941,9 +1066,9 @@ pprRegReg :: LitString -> Reg -> Reg -> Doc
pprRegReg name reg1 reg2
= hcat [
pprMnemonic_ name,
- pprReg wordSize reg1,
+ pprReg archWordSize reg1,
comma,
- pprReg wordSize reg2
+ pprReg archWordSize reg2
]
@@ -951,9 +1076,9 @@ pprOpReg :: LitString -> Operand -> Reg -> Doc
pprOpReg name op1 reg2
= hcat [
pprMnemonic_ name,
- pprOperand wordSize op1,
+ pprOperand archWordSize op1,
comma,
- pprReg wordSize reg2
+ pprReg archWordSize reg2
]
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs
index 39bc6dec10..58d063bd88 100644
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ b/compiler/nativeGen/X86/RegInfo.hs
@@ -1,26 +1,17 @@
module X86.RegInfo (
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
-
- JumpDest(..),
+ mkVReg,
+
+ JumpDest,
canShortcut,
shortcutJump,
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
spillSlotSize,
maxSpillSlots,
- spillSlotToOffset
+ spillSlotToOffset,
+
+ shortcutStatic,
+ regDotColor
)
where
@@ -29,341 +20,26 @@ where
#include "HsVersions.h"
import X86.Instr
+import X86.Cond
import X86.Regs
-import RegsBase
+import Size
+import Reg
+import Cmm
+import CLabel
import BlockId
import Outputable
import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- -----------------------------------------------------------------------------
--- RegUsage type
-
--- @regUsage@ returns the sets of src and destination registers used
--- by a particular instruction. Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint. (We wouldn't
--- want them to end up on the free list!) As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such. Just
--- state precisely the regs read and written by that insn. The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage = RU [] []
-
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
- MOV _ src dst -> usageRW src dst
- MOVZxL _ src dst -> usageRW src dst
- MOVSxL _ src dst -> usageRW src dst
- LEA _ src dst -> usageRW src dst
- ADD _ src dst -> usageRM src dst
- ADC _ src dst -> usageRM src dst
- SUB _ src dst -> usageRM src dst
- IMUL _ src dst -> usageRM src dst
- IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx]
- MUL _ src dst -> usageRM src dst
- DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
- IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
- AND _ src dst -> usageRM src dst
- OR _ src dst -> usageRM src dst
-
- XOR _ (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [dst]
-
- XOR _ src dst -> usageRM src dst
- NOT _ op -> usageM op
- NEGI _ op -> usageM op
- SHL _ imm dst -> usageRM imm dst
- SAR _ imm dst -> usageRM imm dst
- SHR _ imm dst -> usageRM imm dst
- BT _ _ src -> mkRUR (use_R src)
-
- PUSH _ op -> mkRUR (use_R op)
- POP _ op -> mkRU [] (def_W op)
- TEST _ src dst -> mkRUR (use_R src ++ use_R dst)
- CMP _ src dst -> mkRUR (use_R src ++ use_R dst)
- SETCC _ op -> mkRU [] (def_W op)
- JXX _ _ -> mkRU [] []
- JXX_GBL _ _ -> mkRU [] []
- JMP op -> mkRUR (use_R op)
- JMP_TBL op _ -> mkRUR (use_R op)
- CALL (Left _) params -> mkRU params callClobberedRegs
- CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
- CLTD _ -> mkRU [eax] [edx]
- NOP -> mkRU [] []
-
-#if i386_TARGET_ARCH
- GMOV src dst -> mkRU [src] [dst]
- GLD _ src dst -> mkRU (use_EA src) [dst]
- GST _ src dst -> mkRUR (src : use_EA dst)
-
- GLDZ dst -> mkRU [] [dst]
- GLD1 dst -> mkRU [] [dst]
-
- GFTOI src dst -> mkRU [src] [dst]
- GDTOI src dst -> mkRU [src] [dst]
-
- GITOF src dst -> mkRU [src] [dst]
- GITOD src dst -> mkRU [src] [dst]
-
- GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
- GCMP _ src1 src2 -> mkRUR [src1,src2]
- GABS _ src dst -> mkRU [src] [dst]
- GNEG _ src dst -> mkRU [src] [dst]
- GSQRT _ src dst -> mkRU [src] [dst]
- GSIN _ _ _ src dst -> mkRU [src] [dst]
- GCOS _ _ _ src dst -> mkRU [src] [dst]
- GTAN _ _ _ src dst -> mkRU [src] [dst]
-#endif
-
-#if x86_64_TARGET_ARCH
- CVTSS2SD src dst -> mkRU [src] [dst]
- CVTSD2SS src dst -> mkRU [src] [dst]
- CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
- CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
- CVTSI2SS src dst -> mkRU (use_R src) [dst]
- CVTSI2SD src dst -> mkRU (use_R src) [dst]
- FDIV _ src dst -> usageRM src dst
-#endif
-
- FETCHGOT reg -> mkRU [] [reg]
- FETCHPC reg -> mkRU [] [reg]
-
- COMMENT _ -> noUsage
- DELTA _ -> noUsage
- SPILL reg _ -> mkRU [reg] []
- RELOAD _ reg -> mkRU [] [reg]
-
- _other -> panic "regUsage: unrecognised instr"
-
- where
- -- 2 operand form; first operand Read; second Written
- usageRW :: Operand -> Operand -> RegUsage
- usageRW op (OpReg reg) = mkRU (use_R op) [reg]
- usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
- usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
-
- -- 2 operand form; first operand Read; second Modified
- usageRM :: Operand -> Operand -> RegUsage
- usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
- usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
- usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
-
- -- 1 operand form; operand Modified
- usageM :: Operand -> RegUsage
- usageM (OpReg reg) = mkRU [reg] [reg]
- usageM (OpAddr ea) = mkRUR (use_EA ea)
- usageM _ = panic "X86.RegInfo.usageM: no match"
-
- -- Registers defd when an operand is written.
- def_W (OpReg reg) = [reg]
- def_W (OpAddr _ ) = []
- def_W _ = panic "X86.RegInfo.def_W: no match"
-
- -- Registers used when an operand is read.
- use_R (OpReg reg) = [reg]
- use_R (OpImm _) = []
- use_R (OpAddr ea) = use_EA ea
-
- -- Registers used to compute an effective address.
- use_EA (ImmAddr _ _) = []
- use_EA (AddrBaseIndex base index _) =
- use_base base $! use_index index
- where use_base (EABaseReg r) x = r : x
- use_base _ x = x
- use_index EAIndexNone = []
- use_index (EAIndex i _) = [i]
-
- mkRUR src = src' `seq` RU src' []
- where src' = filter interesting src
-
- mkRU src dst = src' `seq` dst' `seq` RU src' dst'
- where src' = filter interesting src
- dst' = filter interesting dst
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI _) = True
-interesting (VirtualRegHi _) = True
-interesting (VirtualRegF _) = True
-interesting (VirtualRegD _) = True
-interesting (RealReg i) = isFastTrue (freeReg i)
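-
--- For instance, ADD is a read/modify instruction, so
--- regUsage (ADD II32 (OpReg r1) (OpReg r2)) yields RU [r1,r2] [r2]
--- (modulo the "interesting" filter, which drops pre-allocated
--- machine registers from both sets).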
-
-
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- MOV sz src dst -> patch2 (MOV sz) src dst
- MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
- MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
- LEA sz src dst -> patch2 (LEA sz) src dst
- ADD sz src dst -> patch2 (ADD sz) src dst
- ADC sz src dst -> patch2 (ADC sz) src dst
- SUB sz src dst -> patch2 (SUB sz) src dst
- IMUL sz src dst -> patch2 (IMUL sz) src dst
- IMUL2 sz src -> patch1 (IMUL2 sz) src
- MUL sz src dst -> patch2 (MUL sz) src dst
- IDIV sz op -> patch1 (IDIV sz) op
- DIV sz op -> patch1 (DIV sz) op
- AND sz src dst -> patch2 (AND sz) src dst
- OR sz src dst -> patch2 (OR sz) src dst
- XOR sz src dst -> patch2 (XOR sz) src dst
- NOT sz op -> patch1 (NOT sz) op
- NEGI sz op -> patch1 (NEGI sz) op
- SHL sz imm dst -> patch1 (SHL sz imm) dst
- SAR sz imm dst -> patch1 (SAR sz imm) dst
- SHR sz imm dst -> patch1 (SHR sz imm) dst
- BT sz imm src -> patch1 (BT sz imm) src
- TEST sz src dst -> patch2 (TEST sz) src dst
- CMP sz src dst -> patch2 (CMP sz) src dst
- PUSH sz op -> patch1 (PUSH sz) op
- POP sz op -> patch1 (POP sz) op
- SETCC cond op -> patch1 (SETCC cond) op
- JMP op -> patch1 JMP op
- JMP_TBL op ids -> patch1 JMP_TBL op $ ids
-
-#if i386_TARGET_ARCH
- GMOV src dst -> GMOV (env src) (env dst)
- GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
- GST sz src dst -> GST sz (env src) (lookupAddr dst)
-
- GLDZ dst -> GLDZ (env dst)
- GLD1 dst -> GLD1 (env dst)
-
- GFTOI src dst -> GFTOI (env src) (env dst)
- GDTOI src dst -> GDTOI (env src) (env dst)
-
- GITOF src dst -> GITOF (env src) (env dst)
- GITOD src dst -> GITOD (env src) (env dst)
-
- GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
- GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
- GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
- GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
-
- GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
- GABS sz src dst -> GABS sz (env src) (env dst)
- GNEG sz src dst -> GNEG sz (env src) (env dst)
- GSQRT sz src dst -> GSQRT sz (env src) (env dst)
- GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
- GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
- GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
-#endif
-
-#if x86_64_TARGET_ARCH
- CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
- CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
- CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
- CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
- CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
- CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
- FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
-#endif
-
- CALL (Left _) _ -> instr
- CALL (Right reg) p -> CALL (Right (env reg)) p
-
- FETCHGOT reg -> FETCHGOT (env reg)
- FETCHPC reg -> FETCHPC (env reg)
-
- NOP -> instr
- COMMENT _ -> instr
- DELTA _ -> instr
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- JXX _ _ -> instr
- JXX_GBL _ _ -> instr
- CLTD _ -> instr
-
- _other -> panic "patchRegs: unrecognised instr"
-
- where
- patch1 insn op = insn $! patchOp op
- patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
- patchOp (OpReg reg) = OpReg $! env reg
- patchOp (OpImm imm) = OpImm imm
- patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
-
- lookupAddr (ImmAddr imm off) = ImmAddr imm off
- lookupAddr (AddrBaseIndex base index disp)
- = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
- where
- lookupBase EABaseNone = EABaseNone
- lookupBase EABaseRip = EABaseRip
- lookupBase (EABaseReg r) = EABaseReg (env r)
-
- lookupIndex EAIndexNone = EAIndexNone
- lookupIndex (EAIndex r i) = EAIndex (env r) i
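-
--- For example, patchRegs (MOV II32 (OpReg r1) (OpReg r2)) env
--- rebuilds the instruction as MOV II32 (OpReg (env r1)) (OpReg (env r2)),
--- applying env strictly at every register position.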
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- JXX _ id -> id : acc
- JMP_TBL _ ids -> ids ++ acc
- _ -> acc
-
-
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- JMP{} -> True
- JXX{} -> True
- JXX_GBL{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- JXX cc id | id == old -> JXX cc new
- JMP_TBL _ _ -> error "Cannot patch JMP_TBL"
- _other -> insn
-
-
--- -----------------------------------------------------------------------------
--- Detecting reg->reg moves
-
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
-
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
-isRegRegMove _ = Nothing
+import Unique
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size) = VirtualRegI u
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
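+
+-- Note that both FF32 and FF64 become a VirtualRegD: this code does
+-- not distinguish float-width from double-width virtual registers.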
data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -386,92 +62,6 @@ shortcutJump _ other = other
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
- :: Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-#if i386_TARGET_ARCH
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 4
- in case regClass reg of
- RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
- _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 8
- in case regClass reg of
- RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
- RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
- RcFloat -> panic "mkSpillInstr/RcFloat"
- -- ToDo: will it work to always spill as a double?
- -- does that cause a stall if the data was a float?
-#else
-mkSpillInstr _ _ _
- = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
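-
--- For example, on i386 with a current stack delta of 0, spilling an
--- integer register to slot s produces
--- MOV II32 (OpReg reg) (OpAddr (spRel (spillSlotToOffset s `div` 4))).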
-
-
-mkLoadInstr
- :: Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-#if i386_TARGET_ARCH
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 4
- in case regClass reg of {
- RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
- _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 8
- in case regClass reg of
- RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
- _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-mkLoadInstr _ _ _
- = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
-
-
-
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-mkRegRegMoveInstr src dst
- = case regClass src of
- RcInteger -> MOV wordSize (OpReg src) (OpReg dst)
-#if i386_TARGET_ARCH
- RcDouble -> GMOV src dst
- RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#else
- RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
- RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#endif
-
-
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id = [JXX ALWAYS id]
-
-
spillSlotSize :: Int
spillSlotSize = IF_ARCH_i386(12, 8)
@@ -489,3 +79,82 @@ spillSlotToOffset slot
= pprPanic "spillSlotToOffset:"
( text "invalid spill location: " <> int slot
$$ text "maxSpillSlots: " <> int maxSpillSlots)
+
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+
+shortcutStatic _ other_static
+ = other_static
+
+shortBlockId
+ :: (BlockId -> Maybe JumpDest)
+ -> BlockId
+ -> CLabel
+
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
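+
+-- A small worked example (treating uniques as small ints for brevity):
+-- if fn maps block 1 to Just (DestBlockId (BlockId 2)) and block 2 to
+-- Nothing, then shortBlockId fn (BlockId 1) == mkAsmTempLabel 2, so a
+-- jump-table entry naming block 1 ends up pointing straight at block 2.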
+
+
+
+-- reg colors for x86
+#if i386_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just str = lookupUFM regColors reg
+ in text str
+
+regColors
+ = listToUFM
+ $ [ (eax, "#00ff00")
+ , (ebx, "#0000ff")
+ , (ecx, "#00ffff")
+ , (edx, "#0080ff")
+
+ , (fake0, "#ff00ff")
+ , (fake1, "#ff00aa")
+ , (fake2, "#aa00ff")
+ , (fake3, "#aa00aa")
+ , (fake4, "#ff0055")
+ , (fake5, "#5500ff") ]
+
+
+-- reg colors for x86_64
+#elif x86_64_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just str = lookupUFM regColors reg
+ in text str
+
+regColors
+ = listToUFM
+ $ [ (rax, "#00ff00"), (eax, "#00ff00")
+ , (rbx, "#0000ff"), (ebx, "#0000ff")
+ , (rcx, "#00ffff"), (ecx, "#00ffff")
+	, (rdx,	"#0080ff"),	(edx,	"#0080ff")
+ , (r8, "#00ff80")
+ , (r9, "#008080")
+ , (r10, "#0040ff")
+ , (r11, "#00ff40")
+ , (r12, "#008040")
+ , (r13, "#004080")
+ , (r14, "#004040")
+ , (r15, "#002080") ]
+
+ ++ zip (map RealReg [16..31]) (repeat "red")
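+
+-- RealRegs 16..31 have no individual colours; presumably these are the
+-- xmm registers on this target, so they are all painted "red".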
+#else
+regDotColor :: Reg -> SDoc
+regDotColor = panic "not defined"
+#endif
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 1f1c7249a7..87564b860c 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -1,15 +1,4 @@
module X86.Regs (
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediates
Imm(..),
strImmLit,
@@ -45,7 +34,10 @@ module X86.Regs (
-- horror show
freeReg,
- globalRegMaybe
+ globalRegMaybe,
+
+ get_GlobalReg_reg_or_addr,
+ allocatableRegs
)
where
@@ -60,93 +52,22 @@ where
#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
-import Outputable ( Outputable(..), pprPanic, panic )
+import Outputable ( panic )
import qualified Outputable
-import Unique
import FastBool
#if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
import Constants
#endif
--- -----------------------------------------------------------------------------
--- Sizes on this architecture
---
--- A Size is usually a combination of width and class
-
--- It looks very like the old MachRep, but it's now of purely local
--- significance, here in the native code generator. You can change it
--- without global consequences.
---
--- A major use is as an opcode qualifier; thus the opcode
--- mov.l a b
--- might be encoded
--- MOV II32 a b
--- where the Size field encodes the ".l" part.
-
--- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
--- here. I've removed them from the x86 version, we'll see what happens --SDM
-
--- ToDo: quite a few occurrences of Size could usefully be replaced by Width
-
-data Size
- = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
- deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize W32 = FF32
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _ = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8 = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size) = VirtualRegI u
- | otherwise
- = case size of
- FF32 -> VirtualRegD u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-- -----------------------------------------------------------------------------
-- Immediates
@@ -699,4 +620,26 @@ callClobberedRegs = panic "X86.Regs.globalRegMaybe: not defined"
#endif
+-- We map STG registers onto appropriate CmmExprs: either they map to
+-- real machine registers, or they are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
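+
+-- A typical use site (a minimal sketch; Sp is one of the GlobalReg
+-- constructors from Cmm):
+--
+--     case get_GlobalReg_reg_or_addr Sp of
+--       Left  reg  -> {- use the machine register directly -}
+--       Right addr -> {- emit a load from addr -}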
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs to which we are prepared to let the
+-- register allocator map VRegs.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
+
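+
+-- For example, registers pinned to STG globals are not free according
+-- to freeReg, so they never appear in allocatableRegs and the
+-- allocator can never hand them out.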