Diffstat (limited to 'compiler/nativeGen/MachCodeGen.hs')
-rw-r--r--  compiler/nativeGen/MachCodeGen.hs  4654
1 file changed, 4654 insertions, 0 deletions
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
new file mode 100644
index 0000000000..90ce6b5bf8
--- /dev/null
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -0,0 +1,4654 @@
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import MachInstrs
+import MachRegs
+import NCGMonad
+import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import RegAllocInfo ( mkBranchInstr )
+
+-- Our intermediate code:
+import PprCmm ( pprExpr )
+import Cmm
+import MachOp
+import CLabel
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import ForeignCall ( CCallConv(..) )
+import OrdList
+import Pretty
+import Outputable
+import FastString
+import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
+
+#ifdef DEBUG
+import Outputable ( assertPanic )
+import TRACE ( trace )
+#endif
+
+import Control.Monad ( mapAndUnzipM )
+import Maybe ( fromJust )
+import DATA_BITS
+import DATA_WORD
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+type InstrBlock = OrdList Instr
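+
+-- As a minimal, purely illustrative sketch of the idiom used throughout this
+-- module (the combinators nilOL, unitOL, snocOL, appOL and toOL come from
+-- OrdList), a selector typically builds its result like
+--
+--      code dst = setup_code `snocOL` some_instr `appOL` more_code
+--
+-- so appending stays cheap, and the instructions are only flattened into a
+-- list when the block is finally traversed left-to-right.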
+
+cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen (CmmProc info lab params blocks) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (concat nat_blocks)
+ tops = proc : concat statics
+ case picBaseMb of
+ Just picBase -> initializePicBase picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+        -- instruction stream into basic blocks again.  We also extract
+        -- LDATAs here.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatingRep kind -> assignReg_FltCode kind reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode kind reg src
+ where kind = cmmRegRep reg
+
+ CmmStore addr src
+ | isFloatingRep kind -> assignMem_FltCode kind addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode kind addr src
+ where kind = cmmExprRep src
+
+ CmmCall target result_regs args vols
+ -> genCCall target result_regs args vols
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
+ where rep = cmmRegRep reg
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms. Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality. Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result. Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64 -- a.k.a "Register64"
+ = ChildCode64
+ InstrBlock -- code
+ Reg -- the lower 32-bit temporary which contains the
+ -- result; use getHiVRegFromLo to find the other
+ -- VRegUnique. Rules of this simplified insn
+ -- selection game are therefore that the returned
+ -- Reg may be modified
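+
+-- For example (a sketch only, mirroring the assignMem_I64Code cases below),
+-- a consumer of a ChildCode64 recovers both halves like this:
+--
+--      ChildCode64 vcode rlo <- iselExpr64 valueTree
+--      let rhi = getHiVRegFromLo rlo  -- paired vreg holding the top 32 bits
+--      -- ... emit code that uses both rlo and rhi ...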
+
+#if WORD_SIZE_IN_BITS==32
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Little-endian store
+ mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
+ mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
+ mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(i386): invalid lvalue"
+
+------------
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ code = toOL [
+ MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode addr addr_code <- getAmode addrTree
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
+ mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+ -- in
+ return (
+ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ r1hi = getHiVRegFromLo r1lo
+ code = code1 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpReg r2lo) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpReg r2hi) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(i386)" (ppr expr)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ (src, code) <- getSomeReg addrTree
+ let
+ rhi = getHiVRegFromLo rlo
+ -- Big-endian store
+ mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
+ mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
+ return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst pk
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
+ rlo <- getNewRegNat I32
+ let rhi = getHiVRegFromLo rlo
+ mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
+ mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
+ return (
+ ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
+ r_dst_lo <- getNewRegNat I32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_lo = mkVReg uq I32
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (
+ ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+ )
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+#endif /* sparc_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if powerpc_TARGET_ARCH
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+ Amode hi_addr addr_code <- getAmode addrTree
+ case addrOffset hi_addr 4 of
+ Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+ Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
+ return (AddrRegImm hi_ptr (ImmInt 0),
+ AddrRegImm hi_ptr (ImmInt 4),
+ code)
+
+assignMem_I64Code addrTree valueTree = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Big-endian store
+ mov_hi = ST I32 rhi hi_addr
+ mov_lo = ST I32 rlo lo_addr
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MR r_dst_lo r_src_lo
+ mov_hi = MR r_dst_hi r_src_hi
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ (rlo, rhi) <- getNewRegPairNat I32
+ let mov_hi = LD I32 rhi hi_addr
+ mov_lo = LD I32 rlo lo_addr
+ return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ half0 = fromIntegral (fromIntegral i :: Word16)
+ half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+ half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+ half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+
+ code = toOL [
+ LIS rlo (ImmInt half1),
+ OR rlo rlo (RIImm $ ImmInt half0),
+ LIS rhi (ImmInt half3),
+                OR rhi rhi (RIImm $ ImmInt half2)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ ADDC rlo r1lo r2lo,
+ ADDE rhi r1hi r2hi ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Register' type
+
+-- 'Register's passed up the tree.  If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+
+data Register
+ = Fixed MachRep Reg InstrBlock
+ | Any MachRep (Reg -> InstrBlock)
+
+swizzleRegisterRep :: Register -> MachRep -> Register
+swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
+swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
+
+
+-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+-- -----------------------------------------------------------------------------
+-- Grab the Reg for a CmmReg
+
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u pk
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- -----------------------------------------------------------------------------
+-- Generate code to get a subtree into a Register
+
+-- Don't delete this -- it's very handy for debugging.
+--getRegister expr
+-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "getRegister(???)"
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordRep
+ return (Fixed wordRep reg nilOL)
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+#if alpha_TARGET_ARCH
+
+getRegister (StDouble d)
+ = getBlockIdNat `thenNat` \ lbl ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let code dst = mkSeqInstrs [
+ LDATA RoDataSegment lbl [
+ DATA TF [ImmLab (rational d)]
+ ],
+ LDA tmp (AddrImm (ImmCLbl lbl)),
+ LD TF dst (AddrReg tmp)]
+ in
+ return (Any F64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+ = case primop of
+ IntNegOp -> trivialUCode (NEG Q False) x
+
+ NotOp -> trivialUCode NOT x
+
+ FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
+ DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
+
+ OrdOp -> coerceIntCode IntRep x
+ ChrOp -> chrCode x
+
+ Float2IntOp -> coerceFP2Int x
+ Int2FloatOp -> coerceInt2FP pr x
+ Double2IntOp -> coerceFP2Int x
+ Int2DoubleOp -> coerceInt2FP pr x
+
+ Double2FloatOp -> coerceFltCode x
+ Float2DoubleOp -> coerceFltCode x
+
+ other_op -> getRegister (StCall fn CCallConv F64 [x])
+ where
+ fn = case other_op of
+ FloatExpOp -> FSLIT("exp")
+ FloatLogOp -> FSLIT("log")
+ FloatSqrtOp -> FSLIT("sqrt")
+ FloatSinOp -> FSLIT("sin")
+ FloatCosOp -> FSLIT("cos")
+ FloatTanOp -> FSLIT("tan")
+ FloatAsinOp -> FSLIT("asin")
+ FloatAcosOp -> FSLIT("acos")
+ FloatAtanOp -> FSLIT("atan")
+ FloatSinhOp -> FSLIT("sinh")
+ FloatCoshOp -> FSLIT("cosh")
+ FloatTanhOp -> FSLIT("tanh")
+ DoubleExpOp -> FSLIT("exp")
+ DoubleLogOp -> FSLIT("log")
+ DoubleSqrtOp -> FSLIT("sqrt")
+ DoubleSinOp -> FSLIT("sin")
+ DoubleCosOp -> FSLIT("cos")
+ DoubleTanOp -> FSLIT("tan")
+ DoubleAsinOp -> FSLIT("asin")
+ DoubleAcosOp -> FSLIT("acos")
+ DoubleAtanOp -> FSLIT("atan")
+ DoubleSinhOp -> FSLIT("sinh")
+ DoubleCoshOp -> FSLIT("cosh")
+ DoubleTanhOp -> FSLIT("tanh")
+ where
+ pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+ = case primop of
+ CharGtOp -> trivialCode (CMP LTT) y x
+ CharGeOp -> trivialCode (CMP LE) y x
+ CharEqOp -> trivialCode (CMP EQQ) x y
+ CharNeOp -> int_NE_code x y
+ CharLtOp -> trivialCode (CMP LTT) x y
+ CharLeOp -> trivialCode (CMP LE) x y
+
+ IntGtOp -> trivialCode (CMP LTT) y x
+ IntGeOp -> trivialCode (CMP LE) y x
+ IntEqOp -> trivialCode (CMP EQQ) x y
+ IntNeOp -> int_NE_code x y
+ IntLtOp -> trivialCode (CMP LTT) x y
+ IntLeOp -> trivialCode (CMP LE) x y
+
+ WordGtOp -> trivialCode (CMP ULT) y x
+      WordGeOp -> trivialCode (CMP ULE) y x
+ WordEqOp -> trivialCode (CMP EQQ) x y
+ WordNeOp -> int_NE_code x y
+ WordLtOp -> trivialCode (CMP ULT) x y
+ WordLeOp -> trivialCode (CMP ULE) x y
+
+ AddrGtOp -> trivialCode (CMP ULT) y x
+ AddrGeOp -> trivialCode (CMP ULE) y x
+ AddrEqOp -> trivialCode (CMP EQQ) x y
+ AddrNeOp -> int_NE_code x y
+ AddrLtOp -> trivialCode (CMP ULT) x y
+ AddrLeOp -> trivialCode (CMP ULE) x y
+
+ FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ IntAddOp -> trivialCode (ADD Q False) x y
+ IntSubOp -> trivialCode (SUB Q False) x y
+ IntMulOp -> trivialCode (MUL Q False) x y
+ IntQuotOp -> trivialCode (DIV Q False) x y
+ IntRemOp -> trivialCode (REM Q False) x y
+
+ WordAddOp -> trivialCode (ADD Q False) x y
+ WordSubOp -> trivialCode (SUB Q False) x y
+ WordMulOp -> trivialCode (MUL Q False) x y
+ WordQuotOp -> trivialCode (DIV Q True) x y
+ WordRemOp -> trivialCode (REM Q True) x y
+
+ FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
+ FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
+ FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
+ FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
+
+ DoubleAddOp -> trivialFCode F64 (FADD TF) x y
+ DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
+ DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
+ DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
+
+ AddrAddOp -> trivialCode (ADD Q False) x y
+ AddrSubOp -> trivialCode (SUB Q False) x y
+ AddrRemOp -> trivialCode (REM Q True) x y
+
+ AndOp -> trivialCode AND x y
+ OrOp -> trivialCode OR x y
+ XorOp -> trivialCode XOR x y
+ SllOp -> trivialCode SLL x y
+ SrlOp -> trivialCode SRL x y
+
+ ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+ ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+ ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+ FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ where
+ {- ------------------------------------------------------------
+ Some bizarre special code for getting condition codes into
+ registers. Integer non-equality is a test for equality
+ followed by an XOR with 1. (Integer comparisons always set
+ the result register to 0 or 1.) Floating point comparisons of
+ any kind leave the result in a floating point register, so we
+ need to wrangle an integer register out of things.
+ -}
+ int_NE_code :: StixTree -> StixTree -> NatM Register
+
+ int_NE_code x y
+ = trivialCode (CMP EQQ) x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+ in
+ return (Any IntRep code__2)
+
+ {- ------------------------------------------------------------
+ Comments for int_NE_code also apply to cmpF_code
+ -}
+ cmpF_code
+ :: (Reg -> Reg -> Reg -> Instr)
+ -> Cond
+ -> StixTree -> StixTree
+ -> NatM Register
+
+ cmpF_code instr cond x y
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ getBlockIdNat `thenNat` \ lbl ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ OR zeroh (RIImm (ImmInt 1)) dst,
+ BF cond result (ImmCLbl lbl),
+ OR zeroh (RIReg zeroh) dst,
+ NEWBLOCK lbl]
+ in
+ return (Any IntRep code__2)
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+ ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+ = getAmode mem `thenNat` \ amode ->
+ let
+ code = amodeCode amode
+ src = amodeAddr amode
+ size = primRepToSize pk
+ code__2 dst = code . mkSeqInstr (LD size dst src)
+ in
+ return (Any pk code__2)
+
+getRegister (StInt i)
+ | fits8Bits i
+ = let
+ code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+ in
+ return (Any IntRep code)
+ | otherwise
+ = let
+ code dst = mkSeqInstr (LDI Q dst src)
+ in
+ return (Any IntRep code)
+ where
+ src = ImmInt (fromInteger i)
+
+getRegister leaf
+ | isJust imm
+ = let
+ code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+ in
+ return (Any PtrRep code)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)]
+ `consOL` (addr_code `snocOL`
+ GLD F32 addr dst)
+ -- in
+ return (Any F32 code)
+
+
+getRegister (CmmLit (CmmFloat d F64))
+ | d == 0.0
+ = let code dst = unitOL (GLDZ dst)
+ in return (Any F64 code)
+
+ | d == 1.0
+ = let code dst = unitOL (GLD1 dst)
+ in return (Any F64 code)
+
+ | otherwise = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)]
+ `consOL` (addr_code `snocOL`
+ GLD F64 addr dst)
+ -- in
+ return (Any F64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+ let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f rep)],
+ MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ ]
+ -- in
+ return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I32) addr
+ return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ code dst = x_code dst `appOL` toOL [
+ -- This is how gcc does it, so it can't be that bad:
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x80000000 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32)
+ ],
+ XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorps, so we need the 128-bit constant
+ -- ToDo: rip-relative
+ ]
+ --
+ return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ -- This is how gcc does it, so it can't be that bad:
+ code dst = x_code dst `appOL` toOL [
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x8000000000000000 I64),
+ CmmStaticLit (CmmInt 0 I64)
+ ],
+ -- gcc puts an unpck here. Wonder if we need it.
+ XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorpd, so we need the 128-bit constant
+ ]
+ --
+ return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+#if i386_TARGET_ARCH
+ MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
+
+ MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
+ MO_Not rep -> trivialUCode rep (NOT rep) x
+
+ -- Nop conversions
+ -- TODO: these are only nops if the arg is not a fixed register that
+ -- can't be byte-addressed.
+ MO_U_Conv I32 I8 -> conversionNop I32 x
+ MO_S_Conv I32 I8 -> conversionNop I32 x
+ MO_U_Conv I16 I8 -> conversionNop I16 x
+ MO_S_Conv I16 I8 -> conversionNop I16 x
+ MO_U_Conv I32 I16 -> conversionNop I32 x
+ MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
+ MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+ MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
+ MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
+ MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
+
+ MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
+ MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
+ MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+ MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
+ MO_S_Conv from to
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ other -> pprPanic "getRegister" (pprMachOp mop)
+ where
+ -- signed or unsigned extension.
+ integerExtend from to instr expr = do
+ (reg,e_code) <- if from == I8 then getByteReg expr
+ else getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ instr from (OpReg reg) (OpReg dst)
+ return (Any to code)
+
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+ = ASSERT2(cmmExprRep x /= I8, pprExpr e)
+ case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt rep -> condIntReg GU x y
+ MO_U_Ge rep -> condIntReg GEU x y
+ MO_U_Lt rep -> condIntReg LU x y
+ MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
+
+ MO_Add F64 -> trivialFCode F64 GADD x y
+ MO_Sub F64 -> trivialFCode F64 GSUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
+
+ MO_Add rep -> add_code rep x y
+ MO_Sub rep -> sub_code rep x y
+
+ MO_S_Quot rep -> div_code rep True True x y
+ MO_S_Rem rep -> div_code rep True False x y
+ MO_U_Quot rep -> div_code rep False True x y
+ MO_U_Rem rep -> div_code rep False False x y
+
+#if i386_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 GMUL x y
+ MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
+ MO_Mul rep -> let op = IMUL rep in
+ trivialCode rep op (Just op) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_And rep -> let op = AND rep in
+ trivialCode rep op (Just op) x y
+ MO_Or rep -> let op = OR rep in
+ trivialCode rep op (Just op) x y
+ MO_Xor rep -> let op = XOR rep in
+ trivialCode rep op (Just op) x y
+
+        {- Shift ops on x86 have constraints on their source operand: it
+           must be an immediate, CL, or 1
+            => trivialCode is not restrictive enough (sigh.)
+           -}
+ MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
+ MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
+ MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
+
+ other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getNonClobberedReg a
+ b_code <- getAnyReg b
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+
+ code = a_code `appOL` b_code eax `appOL`
+ toOL [
+ IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
+ SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB rep (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
+ ]
+ -- in
+ return (Fixed rep eax code)
+
+ --------------------
+ shift_code :: MachRep
+ -> (Operand -> Operand -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+ {- Case1: shift length as immediate -}
+ shift_code rep instr x y@(CmmLit lit) = do
+ x_code <- getAnyReg x
+ let
+ code dst
+ = x_code dst `snocOL`
+ instr (OpImm (litToImm lit)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+ {- Case2: shift length is complex (non-immediate) -}
+ shift_code rep instr x y{-amount-} = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ y_code <- getAnyReg y
+ let
+ code = x_code `appOL`
+ y_code ecx `snocOL`
+ instr (OpReg ecx) (OpReg x_reg)
+ -- in
+ return (Fixed rep x_reg code)
+
+ --------------------
+ add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ add_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger y) = add_int rep x y
+ add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
+
+ --------------------
+ sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ sub_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger (-y)) = add_int rep x (-y)
+ sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
+
+ -- our three-operand add instruction:
+ add_int rep x y = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ imm = ImmInt (fromInteger y)
+ code dst
+ = x_code `snocOL`
+ LEA rep
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+ (OpReg dst)
+ --
+ return (Any rep code)
+
+ ----------------------
+ div_code rep signed quotient x y = do
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+ x_code <- getAnyReg x
+ let
+ widen | signed = CLTD rep
+ | otherwise = XOR rep (OpReg edx) (OpReg edx)
+
+ instr | signed = IDIV
+ | otherwise = DIV
+
+ code = y_code `appOL`
+ x_code eax `appOL`
+ toOL [widen, instr rep y_op]
+
+ result | quotient = eax
+ | otherwise = edx
+
+ -- in
+ return (Fixed rep result code)
+
+
+getRegister (CmmLoad mem pk)
+ | isFloatingRep pk
+ = do
+ Amode src mem_code <- getAmode mem
+ let
+ code dst = mem_code `snocOL`
+ IF_ARCH_i386(GLD pk src dst,
+ MOV pk (OpAddr src) (OpReg dst))
+ --
+ return (Any pk code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ code <- intLoadCode (instr pk) mem
+ return (Any pk code)
+ where
+ instr I8 = MOVZxL pk
+ instr I16 = MOV I16
+ instr I32 = MOV I32
+ -- we always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV pk) mem
+ return (Any pk code)
+#endif
+
+getRegister (CmmLit (CmmInt 0 rep))
+ = let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
+ code dst
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
+ in
+ return (Any rep code)
+
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | I64 <- cmmLitRep lit, not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+ in
+ return (Any I64 code)
+ where
+ isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as is64BitLit, because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
+getRegister (CmmLit lit)
+ = let
+ rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
+ in
+ return (Any rep code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+ -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+ Amode src mem_code <- getAmode mem
+ return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+ r <- getRegister expr
+ anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code) = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ | isVirtualReg reg -> return (reg,code)
+ | otherwise -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ -- ToDo: could optimise slightly by checking for byte-addressable
+ -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ -- only free regs can be clobbered
+ | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ | otherwise ->
+ return (reg, code)
+
+reg2reg :: MachRep -> Reg -> Reg -> Instr
+reg2reg rep src dst
+#if i386_TARGET_ARCH
+ | isFloatingRep rep = GMOV src dst
+#endif
+ | otherwise = MOV rep (OpReg src) (OpReg dst)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F32 code)
+
+getRegister (CmmLit (CmmFloat d F64)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
+
+ MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
+ MO_Not rep -> trivialUCode rep (XNOR False g0) x
+
+ MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
+
+ MO_U_Conv F64 F32-> coerceDbl2Flt x
+ MO_U_Conv F32 F64-> coerceFlt2Dbl x
+
+ MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
+ MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
+ MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
+ MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
+
+ -- Conversions which are a nop on sparc
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_S_Conv I32 to -> conversionNop to x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
+ MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
+ MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
+ MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
+
+ other_op -> panic "Unknown unary mach op"
+ where
+ -- XXX SLL/SRL?
+ integerExtend signed from to expr = do
+ (reg, e_code) <- getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ ((if signed then SRA else SRL)
+ reg (RIImm (ImmInt 0)) dst)
+ return (Any to code)
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt I32 -> condIntReg GTT x y
+ MO_U_Ge I32 -> condIntReg GE x y
+ MO_U_Lt I32 -> condIntReg LTT x y
+ MO_U_Le I32 -> condIntReg LE x y
+
+ MO_U_Gt I16 -> condIntReg GU x y
+ MO_U_Ge I16 -> condIntReg GEU x y
+ MO_U_Lt I16 -> condIntReg LU x y
+ MO_U_Le I16 -> condIntReg LEU x y
+
+ MO_Add I32 -> trivialCode I32 (ADD False False) x y
+ MO_Sub I32 -> trivialCode I32 (SUB False False) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+{-
+ -- ToDo: teach about V8+ SPARC div instructions
+ MO_S_Quot I32 -> idiv FSLIT(".div") x y
+ MO_S_Rem I32 -> idiv FSLIT(".rem") x y
+ MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
+ MO_U_Rem I32 -> idiv FSLIT(".urem") x y
+-}
+ MO_Add F32 -> trivialFCode F32 FADD x y
+ MO_Sub F32 -> trivialFCode F32 FSUB x y
+ MO_Mul F32 -> trivialFCode F32 FMUL x y
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+
+ MO_Add F64 -> trivialFCode F64 FADD x y
+ MO_Sub F64 -> trivialFCode F64 FSUB x y
+ MO_Mul F64 -> trivialFCode F64 FMUL x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+
+ MO_And rep -> trivialCode rep (AND False) x y
+ MO_Or rep -> trivialCode rep (OR False) x y
+ MO_Xor rep -> trivialCode rep (XOR False) x y
+
+ MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+ MO_Shl rep -> trivialCode rep SLL x y
+ MO_U_Shr rep -> trivialCode rep SRL x y
+ MO_S_Shr rep -> trivialCode rep SRA x y
+
+{-
+ MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [promote x, promote y])
+ where promote x = CmmMachOp MO_F32_to_Dbl [x]
+ MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [x, y])
+-}
+ other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
+
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getSomeReg a
+ (b_reg, b_code) <- getSomeReg b
+ res_lo <- getNewRegNat I32
+ res_hi <- getNewRegNat I32
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+ code dst = a_code `appOL` b_code `appOL`
+ toOL [
+ SMUL False a_reg (RIReg b_reg) res_lo,
+ RDY res_hi,
+ SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+ SUB False False res_lo (RIReg res_hi) dst
+ ]
+ return (Any I32 code)
+
+getRegister (CmmLoad mem pk) = do
+ Amode src code <- getAmode mem
+ let
+ code__2 dst = code `snocOL` LD pk src dst
+ return (Any pk code__2)
+
+getRegister (CmmLit (CmmInt i _))
+ | fits13Bits i
+ = let
+ src = ImmInt (fromInteger i)
+ code dst = unitOL (OR False g0 (RIImm src) dst)
+ in
+ return (Any I32 code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ SETHI (HI imm) dst,
+ OR False dst (RIImm (LO imm)) dst]
+ in return (Any I32 code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ Amode addr addr_code <- getAmode mem
+ let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
+ addr_code `snocOL` LD pk dst addr
+ return (Any pk code)
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_Not rep -> trivialUCode rep NOT x
+
+ MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+
+ MO_S_Conv from to
+ | from == to -> conversionNop to x
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_S_Conv I32 to -> conversionNop to x
+ MO_S_Conv I16 I8 -> conversionNop I8 x
+ MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
+ MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
+
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_U_Conv I16 I8 -> conversionNop I8 x
+ MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
+ MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
+
+ MO_S_Neg F32 -> trivialUCode F32 FNEG x
+ MO_S_Neg F64 -> trivialUCode F64 FNEG x
+ MO_S_Neg rep -> trivialUCode rep NEG x
+
+ where
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
+ MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
+ MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
+ MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
+
+ MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
+ MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
+ MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
+ MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+ MO_Add I32 ->
+ case y of
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
+ -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
+ CmmLit lit
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code dst = srcCode `appOL` toOL [
+ ADDIS dst src (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ return (Any I32 code)
+ _ -> trivialCode I32 True ADD x y
+
+ MO_Add rep -> trivialCode rep True ADD x y
+ MO_Sub rep ->
+        case y of  -- subfi ('subtract from' with immediate) doesn't exist
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+ -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+ _ -> trivialCodeNoImm rep SUBF y x
+
+ MO_Mul rep -> trivialCode rep True MULLW x y
+
+ MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
+
+ MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
+ MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+ MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_And rep -> trivialCode rep False AND x y
+ MO_Or rep -> trivialCode rep False OR x y
+ MO_Xor rep -> trivialCode rep False XOR x y
+
+ MO_Shl rep -> trivialCode rep False SLW x y
+ MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+ MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+
+getRegister (CmmLit (CmmInt i rep))
+ | Just imm <- makeImmediate rep True i
+ = let
+ code dst = unitOL (LI dst imm)
+ in
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f frep)]
+ `consOL` (addr_code `snocOL` LD frep dst addr)
+ return (Any frep code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ LIS dst (HI imm),
+ OR dst dst (RIImm (LO imm))
+ ]
+ in return (Any rep code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+ -- extend?Rep: wrap integer expression of type rep
+ -- in a conversion to I32
+extendSExpr I32 x = x
+extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
+extendUExpr I32 x = x
+extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
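+
+-- In code, the same rule is just the intLoadCode pattern used in the x86
+-- section above: bind the Amode and consume its address part straight after
+-- its code part (a sketch, with someInstr standing in for whatever x86
+-- instruction uses the address):
+--
+--      Amode addr addr_code <- getAmode mem
+--      return (\dst -> addr_code `snocOL` someInstr (OpAddr addr) (OpReg dst))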
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (-(fromInteger i))
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (fromInteger i)
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+ | isJust imm
+ = return (Amode (AddrImm imm__2) id)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+getAmode other
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister other `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ in
+ return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- This is all just ridiculous, since it carefully undoes
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (-(fromInteger i))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (fromInteger i)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+ b@(CmmLit _)])
+ = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+ [y, CmmLit (CmmInt shift _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ = do (x_reg, x_code) <- getNonClobberedReg x
+ -- x must be in a temp, because it has to stay live over y_code
+       -- we could compare x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code
+ base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
+ code)
+
+getAmode (CmmLit lit) | not (is64BitLit lit)
+ = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+ (reg,code) <- getSomeReg expr
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (-(fromInteger i))
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits i
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (fromInteger i)
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ let
+ code = codeX `appOL` codeY
+ return (Amode (AddrRegReg regX regY) code)
+
+-- XXX Is this the same as "leaf" in Stix?
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let
+ code = unitOL (SETHI (HI imm__2) tmp)
+ return (Amode (AddrRegImm tmp (LO imm__2)) code)
+ where
+ imm__2 = litToImm lit
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#ifdef powerpc_TARGET_ARCH
+getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True i
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
+ = do
+ tmp <- getNewRegNat I32
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let imm = litToImm lit
+ code = unitOL (LIS tmp (HA imm))
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add I32) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
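+-- For instance, a memory operand whose base register is a real (allocatable)
+-- register could be trashed while the other expression is computed, so in that
+-- case we LEA the address into a fresh virtual register first
+-- (see amodeCouldBeClobbered below).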
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat wordRep
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+ unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+ return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getOperand e = do
+ (reg, code) <- getSomeReg e
+ return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit) = not (is64BitLit lit)
+ || isSuitableFloatingPointLit lit
+isOperand _ = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
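+-- (A zero can be materialised with a single register-clearing instruction,
+-- e.g. an xor of a register with itself, which is cheaper than a load from a
+-- literal pool; hence the f /= 0.0 test below.)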
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getRegOrMem e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = is64BitInteger i
+ -- assume that labels are in the range 0-2^31-1: this assumes the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is64BitLit x = False
+#endif
+
+is64BitInteger :: Integer -> Bool
+is64BitInteger i = i > 0x7fffffff || i < -0x80000000
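+-- i.e. True iff the value does not fit in a 32-bit signed immediate;
+-- e.g. is64BitInteger 0x7fffffff == False, is64BitInteger 0x80000000 == True.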
+
+-- -----------------------------------------------------------------------------
+-- The 'CondCode' type: Condition codes passed up the tree.
+
+data CondCode = CondCode Bool Cond InstrBlock
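+-- The Bool records whether this is a floating-point comparison; consumers use
+-- it to pick the right branch instruction (e.g. BF vs BI on SPARC) and, on
+-- x86_64, to remember that unordered operands need special handling.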
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+getCondCode = panic "MachCode.getCondCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+ = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
+ case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+ other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
+
+getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+#elif powerpc_TARGET_ARCH
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bits first
+
+getCondCode (CmmMachOp mop [x, y])
+ = case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+#endif
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if alpha_TARGET_ARCH
+condIntCode = panic "MachCode.condIntCode: not on Alphas"
+condFltCode = panic "MachCode.condFltCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
+ Amode x_addr x_code <- getAmode x
+ let
+ imm = litToImm lit
+ code = x_code `snocOL`
+ CMP pk (OpImm imm) (OpAddr x_addr)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST pk (OpReg x_reg) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- in
+ return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
+ let
+ code = y_code `appOL`
+ x_code `snocOL`
+ CMP (cmmExprRep x) (OpReg y_reg) x_op
+ -- in
+ return (CondCode False cond code)
+#endif
+
+#if i386_TARGET_ARCH
+condFltCode cond x y
+ = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
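+-- Roughly, (x > y :: Double) becomes a ucomisd of y's operand against x's
+-- register; the result is then consumed with the unsigned condition codes
+-- (see condToUnsigned and NB(1) below).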
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ src2 = ImmInt (fromInteger y)
+ code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code__2 = code1 `appOL` code2 `snocOL`
+ SUB False True src1 (RIReg src2) g0
+ return (CondCode False cond code__2)
+
+-----------
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ FCMP True pk1 src1 src2
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ FCMP True F64 tmp src2
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ FCMP True F64 src1 tmp
+ return (CondCode True cond code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ code' = code `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
+ return (CondCode False cond code')
+
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
+ code'' = case cond of -- twiddle CR to handle unordered case
+ GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+ LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+ _ -> code'
+ where
+ ltbit = 0 ; eqbit = 2 ; gtbit = 1
+ return (CondCode True cond code'')
+
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
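+-- For example, on x86 an assignment of a loaded value to a local register,
+-- CmmAssign r (CmmLoad addr rep), turns into a single MOV from the addressing
+-- mode for addr into r's register (see assignReg_IntCode below).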
+
+assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+assignIntCode pk (CmmLoad dst _) src
+ = getNewRegNat IntRep `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignIntCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- integer assignment to memory
+assignMem_IntCode pk addr src = do
+ Amode addr code_addr <- getAmode addr
+ (code_src, op_src) <- get_op_RI src
+ let
+ code = code_src `appOL`
+ code_addr `snocOL`
+ MOV pk op_src (OpAddr addr)
+ -- NOTE: op_src is stable, so it will still be valid
+ -- after code_addr. This may involve the introduction
+ -- of an extra MOV to a temporary register, but we hope
+ -- the register allocator will get rid of it.
+ --
+ return code
+ where
+ get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
+ = return (nilOL, OpImm (litToImm lit))
+ get_op_RI op
+ = do (reg,code) <- getNonClobberedReg op
+ return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+ load_code <- intLoadCode (MOV pk) src
+ return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+ code <- getAnyReg src
+ return (code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+assignReg_IntCode pk reg src = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+    Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ where
+ dst = getRegisterReg reg
+
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src
+ = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+ where
+ dst = getRegisterReg reg
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Floating-point assignments
+
+#if alpha_TARGET_ARCH
+
+assignFltCode pk (CmmLoad dst _) src
+ = getNewRegNat pk `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignFltCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (FMOV src__2 dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ (src_reg, src_code) <- getNonClobberedReg src
+ Amode addr addr_code <- getAmode addr
+ let
+ code = src_code `appOL`
+ addr_code `snocOL`
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
+ return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+ src_code <- getAnyReg src
+ return (src_code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ Amode dst__2 code1 <- getAmode addr
+ (src__2, code2) <- getSomeReg src
+ tmp1 <- getNewRegNat pk
+ let
+ pk__2 = cmmExprRep src
+ code__2 = code1 `appOL` code2 `appOL`
+ if pk == pk__2
+ then unitOL (ST pk src__2 dst__2)
+ else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
+ return code__2
+
+-- Floating point assignment to a register/temporary
+-- ToDo: Verify correctness
+assignReg_FltCode pk reg src = do
+ r <- getRegister src
+ v1 <- getNewRegNat pk
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
+ where
+ dst = getRegisterReg reg
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genJump (CmmLabel lbl)
+ | isAsmTemp lbl = returnInstr (BR target)
+ | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = getRegister tree `thenNat` \ register ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let
+ dst = registerName register pv
+ code = registerCode register pv
+ target = registerName register pv
+ in
+ if isFixed register then
+ returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+ else
+ return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+genJump (CmmLoad mem pk) = do
+ Amode target code <- getAmode mem
+ return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+ return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+ (reg,code) <- getSomeReg expr
+ return (code `snocOL` JMP (OpReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (toOL [CALL (Left target) 0 True, NOP])
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = do
+ (target, code) <- getSomeReg tree
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+genJump (CmmLit (CmmLabel lbl))
+ = return (unitOL $ JMP lbl)
+
+genJump tree
+ = do
+ (target,code) <- getSomeReg tree
+ return (code `snocOL` MTCTR target `snocOL` BCTR [])
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
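+-- Roughly, on i386 a branch on an unsigned (<) comparison becomes a CMP
+-- followed by "JXX LU target"; the SPARC version puts a NOP in the branch
+-- delay slot, as promised above.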
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genCondJump lbl (StPrim op [x, StInt 0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ returnSeq code [BI (cmpOp op) value target]
+ where
+ cmpOp CharGtOp = GTT
+ cmpOp CharGeOp = GE
+ cmpOp CharEqOp = EQQ
+ cmpOp CharNeOp = NE
+ cmpOp CharLtOp = LTT
+ cmpOp CharLeOp = LE
+ cmpOp IntGtOp = GTT
+ cmpOp IntGeOp = GE
+ cmpOp IntEqOp = EQQ
+ cmpOp IntNeOp = NE
+ cmpOp IntLtOp = LTT
+ cmpOp IntLeOp = LE
+ cmpOp WordGtOp = NE
+ cmpOp WordGeOp = ALWAYS
+ cmpOp WordEqOp = EQQ
+ cmpOp WordNeOp = NE
+ cmpOp WordLtOp = NEVER
+ cmpOp WordLeOp = EQQ
+ cmpOp AddrGtOp = NE
+ cmpOp AddrGeOp = ALWAYS
+ cmpOp AddrEqOp = EQQ
+ cmpOp AddrNeOp = NE
+ cmpOp AddrLtOp = NEVER
+ cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF (cmpOp op) value target))
+ where
+ cmpOp FloatGtOp = GTT
+ cmpOp FloatGeOp = GE
+ cmpOp FloatEqOp = EQQ
+ cmpOp FloatNeOp = NE
+ cmpOp FloatLtOp = LTT
+ cmpOp FloatLeOp = LE
+ cmpOp DoubleGtOp = GTT
+ cmpOp DoubleGeOp = GE
+ cmpOp DoubleEqOp = EQQ
+ cmpOp DoubleNeOp = NE
+ cmpOp DoubleLtOp = LTT
+ cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+ | fltCmpOp op
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF cond result target))
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+ fltCmpOp op = case op of
+ FloatGtOp -> True
+ FloatGeOp -> True
+ FloatEqOp -> True
+ FloatNeOp -> True
+ FloatLtOp -> True
+ FloatLeOp -> True
+ DoubleGtOp -> True
+ DoubleGeOp -> True
+ DoubleEqOp -> True
+ DoubleNeOp -> True
+ DoubleLtOp -> True
+ DoubleLeOp -> True
+ _ -> False
+ (instr, cond) = case op of
+ FloatGtOp -> (FCMP TF LE, EQQ)
+ FloatGeOp -> (FCMP TF LTT, EQQ)
+ FloatEqOp -> (FCMP TF EQQ, NE)
+ FloatNeOp -> (FCMP TF EQQ, EQQ)
+ FloatLtOp -> (FCMP TF LTT, NE)
+ FloatLeOp -> (FCMP TF LE, NE)
+ DoubleGtOp -> (FCMP TF LE, EQQ)
+ DoubleGeOp -> (FCMP TF LTT, EQQ)
+ DoubleEqOp -> (FCMP TF EQQ, NE)
+ DoubleNeOp -> (FCMP TF EQQ, EQQ)
+ DoubleLtOp -> (FCMP TF LTT, NE)
+ DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+ = trivialCode instr x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BI cond result target))
+ where
+ (instr, cond) = case op of
+ CharGtOp -> (CMP LE, EQQ)
+ CharGeOp -> (CMP LTT, EQQ)
+ CharEqOp -> (CMP EQQ, NE)
+ CharNeOp -> (CMP EQQ, EQQ)
+ CharLtOp -> (CMP LTT, NE)
+ CharLeOp -> (CMP LE, NE)
+ IntGtOp -> (CMP LE, EQQ)
+ IntGeOp -> (CMP LTT, EQQ)
+ IntEqOp -> (CMP EQQ, NE)
+ IntNeOp -> (CMP EQQ, EQQ)
+ IntLtOp -> (CMP LTT, NE)
+ IntLeOp -> (CMP LE, NE)
+ WordGtOp -> (CMP ULE, EQQ)
+ WordGeOp -> (CMP ULT, EQQ)
+ WordEqOp -> (CMP EQQ, NE)
+ WordNeOp -> (CMP EQQ, EQQ)
+ WordLtOp -> (CMP ULT, NE)
+ WordLeOp -> (CMP ULE, NE)
+ AddrGtOp -> (CMP ULE, EQQ)
+ AddrGeOp -> (CMP ULT, EQQ)
+ AddrEqOp -> (CMP EQQ, NE)
+ AddrNeOp -> (CMP EQQ, EQQ)
+ AddrLtOp -> (CMP ULT, NE)
+ AddrLeOp -> (CMP ULE, NE)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` JXX cond id)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
+ return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genCondJump (BlockId id) bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (
+ code `appOL`
+ toOL (
+ if is_float
+ then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ )
+ )
+
+#endif /* sparc_TARGET_ARCH */
+
+
+#if powerpc_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (code `snocOL` BCC cond id)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
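+--
+-- Results come back in the platform's return registers (e.g. %eax/%edx or
+-- fake0 for x87 results on i386, %rax/%xmm0 on x86_64, r3/r4 or f1 on PowerPC)
+-- and are moved into the requested CmmRegs by the per-architecture
+-- assign_code/moveResult helpers below.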
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> [(CmmReg,MachHint)] -- where to put the result
+ -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
+ -> Maybe [GlobalReg] -- volatile regs to save
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+ccallResultRegs =
+
+genCCall fn cconv result_regs args
+ = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+ `thenNat` \ ((unused,_), argCode) ->
+ let
+ nRegs = length allArgRegs - length unused
+ code = asmSeqThen (map ($ []) argCode)
+ in
+ returnSeq code [
+ LDA pv (AddrImm (ImmLab (ptext fn))),
+ JSR ra (AddrReg pv) nRegs,
+ LDGP gp (AddrReg ra)]
+ where
+ ------------------------
+ {- Try to get a value into a specific register (or registers) for
+ a call. The first 6 arguments go into the appropriate
+ argument register (separate registers for integer and floating
+ point arguments, but used in lock-step), and the remaining
+ arguments are dumped to the stack, beginning at 0(sp). Our
+ first argument is a pair of the list of remaining argument
+ registers to be assigned for this call and the next stack
+ offset to use for overflowing arguments. This way,
+       @get_arg@ can be applied to all of a call's arguments using
+ @mapAccumLNat@.
+ -}
+ get_arg
+ :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
+ -> StixTree -- Current argument
+ -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+ -- We have to use up all of our argument registers first...
+
+ get_arg ((iDst,fDst):dsts, offset) arg
+ = getRegister arg `thenNat` \ register ->
+ let
+ reg = if isFloatingRep pk then fDst else iDst
+ code = registerCode register reg
+ src = registerName register reg
+ pk = registerRep register
+ in
+ return (
+ if isFloatingRep pk then
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (FMOV src fDst)
+ else code)
+ else
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (OR src (RIReg src) iDst)
+ else code))
+
+ -- Once we have run out of argument registers, we move to the
+ -- stack...
+
+ get_arg ([], offset) arg
+ = getRegister arg `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ pk = registerRep register
+ sz = primRepToSize pk
+ in
+ return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [(r,_)] args vols = do
+ case op of
+ MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
+ MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
+
+ MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
+ MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
+
+ MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
+ MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
+
+ MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
+ MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
+
+ other_op -> outOfLineFloatOp op r args vols
+ where
+ actuallyInlineFloatOp rep instr [(x,_)]
+ = do res <- trivialUFCode rep instr x
+ any <- anyReg res
+ return (any (getRegisterReg r))
+
+genCCall target dest_regs args vols = do
+ let
+ sizes = map (arg_size . cmmExprRep . fst) (reverse args)
+#if !darwin_TARGET_OS
+ tot_arg_size = sum sizes
+#else
+ raw_arg_size = sum sizes
+ tot_arg_size = roundTo 16 raw_arg_size
+ arg_pad_size = tot_arg_size - raw_arg_size
+ delta0 <- getDeltaNat
+ setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+ push_codes <- mapM push_arg (reverse args)
+ delta <- getDeltaNat
+
+ -- in
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) []), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
+ ASSERT(dyn_rep == I32)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
+
+ let push_code
+#if darwin_TARGET_OS
+ | arg_pad_size /= 0
+ = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+ DELTA (delta0 - arg_pad_size)]
+ `appOL` concatOL push_codes
+ | otherwise
+#endif
+ = concatOL push_codes
+ call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
+ [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + tot_arg_size)]
+ )
+ -- in
+ setDeltaNat (delta + tot_arg_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
+ MOV I32 (OpReg edx) (OpReg r_dest_hi)]
+ F32 -> unitOL (GMOV fake0 r_dest)
+ F64 -> unitOL (GMOV fake0 r_dest)
+ rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
+ where
+ r_dest_hi = getHiVRegFromLo r_dest
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (push_code `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size F64 = 8
+ arg_size F32 = 4
+ arg_size I64 = 8
+ arg_size _ = 4
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
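+        -- e.g. roundTo 16 20 == 32 and roundTo 16 32 == 32; used to keep the
+        -- Darwin C stack 16-byte aligned at calls.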
+
+
+ push_arg :: (CmmExpr,MachHint){-current argument-}
+ -> NatM InstrBlock -- code
+
+ push_arg (arg,_hint) -- we don't need the hints on x86
+ | arg_rep == I64 = do
+ ChildCode64 code r_lo <- iselExpr64 arg
+ delta <- getDeltaNat
+ setDeltaNat (delta - 8)
+ let
+ r_hi = getHiVRegFromLo r_lo
+ -- in
+ return ( code `appOL`
+ toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
+ PUSH I32 (OpReg r_lo), DELTA (delta - 8),
+ DELTA (delta-8)]
+ )
+
+ | otherwise = do
+ (code, reg, sz) <- get_op arg
+ delta <- getDeltaNat
+ let size = arg_size sz
+ setDeltaNat (delta-size)
+ if (case sz of F64 -> True; F32 -> True; _ -> False)
+ then return (code `appOL`
+ toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
+ DELTA (delta-size),
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0))]
+ )
+ else return (code `snocOL`
+ PUSH I32 (OpReg reg) `snocOL`
+ DELTA (delta-size)
+ )
+ where
+ arg_rep = cmmExprRep arg
+
+ ------------
+ get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
+ get_op op = do
+ (reg,code) <- getSomeReg op
+ return (code, reg, cmmExprRep op)
+
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
+ -> Maybe [GlobalReg] -> NatM InstrBlock
+outOfLineFloatOp mop res args vols
+ = do
+ targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+ let target = CmmForeignCall targetExpr CCallConv
+
+ if cmmRegRep res == F64
+ then
+ stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
+ else do
+ uq <- getUniqueNat
+ let
+ tmp = CmmLocal (LocalReg uq F64)
+ -- in
+ code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
+ code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
+ return (code1 `appOL` code2)
+ where
+ lbl = mkForeignLabel fn Nothing True
+
+ fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrtf")
+ MO_F32_Sin -> FSLIT("sinf")
+ MO_F32_Cos -> FSLIT("cosf")
+ MO_F32_Tan -> FSLIT("tanf")
+ MO_F32_Exp -> FSLIT("expf")
+ MO_F32_Log -> FSLIT("logf")
+
+ MO_F32_Asin -> FSLIT("asinf")
+ MO_F32_Acos -> FSLIT("acosf")
+ MO_F32_Atan -> FSLIT("atanf")
+
+ MO_F32_Sinh -> FSLIT("sinhf")
+ MO_F32_Cosh -> FSLIT("coshf")
+ MO_F32_Tanh -> FSLIT("tanhf")
+ MO_F32_Pwr -> FSLIT("powf")
+
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
+ MO_F64_Exp -> FSLIT("exp")
+ MO_F64_Log -> FSLIT("log")
+
+ MO_F64_Asin -> FSLIT("asin")
+ MO_F64_Acos -> FSLIT("acos")
+ MO_F64_Atan -> FSLIT("atan")
+
+ MO_F64_Sinh -> FSLIT("sinh")
+ MO_F64_Cosh -> FSLIT("cosh")
+ MO_F64_Tanh -> FSLIT("tanh")
+ MO_F64_Pwr -> FSLIT("pow")
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols =
+ outOfLineFloatOp op r args vols
+
+genCCall target dest_regs args vols = do
+
+ -- load up the register arguments
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
+
+ let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = int_regs_used ++ fp_regs_used
+ -- for annotating the call instruction with
+
+ sse_regs = length fp_regs_used
+
+ tot_arg_size = arg_size * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+ -- need to make sure we push a multiple of 16-bytes of args,
+ -- plus the return address, to get the correct alignment.
+ -- Urg, this is hard. We need to feed the delta back into
+ -- the arg pushing code.
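+    -- For example, three stack args occupy 24 bytes; 24 `rem` 16 /= 0, so we
+    -- SUB an extra 8 bytes from %rsp first, giving real_size = 32 and keeping
+    -- the required alignment at the CALL.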
+ (real_size, adjust_rsp) <-
+ if tot_arg_size `rem` 16 == 0
+ then return (tot_arg_size, nilOL)
+ else do -- we need to adjust...
+ delta <- getDeltaNat
+ setDeltaNat (delta-8)
+ return (tot_arg_size+8, toOL [
+ SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+ DELTA (delta-8)
+ ])
+
+ -- push the stack args, right to left
+ push_code <- push_args (reverse stack_args) nilOL
+ delta <- getDeltaNat
+
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_r, dyn_c) <- getSomeReg expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+ let
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
+ assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
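+        -- e.g. a call passing two arguments in SSE registers is preceded by
+        -- "movl $2, %eax" (assign_eax 2).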
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+ where
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [(CmmExpr,MachHint)]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((arg,hint) : rest) aregs fregs code
+ | isFloatingRep arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprRep arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((arg,hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((arg,hint):rest) code
+ | isFloatingRep arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [
+ MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
+ SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
+ ASSERT(arg_rep == I64) return ()
+ (arg_op, arg_code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [PUSH I64 arg_op,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+ where
+ arg_rep = cmmExprRep arg
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+{-
+ The SPARC calling convention is an absolute
+ nightmare. The first 6x32 bits of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+ 16 words above the stack pointer is a word for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+-}
+
+genCCall target dest_regs argsAndHints vols = do
+ let
+ args = map fst argsAndHints
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let
+ (argcodes, vregss) = unzip argcode_and_vregs
+ n_argRegs = length allArgRegs
+ n_argRegs_used = min (length vregs) n_argRegs
+ vregs = concat vregss
+ -- deal with static vs dynamic call targets
+ callinsns <- (case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ CmmForeignCall expr conv -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ CmmPrim mop -> do
+ (res, reduce) <- outOfLineFloatOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
+
+ )
+ let
+ argcode = concatOL argcodes
+ (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+ transfer_code
+ = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+ return (argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up)
+ where
+ -- move args from the integer vregs into which they have been
+ -- marshalled, into %o0 .. %o5, and the rest onto the stack.
+ move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+ move_final [] _ offset -- all args done
+ = []
+
+ move_final (v:vs) [] offset -- out of aregs; move to stack
+ = ST I32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+ move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
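+    -- For example, with eight word-sized arguments the first six vregs are
+    -- OR-moved into %o0..%o5 and the remaining two are ST'd to successive
+    -- stack slots starting at eXTRA_STK_ARGS_HERE.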
+
+ -- generate code to calculate an argument, and move it into one
+ -- or two integer vregs.
+ arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+ arg_to_int_vregs arg
+ | (cmmExprRep arg) == I64
+ = do
+ (ChildCode64 code r_lo) <- iselExpr64 arg
+ let
+ r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+ | otherwise
+ = do
+ (src, code) <- getSomeReg arg
+ tmp <- getNewRegNat (cmmExprRep arg)
+ let
+ pk = cmmExprRep arg
+ case pk of
+ F64 -> do
+ v1 <- getNewRegNat I32
+ v2 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ FMOV F64 src f0 `snocOL`
+ ST F32 f0 (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1 `snocOL`
+ ST F32 (fPair f0) (spRel 16) `snocOL`
+ LD I32 (spRel 16) v2
+ ,
+ [v1,v2]
+ )
+ F32 -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ ST F32 src (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1
+ ,
+ [v1]
+ )
+ other -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL` OR False g0 (RIReg src) v1
+ ,
+ [v1]
+ )
+outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (reduce, functionName) = case mop of
+ MO_F32_Exp -> (True, FSLIT("exp"))
+ MO_F32_Log -> (True, FSLIT("log"))
+ MO_F32_Sqrt -> (True, FSLIT("sqrt"))
+
+ MO_F32_Sin -> (True, FSLIT("sin"))
+ MO_F32_Cos -> (True, FSLIT("cos"))
+ MO_F32_Tan -> (True, FSLIT("tan"))
+
+ MO_F32_Asin -> (True, FSLIT("asin"))
+ MO_F32_Acos -> (True, FSLIT("acos"))
+ MO_F32_Atan -> (True, FSLIT("atan"))
+
+ MO_F32_Sinh -> (True, FSLIT("sinh"))
+ MO_F32_Cosh -> (True, FSLIT("cosh"))
+ MO_F32_Tanh -> (True, FSLIT("tanh"))
+
+ MO_F64_Exp -> (False, FSLIT("exp"))
+ MO_F64_Log -> (False, FSLIT("log"))
+ MO_F64_Sqrt -> (False, FSLIT("sqrt"))
+
+ MO_F64_Sin -> (False, FSLIT("sin"))
+ MO_F64_Cos -> (False, FSLIT("cos"))
+ MO_F64_Tan -> (False, FSLIT("tan"))
+
+ MO_F64_Asin -> (False, FSLIT("asin"))
+ MO_F64_Acos -> (False, FSLIT("acos"))
+ MO_F64_Atan -> (False, FSLIT("atan"))
+
+ MO_F64_Sinh -> (False, FSLIT("sinh"))
+ MO_F64_Cosh -> (False, FSLIT("cosh"))
+ MO_F64_Tanh -> (False, FSLIT("tanh"))
+
+ other -> pprPanic "outOfLineFloatOp(sparc) "
+ (pprCallishMachOp mop)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+ The PowerPC calling convention for Darwin/Mac OS X
+ is described in Apple's document
+ "Inside Mac OS X - Mach-O Runtime Architecture".
+
+ PowerPC Linux uses the System V Release 4 Calling Convention
+ for PowerPC. It is described in the
+ "System V Application Binary Interface PowerPC Processor Supplement".
+
+ Both conventions are similar:
+ Parameters may be passed in general-purpose registers starting at r3, in
+ floating point registers starting at f1, or on the stack.
+
+ But there are substantial differences:
+ * The number of registers used for parameter passing and the exact set of
+ nonvolatile registers differs (see MachRegs.lhs).
+ * On Darwin, stack space is always reserved for parameters, even if they are
+ passed in registers. The called routine may choose to save parameters from
+ registers to the corresponding space on the stack.
+ * On Darwin, a corresponding amount of GPRs is skipped when a floating point
+ parameter is passed in an FPR.
+ * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+ starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+ Darwin just treats an I64 like two separate I32s (high word first).
+ * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
+
+    According to both conventions, the parameter area should be part of the
+ caller's stack frame, allocated in the caller's prologue code (large enough
+ to hold the parameter lists for all called routines). The NCG already
+ uses the stack for register spilling, leaving 64 bytes free at the top.
+ If we need a larger parameter area than that, we just allocate a new stack
+ frame just before ccalling.
+-}
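+-- For example (see passArguments below): on Darwin an F64 argument consumes
+-- one FPR, skips two GPRs and reserves 8 bytes of parameter area, whereas on
+-- SysV/Linux it consumes one FPR only, skips no GPRs, and takes stack space
+-- only when no FPRs are left.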
+
+genCCall target dest_regs argsAndHints vols
+ = ASSERT (not $ any (`elem` [I8,I16]) argReps)
+ -- we rely on argument promotion in the codeGen
+ do
+ (finalStack,passArgumentsCode,usedRegs) <- passArguments
+ (zip args argReps)
+ allArgRegs allFPArgRegs
+ initialStackOffset
+ (toOL []) []
+
+ (labelOrExpr, reduceToF32) <- case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+ CmmForeignCall expr conv -> return (Right expr, False)
+ CmmPrim mop -> outOfLineFloatOp mop
+
+ let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+ codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
+
+ case labelOrExpr of
+ Left lbl -> do
+ return ( codeBefore
+ `snocOL` BL lbl usedRegs
+ `appOL` codeAfter)
+ Right dyn -> do
+ (dynReg, dynCode) <- getSomeReg dyn
+ return ( dynCode
+ `snocOL` MTCTR dynReg
+ `appOL` codeBefore
+ `snocOL` BCTRL usedRegs
+ `appOL` codeAfter)
+ where
+#if darwin_TARGET_OS
+ initialStackOffset = 24
+ -- size of linkage area + size of arguments, in bytes
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+ map machRepByteWidth argReps
+#elif linux_TARGET_OS
+ initialStackOffset = 8
+ stackDelta finalStack = roundTo 16 finalStack
+#endif
+ args = map fst argsAndHints
+ argReps = map cmmExprRep args
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
+
+ move_sp_down finalStack
+ | delta > 64 =
+ toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
+ DELTA (-delta)]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+ move_sp_up finalStack
+ | delta > 64 =
+ toOL [ADD sp sp (RIImm (ImmInt delta)),
+ DELTA 0]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+
+
+ passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+ passArguments ((arg,I64):args) gprs fprs stackOffset
+ accumCode accumUsed =
+ do
+ ChildCode64 code vr_lo <- iselExpr64 arg
+ let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+ passArguments args
+ (drop 2 gprs)
+ fprs
+ (stackOffset+8)
+ (accumCode `appOL` code
+ `snocOL` storeWord vr_hi gprs stackOffset
+ `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+ ((take 2 gprs) ++ accumUsed)
+ where
+ storeWord vr (gpr:_) offset = MR gpr vr
+ storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+ regCode hireg loreg =
+ accumCode `appOL` code
+ `snocOL` MR hireg vr_hi
+ `snocOL` MR loreg vr_lo
+
+ case gprs of
+ hireg : loreg : regs | even (length gprs) ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _skipped : hireg : loreg : regs ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _ -> -- only one or no regs left
+ passArguments args [] fprs (stackOffset'+8)
+ stackCode accumUsed
+#endif
+
+ passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+ | reg : _ <- regs = do
+ register <- getRegister arg
+ let code = case register of
+ Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+ Any _ acode -> acode reg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we reserve stack slots for register parameters
+ (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ stackOffset
+#endif
+ (accumCode `appOL` code)
+ (reg : accumUsed)
+ | otherwise = do
+ (vr, code) <- getSomeReg arg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+ (stackOffset' + stackBytes)
+ (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
+ accumUsed
+ where
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | rep == F64 = roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
+ (nGprs, nFprs, stackBytes, regs) = case rep of
+ I32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+ -- we use the FPRs.
+ F32 -> (1, 1, 4, fprs)
+ F64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ F32 -> (0, 1, 4, fprs)
+ F64 -> (0, 1, 8, fprs)
+#endif
+
+ moveResult reduceToF32 =
+ case dest_regs of
+ [] -> nilOL
+ [(dest, _hint)]
+ | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
+ | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
+ | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
+ MR r_dest r4]
+ | otherwise -> unitOL (MR r_dest r3)
+ where rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+
+ outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (functionName, reduce) = case mop of
+ MO_F32_Exp -> (FSLIT("exp"), True)
+ MO_F32_Log -> (FSLIT("log"), True)
+ MO_F32_Sqrt -> (FSLIT("sqrt"), True)
+
+ MO_F32_Sin -> (FSLIT("sin"), True)
+ MO_F32_Cos -> (FSLIT("cos"), True)
+ MO_F32_Tan -> (FSLIT("tan"), True)
+
+ MO_F32_Asin -> (FSLIT("asin"), True)
+ MO_F32_Acos -> (FSLIT("acos"), True)
+ MO_F32_Atan -> (FSLIT("atan"), True)
+
+ MO_F32_Sinh -> (FSLIT("sinh"), True)
+ MO_F32_Cosh -> (FSLIT("cosh"), True)
+ MO_F32_Tanh -> (FSLIT("tanh"), True)
+ MO_F32_Pwr -> (FSLIT("pow"), True)
+
+ MO_F64_Exp -> (FSLIT("exp"), False)
+ MO_F64_Log -> (FSLIT("log"), False)
+ MO_F64_Sqrt -> (FSLIT("sqrt"), False)
+
+ MO_F64_Sin -> (FSLIT("sin"), False)
+ MO_F64_Cos -> (FSLIT("cos"), False)
+ MO_F64_Tan -> (FSLIT("tan"), False)
+
+ MO_F64_Asin -> (FSLIT("asin"), False)
+ MO_F64_Acos -> (FSLIT("acos"), False)
+ MO_F64_Atan -> (FSLIT("atan"), False)
+
+ MO_F64_Sinh -> (FSLIT("sinh"), False)
+ MO_F64_Cosh -> (FSLIT("cosh"), False)
+ MO_F64_Tanh -> (FSLIT("tanh"), False)
+ MO_F64_Pwr -> (FSLIT("pow"), False)
+ other -> pprPanic "genCCall(ppc): unknown callish op"
+ (pprCallishMachOp other)
+
+#endif /* darwin_TARGET_OS || linux_TARGET_OS */
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
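+-- The scrutinee indexes a jump table: a table of code labels when not
+-- compiling position-independent code, or of label differences (relative to
+-- the table's own label) when opt_PIC is on.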
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ ADD wordRep op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ JMP_TBL op [ id | Just id <- ids ]
+ ]
+ -- in
+ return code
+#elif powerpc_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ LD I32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ ADDIS tmp tmp (HA (ImmCLbl lbl)),
+ LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+#else
+genSwitch expr ids = panic "ToDo: genSwitch"
+#endif
+
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+-- -----------------------------------------------------------------------------
+-- Support bits
+-- -----------------------------------------------------------------------------
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
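+--
+-- For example, a comparison used as a value, as in  x = (y < z),  reaches
+-- the code generator as an ordinary assignment, so the 0-or-1 result of
+-- the condition has to be materialised in a register here.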
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+condIntReg = panic "MachCode.condIntReg (not on Alpha)"
+condFltReg = panic "MachCode.condFltReg (not on Alpha)"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+condIntReg cond x y = do
+ CondCode _ cond cond_code <- condIntCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if i386_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
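+        --
+        -- Concretely: for unordered operands ZF, PF and CF are all 1.
+        -- GU tests CF=0 && ZF=0 and GEU tests CF=0, so both already give
+        -- False for unordered operands.  NE tests ZF=0, which would give
+        -- False as well, hence the parity bit is ORed in; every other
+        -- condition would wrongly give True, hence the AND with the
+        -- negated parity flag.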
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg EQQ x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condIntCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+condFltReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condFltCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ NOP,
+ BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+condReg getCond = do
+ lbl1 <- getBlockIdNat
+ lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- getCond
+ let
+{- code dst = cond_code `appOL` toOL [
+ BCC cond lbl1,
+ LI dst (ImmInt 0),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl1,
+ LI dst (ImmInt 1),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl2
+ ]-}
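+        -- How the MFCR/RLWINM pair below works: the compare leaves its
+        -- result in CR0, whose bits (counting from the most significant)
+        -- are LT=0, GT=1, EQ=2.  MFCR copies the whole condition register
+        -- into dst, and RLWINM dst dst (bit+1) 31 31 rotates the selected
+        -- bit into the least-significant position and masks everything
+        -- else, leaving 0 or 1.  Conditions with no CR0 bit of their own
+        -- (e.g. LE = not GT) first invert the bit with CRNOR bit bit bit.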
+ code dst = cond_code
+ `appOL` negate_code
+ `appOL` toOL [
+ MFCR dst,
+ RLWINM dst dst (bit + 1) 31 31
+ ]
+
+ negate_code | do_negate = unitOL (CRNOR bit bit bit)
+ | otherwise = nilOL
+
+ (bit, do_negate) = case cond of
+ LTT -> (0, False)
+ LE -> (1, True)
+ EQQ -> (2, False)
+ GE -> (0, True)
+ GTT -> (1, False)
+
+ NE -> (2, True)
+
+ LU -> (0, False)
+ LEU -> (1, True)
+ GEU -> (0, True)
+ GU -> (1, False)
+
+ return (Any I32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
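+--
+-- For instance, an addition like  x + 3  arrives here with the literal as
+-- the second argument (hence the (CmmLit (CmmInt ...)) patterns on the
+-- right below);  3 + x  should already have been flipped round by the
+-- constant folder.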
+
+trivialCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
+ ,)))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+#endif
+
+trivialUCode
+ :: MachRep
+ -> IF_ARCH_alpha((RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
+ ,IF_ARCH_sparc((RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
+ ,)))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialUFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((Reg -> Reg -> Instr)
+ ,))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+trivialCode instr x (StInt y)
+ | fits8Bits y
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src1 = registerName register tmp
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+ in
+ return (Any IntRep code__2)
+
+trivialCode instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat IntRep `thenNat` \ tmp1 ->
+ getNewRegNat IntRep `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1 []
+ src1 = registerName register1 tmp1
+ code2 = registerCode register2 tmp2 []
+ src2 = registerName register2 tmp2
+ code__2 dst = asmSeqThen [code1, code2] .
+ mkSeqInstr (instr src1 (RIReg src2) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat F64 `thenNat` \ tmp1 ->
+ getNewRegNat F64 `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1
+ src1 = registerName register1 tmp1
+
+ code2 = registerCode register2 tmp2
+ src2 = registerName register2 tmp2
+
+ code__2 dst = asmSeqThen [code1 [], code2 []] .
+ mkSeqInstr (instr src1 src2 dst)
+ in
+ return (Any F64 code__2)
+
+trivialUFCode _ instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr src dst)
+ in
+ return (Any F64 code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+ it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not
+ subsequently change the contents of that fixed reg. If you
+ want to do so, first copy the value either to a temporary
+ or into dst. You are free to modify dst even if it happens
+ to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+ arbitrary computation. The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from
+ each other and from all other regs, and stay live over
+ arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+ code which places the result in any register, period. Even if that
+ register happens to be read during the computation.
+
+ Corollary #1: this means that if you are generating code for an
+ operation with two arbitrary operands, you cannot assign the result
+ of the first operand into the destination register before computing
+ the second operand. The second operand might require the old value
+ of the destination register.
+
+ Corollary #2: A function might be able to generate more efficient
+ code if it knows the destination register is a new temporary (and
+ therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ (c) known registers (eg. %ecx is used by shifts)
+ In particular, it may *not* modify global registers, unless the global
+ register happens to be the destination register.
+-}
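+
+-- (A note on the Register values returned throughout this section:
+-- `Any rep code' means the result may go in any register, and `code' is a
+-- function that, given the chosen destination register, produces the
+-- instruction sequence; a Fixed register means the value already lives in
+-- a particular register, which must not be clobbered.)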
+
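+-- If the first argument is a literal that fits in an immediate field and
+-- the caller supplied a "reversed" form of the instruction (for a
+-- commutative operation the reversed form is just the instruction
+-- itself), we can compute b straight into dst and fold the literal in as
+-- an immediate operand.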
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
+ b_code <- getAnyReg b
+ let
+ code dst
+ = b_code dst `snocOL`
+ revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ -- in
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode pk instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr pk x_reg y_reg dst
+ -- in
+ return (Any pk code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
+
+#endif
+
+-------------
+
+trivialUFCode rep instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any rep code)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+ return (Any I32 code__2)
+
+trivialCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `snocOL`
+ instr src1 (RIReg src2) dst
+ return (Any I32 code__2)
+
+------------
+trivialFCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat (cmmExprRep x)
+ tmp2 <- getNewRegNat (cmmExprRep y)
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 dst =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ instr pk src1 src2 dst
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ instr F64 tmp src2 dst
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ instr F64 src1 tmp dst
+ return (Any (if pk1 == pk2 then pk1 else F64) code__2)
+
+------------
+trivialUCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr (RIReg src) dst
+ return (Any pk code__2)
+
+-------------
+trivialUFCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr src dst
+ return (Any pk code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ It may *not* modify global registers, unless the global
+ register happens to be the destination register.
+ It may not clobber any other registers. In fact, only ccalls clobber any
+ fixed registers.
+ Also, it may not modify the counter register (used by genCCall).
+
+ Corollary: If a getRegister for a subexpression returns Fixed, you need
+ not move it to a fresh temporary before evaluating the next subexpression.
+ The Fixed register won't be modified.
+ Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+ the value of the destination register.
+-}
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate rep signed y
+ = do
+ (src1, code1) <- getSomeReg x
+ let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+ return (Any rep code)
+
+trivialCode rep signed instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+ return (Any rep code)
+
+trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm rep instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+ return (Any rep code)
+
+trivialUCode rep instr x = do
+ (src, code) <- getSomeReg x
+ let code' dst = code `snocOL` instr dst src
+ return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
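+--
+-- Writing q for the truncated quotient of x by y, the three instructions
+-- below compute
+--      dst = q             (div)
+--      dst = dst * y       (MULLW)
+--      dst = x - dst       (SUBF dst dst x; subf d,a,b computes b - a)
+-- leaving the remainder x - q*y in dst.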
+
+remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `appOL` toOL [
+ div dst src1 src2,
+ MULLW dst dst (RIReg src2),
+ SUBF dst dst src1
+ ]
+ return (Any rep code)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Coercing to/from integer/floating-point...
+
+-- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
+-- conversions. We have to store temporaries in memory to move
+-- between the integer and the floating point register sets.
+
+-- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
+-- pretend, on sparc at least, that double and float regs are separate
+-- kinds, so the value has to be computed into one kind before being
+-- explicitly "converted" to live in the other kind.
+
+coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
+coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
+
+#if sparc_TARGET_ARCH
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ reg ->
+ let
+ code = registerCode register reg
+ src = registerName register reg
+
+ code__2 dst = code . mkSeqInstrs [
+ ST Q src (spRel 0),
+ LD TF dst (spRel 0),
+ CVTxy Q TF dst dst]
+ in
+ return (Any F64 code__2)
+
+-------------
+coerceFP2Int x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ CVTxy TF Q src tmp,
+ ST TF tmp (spRel 0),
+ LD Q dst (spRel 0)]
+ in
+ return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> GITOF; F64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+------------
+
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of F32 -> GFTOI; F64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
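+-- The integer value is parked in a scratch stack slot (spRel (-2)),
+-- reloaded into the floating-point destination register, and then
+-- converted in place with FxTOy; coerceFP2Int below goes the other way
+-- round through the same slot.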
+coerceInt2FP pk1 pk2 x = do
+ (src, code) <- getSomeReg x
+ let
+ code__2 dst = code `appOL` toOL [
+ ST pk1 src (spRel (-2)),
+ LD pk1 (spRel (-2)) dst,
+ FxTOy pk1 pk2 dst dst]
+ return (Any pk2 code__2)
+
+------------
+coerceFP2Int pk fprep x = do
+ (src, code) <- getSomeReg x
+ reg <- getNewRegNat fprep
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = ASSERT(fprep == F64 || fprep == F32)
+ code `appOL` toOL [
+ FxTOy fprep pk src tmp,
+ ST pk tmp (spRel (-2)),
+ LD pk (spRel (-2)) dst]
+ return (Any pk code__2)
+
+------------
+coerceDbl2Flt x = do
+ (src, code) <- getSomeReg x
+ return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
+
+------------
+coerceFlt2Dbl x = do
+ (src, code) <- getSomeReg x
+ return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
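+-- coerceInt2FP uses the classic magic-constant trick: the literal emitted
+-- below, read as an IEEE double, is 2^52 + 2^31.  The code builds a
+-- double in a stack slot whose high word is 0x43300000 and whose low word
+-- is the source integer with its sign bit flipped (i.e. src + 2^31 viewed
+-- as unsigned), so the slot holds exactly 2^52 + 2^31 + src; subtracting
+-- the constant then leaves src as a double.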
+coerceInt2FP fromRep toRep x = do
+ (src, code) <- getSomeReg x
+ lbl <- getNewLabelNat
+ itmp <- getNewRegNat I32
+ ftmp <- getNewRegNat F64
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x43300000 I32),
+ CmmStaticLit (CmmInt 0x80000000 I32)],
+ XORIS itmp src (ImmInt 0x8000),
+ ST I32 itmp (spRel 3),
+ LIS itmp (ImmInt 0x4330),
+ ST I32 itmp (spRel 2),
+ LD F64 ftmp (spRel 2)
+ ] `appOL` addr_code `appOL` toOL [
+ LD F64 dst addr,
+ FSUB F64 dst ftmp dst
+ ] `appOL` maybe_frsp dst
+
+ maybe_exts = case fromRep of
+ I8 -> unitOL $ EXTS I8 src src
+ I16 -> unitOL $ EXTS I16 src src
+ I32 -> nilOL
+ maybe_frsp dst = case toRep of
+ F32 -> unitOL $ FRSP dst dst
+ F64 -> nilOL
+ return (Any toRep code')
+
+coerceFP2Int fromRep toRep x = do
+ -- the reps don't really matter: F*->F64 and I32->I* are no-ops
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat F64
+ let
+ code' dst = code `appOL` toOL [
+ -- convert to int in FP reg
+ FCTIWZ tmp src,
+ -- store value (64bit) from FP to stack
+ ST F64 tmp (spRel 2),
+ -- read low word of value (high word is undefined)
+ LD I32 dst (spRel 3)]
+ return (Any toRep code')
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from MachInstrs (SDM):
+
+#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+ = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
+#endif
+