path: root/compiler/nativeGen
author     Simon Marlow <simonmar@microsoft.com>   2006-04-07 02:05:11 +0000
committer  Simon Marlow <simonmar@microsoft.com>   2006-04-07 02:05:11 +0000
commit     0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree       8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/nativeGen
parent     28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
download   haskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project; it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--   compiler/nativeGen/AsmCodeGen.lhs                  545
-rw-r--r--   compiler/nativeGen/MachCodeGen.hs                 4654
-rw-r--r--   compiler/nativeGen/MachInstrs.hs                   722
-rw-r--r--   compiler/nativeGen/MachRegs.lhs                   1437
-rw-r--r--   compiler/nativeGen/NCG.h                           108
-rw-r--r--   compiler/nativeGen/NCGMonad.hs                     111
-rw-r--r--   compiler/nativeGen/NOTES                            41
-rw-r--r--   compiler/nativeGen/PositionIndependentCode.hs      605
-rw-r--r--   compiler/nativeGen/PprMach.hs                     2454
-rw-r--r--   compiler/nativeGen/RegAllocInfo.hs                 850
-rw-r--r--   compiler/nativeGen/RegisterAlloc.hs               1004
11 files changed, 12531 insertions, 0 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
new file mode 100644
index 0000000000..1576162167
--- /dev/null
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -0,0 +1,545 @@
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-- This is the top-level module in the native code generator.
+--
+-- -----------------------------------------------------------------------------
+
+\begin{code}
+module AsmCodeGen ( nativeCodeGen ) where
+
+#include "HsVersions.h"
+#include "NCG.h"
+
+import MachInstrs
+import MachRegs
+import MachCodeGen
+import PprMach
+import RegisterAlloc
+import RegAllocInfo ( jumpDests )
+import NCGMonad
+import PositionIndependentCode
+
+import Cmm
+import CmmOpt ( cmmMiniInline, cmmMachOpFold )
+import PprCmm ( pprStmt, pprCmms )
+import MachOp
+import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
+#if powerpc_TARGET_ARCH
+import CLabel ( mkRtsCodeLabel )
+#endif
+
+import UniqFM
+import Unique ( Unique, getUnique )
+import UniqSupply
+import FastTypes
+import List ( groupBy, sortBy )
+import CLabel ( pprCLabel )
+import ErrUtils ( dumpIfSet_dyn )
+import DynFlags ( DynFlags, DynFlag(..), dopt )
+import StaticFlags ( opt_Static, opt_PIC )
+
+import Digraph
+import qualified Pretty
+import Outputable
+import FastString
+
+-- DEBUGGING ONLY
+--import OrdList
+
+#ifdef NCG_DEBUG
+import List ( intersperse )
+#endif
+
+import DATA_INT
+import DATA_WORD
+import DATA_BITS
+import GLAEXTS
+
+{-
+The native-code generator has machine-independent and
+machine-dependent modules.
+
+This module ("AsmCodeGen") is the top-level machine-independent
+module. Before entering machine-dependent land, we do some
+machine-independent optimisations (defined below) on the
+'CmmStmts's.
+
+We convert to the machine-specific 'Instr' datatype with
+'cmmCodeGen', assuming an infinite supply of registers. We then use
+a machine-independent register allocator ('regAlloc') to rejoin
+reality. Obviously, 'regAlloc' has machine-specific helper
+functions (see about "RegAllocInfo" below).
+
+Finally, we order the basic blocks of the function so as to minimise
+the number of jumps between blocks, by utilising fallthrough wherever
+possible.
+
+The machine-dependent bits break down as follows:
+
+ * ["MachRegs"] Everything about the target platform's machine
+ registers (and immediate operands, and addresses, which tend to
+ intermingle/interact with registers).
+
+ * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
+ have a module of its own), plus a miscellany of other things
+ (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
+
+ * ["MachCodeGen"] is where 'Cmm' stuff turns into
+ machine instructions.
+
+ * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
+ a 'Doc').
+
+ * ["RegAllocInfo"] In the register allocator, we manipulate
+ 'MRegsState's, which are 'BitSet's, one bit per machine register.
+ When we want to say something about a specific machine register
+ (e.g., ``it gets clobbered by this instruction''), we set/unset
+ its bit. Obviously, we do this 'BitSet' thing for efficiency
+ reasons.
+
+ The 'RegAllocInfo' module collects together the machine-specific
+ info needed to do register allocation.
+
+ * ["RegisterAlloc"] The (machine-independent) register allocator.
+-}
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the native codegen
+
+-- NB. We *lazily* compile each block of code for space reasons.
+
+nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
+nativeCodeGen dflags cmms us
+ = let (res, _) = initUs us $
+ cgCmm (concat (map add_split cmms))
+
+ cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
+ cgCmm tops =
+ lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
+ case unzip3 results of { (cmms,docs,imps) ->
+ returnUs (Cmm cmms, my_vcat docs, concat imps)
+ }
+ in
+ case res of { (ppr_cmms, insn_sdoc, imports) -> do
+ dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
+ return (insn_sdoc Pretty.$$ dyld_stubs imports
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- On recent versions of Darwin, the linker supports
+ -- dead-stripping of code and data on a per-symbol basis.
+ -- There's a hack to make this work in PprMach.pprNatCmmTop.
+ Pretty.$$ Pretty.text ".subsections_via_symbols"
+#endif
+ )
+ }
+
+ where
+
+ add_split (Cmm tops)
+ | dopt Opt_SplitObjs dflags = split_marker : tops
+ | otherwise = tops
+
+ split_marker = CmmProc [] mkSplitMarkerLabel [] []
+
+ -- Generate "symbol stubs" for all external symbols that might
+ -- come from a dynamic library.
+{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
+ map head $ group $ sort imps-}
+
+ -- (Hack) sometimes two Labels pretty-print the same, but have
+ -- different uniques; so we compare their text versions...
+ dyld_stubs imps
+ | needImportedSymbols
+ = Pretty.vcat $
+ (pprGotDeclaration :) $
+ map (pprImportedSymbol . fst . head) $
+ groupBy (\(_,a) (_,b) -> a == b) $
+ sortBy (\(_,a) (_,b) -> compare a b) $
+ map doPpr $
+ imps
+ | otherwise
+ = Pretty.empty
+
+ where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
+ astyle = mkCodeStyle AsmStyle
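The sortBy/groupBy pair above is the usual sort-then-group idiom for deduplicating a list by a derived key (here, the rendered text of each CLabel). A minimal standalone sketch of the idiom, with a hypothetical nubOnKey helper rather than code from this commit:

    import Data.List (groupBy, sortBy)
    import Data.Ord  (comparing)

    -- Keep one representative per key; sorting first brings equal keys together.
    nubOnKey :: Ord k => (a -> k) -> [a] -> [a]
    nubOnKey key = map head
                 . groupBy (\x y -> key x == key y)
                 . sortBy (comparing key)

    -- e.g. nubOnKey fst [(1,'a'),(2,'b'),(1,'c')]  ==  [(1,'a'),(2,'b')]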
+
+#ifndef NCG_DEBUG
+ my_vcat sds = Pretty.vcat sds
+#else
+ my_vcat sds = Pretty.vcat (
+ intersperse (
+ Pretty.char ' '
+ Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
+ Pretty.$$ Pretty.char ' '
+ )
+ sds
+ )
+#endif
+
+
+-- Complete native code generation phase for a single top-level chunk
+-- of Cmm.
+
+cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
+cmmNativeGen dflags cmm
+ = {-# SCC "fixAssigns" #-}
+ fixAssignsTop cmm `thenUs` \ fixed_cmm ->
+ {-# SCC "genericOpt" #-}
+ cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
+ (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
+ then cmm
+ else CmmData Text []) `bind` \ ppr_cmm ->
+ {-# SCC "genMachCode" #-}
+ genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
+ {-# SCC "regAlloc" #-}
+ mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
+ {-# SCC "sequenceBlocks" #-}
+ map sequenceTop with_regs `bind` \ sequenced ->
+ {-# SCC "x86fp_kludge" #-}
+ map x86fp_kludge sequenced `bind` \ final_mach_code ->
+ {-# SCC "vcat" #-}
+ Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
+
+ returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
+ where
+ x86fp_kludge :: NatCmmTop -> NatCmmTop
+ x86fp_kludge top@(CmmData _ _) = top
+#if i386_TARGET_ARCH
+ x86fp_kludge top@(CmmProc info lbl params code) =
+ CmmProc info lbl params (map bb_i386_insert_ffrees code)
+ where
+ bb_i386_insert_ffrees (BasicBlock id instrs) =
+ BasicBlock id (i386_insert_ffrees instrs)
+#else
+ x86fp_kludge top = top
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Sequencing the basic blocks
+
+-- Cmm BasicBlocks are self-contained entities: they always end in a
+-- jump, either non-local or to another basic block in the same proc.
+-- In this phase, we attempt to place the basic blocks in a sequence
+-- such that as many of the local jumps as possible turn into
+-- fallthroughs.
+
+sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop top@(CmmData _ _) = top
+sequenceTop (CmmProc info lbl params blocks) =
+ CmmProc info lbl params (sequenceBlocks blocks)
+
+-- The algorithm is very simple (and stupid): we make a graph out of
+-- the blocks where there is an edge from one block to another iff the
+-- first block ends by jumping to the second. Then we topologically
+-- sort this graph. Then traverse the list: for each block, we first
+-- output the block, then if it has an out edge, we move the
+-- destination of the out edge to the front of the list, and continue.
+
+sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+sequenceBlocks [] = []
+sequenceBlocks (entry:blocks) =
+ seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
+ -- the first block is the entry point ==> it must remain at the start.
+
+sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
+sccBlocks blocks = stronglyConnCompR (map mkNode blocks)
+
+getOutEdges :: [Instr] -> [Unique]
+getOutEdges instrs = case jumpDests (last instrs) [] of
+ [one] -> [getUnique one]
+ _many -> []
+ -- we're only interested in the last instruction of
+ -- the block, and only if it has a single destination.
+
+mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
+
+seqBlocks [] = []
+seqBlocks ((block,_,[]) : rest)
+ = block : seqBlocks rest
+seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
+ | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
+ | otherwise = block : seqBlocks rest'
+ where
+ (can_fallthrough, rest') = reorder next [] rest
+ -- TODO: we should do a better job for cycles; try to maximise the
+ -- fallthroughs within a loop.
+seqBlocks _ = panic "AsmCodegen:seqBlocks"
+
+reorder id accum [] = (False, reverse accum)
+reorder id accum (b@(block,id',out) : rest)
+ | id == id' = (True, (block,id,out) : reverse accum ++ rest)
+ | otherwise = reorder id (b:accum) rest
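A minimal standalone sketch of the same greedy idea, ignoring the SCC/topological-sort pre-pass and using plain pairs instead of NatBasicBlock (the names below are hypothetical):

    import Data.List (partition)

    -- A toy basic block: a label and an optional single local jump target.
    type Block = (Int, Maybe Int)

    -- After emitting a block, pull its jump target (if still unplaced) to
    -- the front of the work list, so the jump can become a fallthrough.
    layout :: [Block] -> [Block]
    layout []                  = []
    layout (b@(_, tgt) : rest) = b : layout (pull tgt rest)
      where
        pull (Just t) bs = case partition ((== t) . fst) bs of
                             (found, others) -> found ++ others
        pull Nothing  bs = bs

    -- e.g. layout [(0,Just 2),(1,Nothing),(2,Just 1)]
    --        == [(0,Just 2),(2,Just 1),(1,Nothing)]   -- both jumps fall through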
+
+-- -----------------------------------------------------------------------------
+-- Instruction selection
+
+-- Native code instruction selection for a chunk of stix code. For
+-- this part of the computation, we switch from the UniqSM monad to
+-- the NatM monad. The latter carries not only a Unique, but also an
+-- Int denoting the current C stack pointer offset in the generated
+-- code; this is needed for creating correct spill offsets on
+-- architectures which don't offer, or for which it would be
+-- prohibitively expensive to employ, a frame pointer register. Viz,
+-- x86.
+
+-- The offset is measured in bytes, and indicates the difference
+-- between the current (simulated) C stack-ptr and the value it was at
+-- the beginning of the block. For stacks which grow down, this value
+-- should be either zero or negative.
+
+-- Switching between the two monads whilst carrying along the same
+-- Unique supply breaks abstraction. Is that bad?
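A minimal sketch of the stack-pointer delta described above, using an ordinary State monad in place of NatM (all names below are hypothetical):

    import Control.Monad.State (State, execState, modify)

    -- Thread the simulated C stack-pointer offset (in bytes) through the code.
    type DeltaM = State Int

    pushBytes, popBytes :: Int -> DeltaM ()
    pushBytes n = modify (subtract n)   -- the stack grows down, so the delta goes negative
    popBytes  n = modify (+ n)

    -- A block is well formed if the delta is back at zero by the end,
    -- which is the invariant genMachCode asserts below.
    balanced :: DeltaM () -> Bool
    balanced block = execState block 0 == 0

    -- e.g. balanced (pushBytes 8 >> popBytes 8) == True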
+
+genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
+
+genMachCode cmm_top initial_us
+ = let initial_st = mkNatM_State initial_us 0
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ final_us = natm_us final_st
+ final_delta = natm_delta final_st
+ final_imports = natm_imports final_st
+ in
+ if final_delta == 0
+ then ((new_tops, final_imports), final_us)
+ else pprPanic "genMachCode: nonzero final delta"
+ (int final_delta)
+
+-- -----------------------------------------------------------------------------
+-- Fixup assignments to global registers so that they assign to
+-- locations within the RegTable, if appropriate.
+
+-- Note that we currently don't fixup reads here: they're done by
+-- the generic optimiser below, to avoid having two separate passes
+-- over the Cmm.
+
+fixAssignsTop :: CmmTop -> UniqSM CmmTop
+fixAssignsTop top@(CmmData _ _) = returnUs top
+fixAssignsTop (CmmProc info lbl params blocks) =
+ mapUs fixAssignsBlock blocks `thenUs` \ blocks' ->
+ returnUs (CmmProc info lbl params blocks')
+
+fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
+fixAssignsBlock (BasicBlock id stmts) =
+ fixAssigns stmts `thenUs` \ stmts' ->
+ returnUs (BasicBlock id stmts')
+
+fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
+fixAssigns stmts =
+ mapUs fixAssign stmts `thenUs` \ stmtss ->
+ returnUs (concat stmtss)
+
+fixAssign :: CmmStmt -> UniqSM [CmmStmt]
+fixAssign (CmmAssign (CmmGlobal BaseReg) src)
+ = panic "cmmStmtConFold: assignment to BaseReg";
+
+fixAssign (CmmAssign (CmmGlobal reg) src)
+ | Left realreg <- reg_or_addr
+ = returnUs [CmmAssign (CmmGlobal reg) src]
+ | Right baseRegAddr <- reg_or_addr
+ = returnUs [CmmStore baseRegAddr src]
+ -- Replace register leaves with appropriate StixTrees for
+ -- the given target. GlobalRegs which map to a reg on this
+ -- arch are left unchanged. Assigning to BaseReg is always
+ -- illegal, so we check for that.
+ where
+ reg_or_addr = get_GlobalReg_reg_or_addr reg
+
+fixAssign (CmmCall target results args vols)
+ = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
+ returnUs (caller_save ++
+ CmmCall target results' args vols :
+ caller_restore ++
+ concat stores)
+ where
+ -- we also save/restore any caller-saves STG registers here
+ (caller_save, caller_restore) = callerSaveVolatileRegs vols
+
+ fixResult g@(CmmGlobal reg,hint) =
+ case get_GlobalReg_reg_or_addr reg of
+ Left realreg -> returnUs (g, [])
+ Right baseRegAddr ->
+ getUniqueUs `thenUs` \ uq ->
+ let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
+ returnUs ((local,hint),
+ [CmmStore baseRegAddr (CmmReg local)])
+ fixResult other =
+ returnUs (other,[])
+
+fixAssign other_stmt = returnUs [other_stmt]
+
+-- -----------------------------------------------------------------------------
+-- Generic Cmm optimiser
+
+{-
+Here we do:
+
+ (a) Constant folding
+  (b) Simple inlining: a temporary which is assigned to and then
+      used exactly once can be shorted out (replaced by its
+      right-hand side).
+ (c) Replacement of references to GlobalRegs which do not have
+ machine registers by the appropriate memory load (eg.
+ Hp ==> *(BaseReg + 34) ).
+ (d) Position independent code and dynamic linking
+ (i) introduce the appropriate indirections
+ and position independent refs
+ (ii) compile a list of imported symbols
+
+Ideas for other things we could do (ToDo):
+
+ - shortcut jumps-to-jumps
+ - eliminate dead code blocks
+ - simple CSE: if an expr is assigned to a temp, then replace later occs of
+ that expr with the temp, until the expr is no longer valid (can push through
+ temp assignments, and certain assigns to mem...)
+-}
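A minimal sketch of step (a) on a toy expression type; in the real code this job is done by cmmMachOpFold over CmmExpr, and the type and names below are illustrative only:

    data Expr = Lit Integer
              | Add Expr Expr
              | Mul Expr Expr
      deriving (Show, Eq)

    -- Bottom-up constant folding: fold the children first, then combine
    -- literal operands where possible.
    cfold :: Expr -> Expr
    cfold (Add a b) = comb (+) Add (cfold a) (cfold b)
    cfold (Mul a b) = comb (*) Mul (cfold a) (cfold b)
    cfold e         = e

    comb :: (Integer -> Integer -> Integer)
         -> (Expr -> Expr -> Expr) -> Expr -> Expr -> Expr
    comb op _    (Lit x) (Lit y) = Lit (x `op` y)
    comb _  node a       b       = node a b

    -- e.g. cfold (Add (Lit 1) (Mul (Lit 2) (Lit 3))) == Lit 7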
+
+cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
+cmmToCmm top@(CmmData _ _) = (top, [])
+cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $ do
+ blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks)
+ return $ CmmProc info lbl params blocks'
+
+newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
+
+instance Monad CmmOptM where
+ return x = CmmOptM $ \imports -> (# x,imports #)
+ (CmmOptM f) >>= g =
+ CmmOptM $ \imports ->
+ case f imports of
+ (# x, imports' #) ->
+ case g x of
+ CmmOptM g' -> g' imports'
+
+addImportCmmOpt :: CLabel -> CmmOptM ()
+addImportCmmOpt lbl = CmmOptM $ \imports -> (# (), lbl:imports #)
+
+runCmmOpt :: CmmOptM a -> (a, [CLabel])
+runCmmOpt (CmmOptM f) = case f [] of
+ (# result, imports #) -> (result, imports)
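CmmOptM above is just a state monad over the import list, written out by hand with an unboxed pair to avoid allocation. An equivalent (if less efficient) formulation using Control.Monad.State, with String standing in for CLabel and hypothetical names:

    import Control.Monad.State (State, modify, runState)

    type CmmOpt a = State [String] a    -- the state is the accumulated import list

    addImport :: String -> CmmOpt ()
    addImport lbl = modify (lbl :)

    runOpt :: CmmOpt a -> (a, [String])
    runOpt m = runState m []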
+
+cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
+cmmBlockConFold (BasicBlock id stmts) = do
+ stmts' <- mapM cmmStmtConFold stmts
+ return $ BasicBlock id stmts'
+
+cmmStmtConFold stmt
+ = case stmt of
+ CmmAssign reg src
+ -> do src' <- cmmExprConFold False src
+ return $ case src' of
+ CmmReg reg' | reg == reg' -> CmmNop
+ new_src -> CmmAssign reg new_src
+
+ CmmStore addr src
+ -> do addr' <- cmmExprConFold False addr
+ src' <- cmmExprConFold False src
+ return $ CmmStore addr' src'
+
+ CmmJump addr regs
+ -> do addr' <- cmmExprConFold True addr
+ return $ CmmJump addr' regs
+
+ CmmCall target regs args vols
+ -> do target' <- case target of
+ CmmForeignCall e conv -> do
+ e' <- cmmExprConFold True e
+ return $ CmmForeignCall e' conv
+ other -> return other
+ args' <- mapM (\(arg, hint) -> do
+ arg' <- cmmExprConFold False arg
+ return (arg', hint)) args
+ return $ CmmCall target' regs args' vols
+
+ CmmCondBranch test dest
+ -> do test' <- cmmExprConFold False test
+ return $ case test' of
+ CmmLit (CmmInt 0 _) ->
+ CmmComment (mkFastString ("deleted: " ++
+ showSDoc (pprStmt stmt)))
+
+ CmmLit (CmmInt n _) -> CmmBranch dest
+ other -> CmmCondBranch test' dest
+
+ CmmSwitch expr ids
+ -> do expr' <- cmmExprConFold False expr
+ return $ CmmSwitch expr' ids
+
+ other
+ -> return other
+
+
+cmmExprConFold isJumpTarget expr
+ = case expr of
+ CmmLoad addr rep
+ -> do addr' <- cmmExprConFold False addr
+ return $ CmmLoad addr' rep
+
+ CmmMachOp mop args
+ -- For MachOps, we first optimize the children, and then we try
+ -- our hand at some constant-folding.
+ -> do args' <- mapM (cmmExprConFold False) args
+ return $ cmmMachOpFold mop args'
+
+ CmmLit (CmmLabel lbl)
+ -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
+ CmmLit (CmmLabelOff lbl off)
+ -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
+ return $ cmmMachOpFold (MO_Add wordRep) [
+ dynRef,
+ (CmmLit $ CmmInt (fromIntegral off) wordRep)
+ ]
+
+#if powerpc_TARGET_ARCH
+ -- On powerpc (non-PIC), it's easier to jump directly to a label than
+ -- to use the register table, so we replace these registers
+ -- with the corresponding labels:
+ CmmReg (CmmGlobal GCEnter1)
+ | not opt_PIC
+ -> cmmExprConFold isJumpTarget $
+ CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
+ CmmReg (CmmGlobal GCFun)
+ | not opt_PIC
+ -> cmmExprConFold isJumpTarget $
+ CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
+#endif
+
+ CmmReg (CmmGlobal mid)
+ -- Replace register leaves with appropriate StixTrees for
+ -- the given target. MagicIds which map to a reg on this
+ -- arch are left unchanged. For the rest, BaseReg is taken
+ -- to mean the address of the reg table in MainCapability,
+ -- and for all others we generate an indirection to its
+ -- location in the register table.
+ -> case get_GlobalReg_reg_or_addr mid of
+ Left realreg -> return expr
+ Right baseRegAddr
+ -> case mid of
+ BaseReg -> cmmExprConFold False baseRegAddr
+ other -> cmmExprConFold False (CmmLoad baseRegAddr
+ (globalRegRep mid))
+ -- eliminate zero offsets
+ CmmRegOff reg 0
+ -> cmmExprConFold False (CmmReg reg)
+
+ CmmRegOff (CmmGlobal mid) offset
+            -- RegOff leaves are just a shorthand form. If the reg maps
+ -- to a real reg, we keep the shorthand, otherwise, we just
+ -- expand it and defer to the above code.
+ -> case get_GlobalReg_reg_or_addr mid of
+ Left realreg -> return expr
+ Right baseRegAddr
+ -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
+ CmmReg (CmmGlobal mid),
+ CmmLit (CmmInt (fromIntegral offset)
+ wordRep)])
+ other
+ -> return other
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+bind f x = x $! f
+
+\end{code}
+
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
new file mode 100644
index 0000000000..90ce6b5bf8
--- /dev/null
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -0,0 +1,4654 @@
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import MachInstrs
+import MachRegs
+import NCGMonad
+import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import RegAllocInfo ( mkBranchInstr )
+
+-- Our intermediate code:
+import PprCmm ( pprExpr )
+import Cmm
+import MachOp
+import CLabel
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import ForeignCall ( CCallConv(..) )
+import OrdList
+import Pretty
+import Outputable
+import FastString
+import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
+
+#ifdef DEBUG
+import Outputable ( assertPanic )
+import TRACE ( trace )
+#endif
+
+import Control.Monad ( mapAndUnzipM )
+import Maybe ( fromJust )
+import DATA_BITS
+import DATA_WORD
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+type InstrBlock = OrdList Instr
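The idea behind OrdList in miniature: a tree whose append is O(1), flattened left to right only when the final instruction list is needed. This is an illustrative sketch, not the real OrdList, which has more constructors:

    data JList a = None
                 | One a
                 | Two (JList a) (JList a)   -- O(1) append

    append :: JList a -> JList a -> JList a
    append = Two

    -- Flatten with an accumulator so the whole traversal stays linear.
    toList :: JList a -> [a]
    toList t = go t []
      where
        go None      acc = acc
        go (One x)   acc = x : acc
        go (Two l r) acc = go l (go r acc)

    -- e.g. toList (One 1 `append` (One 2 `append` One 3)) == [1,2,3]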
+
+cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen (CmmProc info lab params blocks) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (concat nat_blocks)
+ tops = proc : concat statics
+ case picBaseMb of
+ Just picBase -> initializePicBase picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatingRep kind -> assignReg_FltCode kind reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode kind reg src
+ where kind = cmmRegRep reg
+
+ CmmStore addr src
+ | isFloatingRep kind -> assignMem_FltCode kind addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode kind addr src
+ where kind = cmmExprRep src
+
+ CmmCall target result_regs args vols
+ -> genCCall target result_regs args vols
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
+ where rep = cmmRegRep reg
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms. Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality. Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result. Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64 -- a.k.a "Register64"
+ = ChildCode64
+ InstrBlock -- code
+ Reg -- the lower 32-bit temporary which contains the
+ -- result; use getHiVRegFromLo to find the other
+ -- VRegUnique. Rules of this simplified insn
+ -- selection game are therefore that the returned
+ -- Reg may be modified
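A minimal sketch of the lowering this supports: a 64-bit add becomes an add of the low 32-bit halves plus an add-with-carry of the high halves, which is exactly the ADD/ADC pair emitted by the i386 iselExpr64 cases below (plain Word32 arithmetic, hypothetical helper names):

    import Data.Bits (shiftL, shiftR, (.|.))
    import Data.Word (Word32, Word64)

    split64 :: Word64 -> (Word32, Word32)             -- (lo, hi)
    split64 x = (fromIntegral x, fromIntegral (x `shiftR` 32))

    -- Add two (lo, hi) pairs with an explicit carry out of the low half.
    add64 :: (Word32, Word32) -> (Word32, Word32) -> (Word32, Word32)
    add64 (alo, ahi) (blo, bhi) = (lo, hi)
      where
        lo    = alo + blo
        carry = if lo < alo then 1 else 0              -- the low add wrapped around
        hi    = ahi + bhi + carry

    join64 :: (Word32, Word32) -> Word64
    join64 (lo, hi) = (fromIntegral hi `shiftL` 32) .|. fromIntegral lo

    -- e.g. join64 (add64 (split64 0xFFFFFFFF) (split64 1)) == 0x100000000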
+
+#if WORD_SIZE_IN_BITS==32
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Little-endian store
+ mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
+ mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
+ mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(i386): invalid lvalue"
+
+------------
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ code = toOL [
+ MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode addr addr_code <- getAmode addrTree
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
+ mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+ -- in
+ return (
+ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ r1hi = getHiVRegFromLo r1lo
+ code = code1 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpReg r2lo) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpReg r2hi) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(i386)" (ppr expr)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ (src, code) <- getSomeReg addrTree
+ let
+ rhi = getHiVRegFromLo rlo
+ -- Big-endian store
+ mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
+ mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
+ return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst pk
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
+ rlo <- getNewRegNat I32
+ let rhi = getHiVRegFromLo rlo
+ mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
+ mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
+ return (
+ ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
+ r_dst_lo <- getNewRegNat I32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_lo = mkVReg uq I32
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (
+ ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+ )
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+#endif /* sparc_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if powerpc_TARGET_ARCH
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+ Amode hi_addr addr_code <- getAmode addrTree
+ case addrOffset hi_addr 4 of
+ Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+ Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
+ return (AddrRegImm hi_ptr (ImmInt 0),
+ AddrRegImm hi_ptr (ImmInt 4),
+ code)
+
+assignMem_I64Code addrTree valueTree = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Big-endian store
+ mov_hi = ST I32 rhi hi_addr
+ mov_lo = ST I32 rlo lo_addr
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MR r_dst_lo r_src_lo
+ mov_hi = MR r_dst_hi r_src_hi
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ (rlo, rhi) <- getNewRegPairNat I32
+ let mov_hi = LD I32 rhi hi_addr
+ mov_lo = LD I32 rlo lo_addr
+ return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ half0 = fromIntegral (fromIntegral i :: Word16)
+ half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+ half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+ half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+
+ code = toOL [
+ LIS rlo (ImmInt half1),
+ OR rlo rlo (RIImm $ ImmInt half0),
+ LIS rhi (ImmInt half3),
+                    OR rhi rhi (RIImm $ ImmInt half2)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ ADDC rlo r1lo r2lo,
+ ADDE rhi r1hi r2hi ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Register' type
+
+-- 'Register's passed up the tree. If the stix code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+
+data Register
+ = Fixed MachRep Reg InstrBlock
+ | Any MachRep (Reg -> InstrBlock)
+
+swizzleRegisterRep :: Register -> MachRep -> Register
+swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
+swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
+
+
+-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+-- -----------------------------------------------------------------------------
+-- Grab the Reg for a CmmReg
+
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u pk
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- -----------------------------------------------------------------------------
+-- Generate code to get a subtree into a Register
+
+-- Don't delete this -- it's very handy for debugging.
+--getRegister expr
+-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "getRegister(???)"
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordRep
+ return (Fixed wordRep reg nilOL)
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+#if alpha_TARGET_ARCH
+
+getRegister (StDouble d)
+ = getBlockIdNat `thenNat` \ lbl ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let code dst = mkSeqInstrs [
+ LDATA RoDataSegment lbl [
+ DATA TF [ImmLab (rational d)]
+ ],
+ LDA tmp (AddrImm (ImmCLbl lbl)),
+ LD TF dst (AddrReg tmp)]
+ in
+ return (Any F64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+ = case primop of
+ IntNegOp -> trivialUCode (NEG Q False) x
+
+ NotOp -> trivialUCode NOT x
+
+ FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
+ DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
+
+ OrdOp -> coerceIntCode IntRep x
+ ChrOp -> chrCode x
+
+ Float2IntOp -> coerceFP2Int x
+ Int2FloatOp -> coerceInt2FP pr x
+ Double2IntOp -> coerceFP2Int x
+ Int2DoubleOp -> coerceInt2FP pr x
+
+ Double2FloatOp -> coerceFltCode x
+ Float2DoubleOp -> coerceFltCode x
+
+ other_op -> getRegister (StCall fn CCallConv F64 [x])
+ where
+ fn = case other_op of
+ FloatExpOp -> FSLIT("exp")
+ FloatLogOp -> FSLIT("log")
+ FloatSqrtOp -> FSLIT("sqrt")
+ FloatSinOp -> FSLIT("sin")
+ FloatCosOp -> FSLIT("cos")
+ FloatTanOp -> FSLIT("tan")
+ FloatAsinOp -> FSLIT("asin")
+ FloatAcosOp -> FSLIT("acos")
+ FloatAtanOp -> FSLIT("atan")
+ FloatSinhOp -> FSLIT("sinh")
+ FloatCoshOp -> FSLIT("cosh")
+ FloatTanhOp -> FSLIT("tanh")
+ DoubleExpOp -> FSLIT("exp")
+ DoubleLogOp -> FSLIT("log")
+ DoubleSqrtOp -> FSLIT("sqrt")
+ DoubleSinOp -> FSLIT("sin")
+ DoubleCosOp -> FSLIT("cos")
+ DoubleTanOp -> FSLIT("tan")
+ DoubleAsinOp -> FSLIT("asin")
+ DoubleAcosOp -> FSLIT("acos")
+ DoubleAtanOp -> FSLIT("atan")
+ DoubleSinhOp -> FSLIT("sinh")
+ DoubleCoshOp -> FSLIT("cosh")
+ DoubleTanhOp -> FSLIT("tanh")
+ where
+ pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+ = case primop of
+ CharGtOp -> trivialCode (CMP LTT) y x
+ CharGeOp -> trivialCode (CMP LE) y x
+ CharEqOp -> trivialCode (CMP EQQ) x y
+ CharNeOp -> int_NE_code x y
+ CharLtOp -> trivialCode (CMP LTT) x y
+ CharLeOp -> trivialCode (CMP LE) x y
+
+ IntGtOp -> trivialCode (CMP LTT) y x
+ IntGeOp -> trivialCode (CMP LE) y x
+ IntEqOp -> trivialCode (CMP EQQ) x y
+ IntNeOp -> int_NE_code x y
+ IntLtOp -> trivialCode (CMP LTT) x y
+ IntLeOp -> trivialCode (CMP LE) x y
+
+ WordGtOp -> trivialCode (CMP ULT) y x
+ WordGeOp -> trivialCode (CMP ULE) x y
+ WordEqOp -> trivialCode (CMP EQQ) x y
+ WordNeOp -> int_NE_code x y
+ WordLtOp -> trivialCode (CMP ULT) x y
+ WordLeOp -> trivialCode (CMP ULE) x y
+
+ AddrGtOp -> trivialCode (CMP ULT) y x
+ AddrGeOp -> trivialCode (CMP ULE) y x
+ AddrEqOp -> trivialCode (CMP EQQ) x y
+ AddrNeOp -> int_NE_code x y
+ AddrLtOp -> trivialCode (CMP ULT) x y
+ AddrLeOp -> trivialCode (CMP ULE) x y
+
+ FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ IntAddOp -> trivialCode (ADD Q False) x y
+ IntSubOp -> trivialCode (SUB Q False) x y
+ IntMulOp -> trivialCode (MUL Q False) x y
+ IntQuotOp -> trivialCode (DIV Q False) x y
+ IntRemOp -> trivialCode (REM Q False) x y
+
+ WordAddOp -> trivialCode (ADD Q False) x y
+ WordSubOp -> trivialCode (SUB Q False) x y
+ WordMulOp -> trivialCode (MUL Q False) x y
+ WordQuotOp -> trivialCode (DIV Q True) x y
+ WordRemOp -> trivialCode (REM Q True) x y
+
+ FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
+ FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
+ FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
+ FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
+
+ DoubleAddOp -> trivialFCode F64 (FADD TF) x y
+ DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
+ DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
+ DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
+
+ AddrAddOp -> trivialCode (ADD Q False) x y
+ AddrSubOp -> trivialCode (SUB Q False) x y
+ AddrRemOp -> trivialCode (REM Q True) x y
+
+ AndOp -> trivialCode AND x y
+ OrOp -> trivialCode OR x y
+ XorOp -> trivialCode XOR x y
+ SllOp -> trivialCode SLL x y
+ SrlOp -> trivialCode SRL x y
+
+ ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+ ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+ ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+ FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ where
+ {- ------------------------------------------------------------
+ Some bizarre special code for getting condition codes into
+ registers. Integer non-equality is a test for equality
+ followed by an XOR with 1. (Integer comparisons always set
+ the result register to 0 or 1.) Floating point comparisons of
+ any kind leave the result in a floating point register, so we
+ need to wrangle an integer register out of things.
+ -}
+ int_NE_code :: StixTree -> StixTree -> NatM Register
+
+ int_NE_code x y
+ = trivialCode (CMP EQQ) x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+ in
+ return (Any IntRep code__2)
+
+ {- ------------------------------------------------------------
+ Comments for int_NE_code also apply to cmpF_code
+ -}
+ cmpF_code
+ :: (Reg -> Reg -> Reg -> Instr)
+ -> Cond
+ -> StixTree -> StixTree
+ -> NatM Register
+
+ cmpF_code instr cond x y
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ getBlockIdNat `thenNat` \ lbl ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ OR zeroh (RIImm (ImmInt 1)) dst,
+ BF cond result (ImmCLbl lbl),
+ OR zeroh (RIReg zeroh) dst,
+ NEWBLOCK lbl]
+ in
+ return (Any IntRep code__2)
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+ ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+ = getAmode mem `thenNat` \ amode ->
+ let
+ code = amodeCode amode
+ src = amodeAddr amode
+ size = primRepToSize pk
+ code__2 dst = code . mkSeqInstr (LD size dst src)
+ in
+ return (Any pk code__2)
+
+getRegister (StInt i)
+ | fits8Bits i
+ = let
+ code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+ in
+ return (Any IntRep code)
+ | otherwise
+ = let
+ code dst = mkSeqInstr (LDI Q dst src)
+ in
+ return (Any IntRep code)
+ where
+ src = ImmInt (fromInteger i)
+
+getRegister leaf
+ | isJust imm
+ = let
+ code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+ in
+ return (Any PtrRep code)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)]
+ `consOL` (addr_code `snocOL`
+ GLD F32 addr dst)
+ -- in
+ return (Any F32 code)
+
+
+getRegister (CmmLit (CmmFloat d F64))
+ | d == 0.0
+ = let code dst = unitOL (GLDZ dst)
+ in return (Any F64 code)
+
+ | d == 1.0
+ = let code dst = unitOL (GLD1 dst)
+ in return (Any F64 code)
+
+ | otherwise = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)]
+ `consOL` (addr_code `snocOL`
+ GLD F64 addr dst)
+ -- in
+ return (Any F64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+ let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f rep)],
+ MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ ]
+ -- in
+ return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I32) addr
+ return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ code dst = x_code dst `appOL` toOL [
+ -- This is how gcc does it, so it can't be that bad:
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x80000000 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32)
+ ],
+ XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorps, so we need the 128-bit constant
+ -- ToDo: rip-relative
+ ]
+ --
+ return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ -- This is how gcc does it, so it can't be that bad:
+ code dst = x_code dst `appOL` toOL [
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x8000000000000000 I64),
+ CmmStaticLit (CmmInt 0 I64)
+ ],
+ -- gcc puts an unpck here. Wonder if we need it.
+ XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorpd, so we need the 128-bit constant
+ ]
+ --
+ return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+#if i386_TARGET_ARCH
+ MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
+
+ MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
+ MO_Not rep -> trivialUCode rep (NOT rep) x
+
+ -- Nop conversions
+ -- TODO: these are only nops if the arg is not a fixed register that
+ -- can't be byte-addressed.
+ MO_U_Conv I32 I8 -> conversionNop I32 x
+ MO_S_Conv I32 I8 -> conversionNop I32 x
+ MO_U_Conv I16 I8 -> conversionNop I16 x
+ MO_S_Conv I16 I8 -> conversionNop I16 x
+ MO_U_Conv I32 I16 -> conversionNop I32 x
+ MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
+ MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+ MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
+ MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
+ MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
+
+ MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
+ MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
+ MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+ MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
+ MO_S_Conv from to
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ other -> pprPanic "getRegister" (pprMachOp mop)
+ where
+ -- signed or unsigned extension.
+ integerExtend from to instr expr = do
+ (reg,e_code) <- if from == I8 then getByteReg expr
+ else getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ instr from (OpReg reg) (OpReg dst)
+ return (Any to code)
+
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+ = ASSERT2(cmmExprRep x /= I8, pprExpr e)
+ case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt rep -> condIntReg GU x y
+ MO_U_Ge rep -> condIntReg GEU x y
+ MO_U_Lt rep -> condIntReg LU x y
+ MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
+
+ MO_Add F64 -> trivialFCode F64 GADD x y
+ MO_Sub F64 -> trivialFCode F64 GSUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
+
+ MO_Add rep -> add_code rep x y
+ MO_Sub rep -> sub_code rep x y
+
+ MO_S_Quot rep -> div_code rep True True x y
+ MO_S_Rem rep -> div_code rep True False x y
+ MO_U_Quot rep -> div_code rep False True x y
+ MO_U_Rem rep -> div_code rep False False x y
+
+#if i386_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 GMUL x y
+ MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
+ MO_Mul rep -> let op = IMUL rep in
+ trivialCode rep op (Just op) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_And rep -> let op = AND rep in
+ trivialCode rep op (Just op) x y
+ MO_Or rep -> let op = OR rep in
+ trivialCode rep op (Just op) x y
+ MO_Xor rep -> let op = XOR rep in
+ trivialCode rep op (Just op) x y
+
+    {- Shift ops on x86 have constraints on their shift-amount
+       operand: it has to be Imm, CL or 1,
+       so trivialCode is not restrictive enough (sigh.)
+    -}
+ MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
+ MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
+ MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
+
+ other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getNonClobberedReg a
+ b_code <- getAnyReg b
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+
+ code = a_code `appOL` b_code eax `appOL`
+ toOL [
+ IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
+ SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB rep (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
+ ]
+ -- in
+ return (Fixed rep eax code)
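The test this sequence implements, in isolation: a signed N-by-N-bit multiply fits in N bits iff the high word of the full product equals the sign-extension of the low word. A sketch for N = 32 with a hypothetical helper name:

    import Data.Bits (shiftR)
    import Data.Int  (Int32, Int64)

    mulMayOflo32 :: Int32 -> Int32 -> Bool
    mulMayOflo32 a b = hi /= signOfLo
      where
        full     = fromIntegral a * fromIntegral b :: Int64     -- exact 64-bit product
        lo       = fromIntegral full :: Int32                   -- low word
        hi       = fromIntegral (full `shiftR` 32) :: Int32     -- high word
        signOfLo = lo `shiftR` 31                                -- 0 or -1 (arithmetic shift)

    -- e.g. mulMayOflo32 65536 65536 == True,  mulMayOflo32 3 5 == False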
+
+ --------------------
+ shift_code :: MachRep
+ -> (Operand -> Operand -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+ {- Case1: shift length as immediate -}
+ shift_code rep instr x y@(CmmLit lit) = do
+ x_code <- getAnyReg x
+ let
+ code dst
+ = x_code dst `snocOL`
+ instr (OpImm (litToImm lit)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+ {- Case2: shift length is complex (non-immediate) -}
+ shift_code rep instr x y{-amount-} = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ y_code <- getAnyReg y
+ let
+ code = x_code `appOL`
+ y_code ecx `snocOL`
+ instr (OpReg ecx) (OpReg x_reg)
+ -- in
+ return (Fixed rep x_reg code)
+
+ --------------------
+ add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ add_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger y) = add_int rep x y
+ add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
+
+ --------------------
+ sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ sub_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger (-y)) = add_int rep x (-y)
+ sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
+
+ -- our three-operand add instruction:
+ add_int rep x y = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ imm = ImmInt (fromInteger y)
+ code dst
+ = x_code `snocOL`
+ LEA rep
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+ (OpReg dst)
+ --
+ return (Any rep code)
+
+ ----------------------
+ div_code rep signed quotient x y = do
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+ x_code <- getAnyReg x
+ let
+ widen | signed = CLTD rep
+ | otherwise = XOR rep (OpReg edx) (OpReg edx)
+
+ instr | signed = IDIV
+ | otherwise = DIV
+
+ code = y_code `appOL`
+ x_code eax `appOL`
+ toOL [widen, instr rep y_op]
+
+ result | quotient = eax
+ | otherwise = edx
+
+ -- in
+ return (Fixed rep result code)
+
+
+getRegister (CmmLoad mem pk)
+ | isFloatingRep pk
+ = do
+ Amode src mem_code <- getAmode mem
+ let
+ code dst = mem_code `snocOL`
+ IF_ARCH_i386(GLD pk src dst,
+ MOV pk (OpAddr src) (OpReg dst))
+ --
+ return (Any pk code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ code <- intLoadCode (instr pk) mem
+ return (Any pk code)
+ where
+ instr I8 = MOVZxL pk
+ instr I16 = MOV I16
+ instr I32 = MOV I32
+ -- we always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV pk) mem
+ return (Any pk code)
+#endif
+
+getRegister (CmmLit (CmmInt 0 rep))
+ = let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
+ code dst
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
+ in
+ return (Any rep code)
+
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | I64 <- cmmLitRep lit, not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+ in
+ return (Any I64 code)
+ where
+ isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as is64BitLit, because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
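+	-- boundary example: 0xffffffff still takes this path, while
+	-- 0x100000000 (or any negative I64 literal) counts as big and
+	-- falls through to the general CmmLit case below.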
+#endif
+
+getRegister (CmmLit lit)
+ = let
+ rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
+ in
+ return (Any rep code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+ -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+ Amode src mem_code <- getAmode mem
+ return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+ r <- getRegister expr
+ anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code) = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ | isVirtualReg reg -> return (reg,code)
+ | otherwise -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ -- ToDo: could optimise slightly by checking for byte-addressable
+ -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ -- only free regs can be clobbered
+ | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ | otherwise ->
+ return (reg, code)
+
+reg2reg :: MachRep -> Reg -> Reg -> Instr
+reg2reg rep src dst
+#if i386_TARGET_ARCH
+ | isFloatingRep rep = GMOV src dst
+#endif
+ | otherwise = MOV rep (OpReg src) (OpReg dst)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F32 code)
+
+getRegister (CmmLit (CmmFloat d F64)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
+
+ MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
+ MO_Not rep -> trivialUCode rep (XNOR False g0) x
+
+ MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
+
+ MO_U_Conv F64 F32-> coerceDbl2Flt x
+ MO_U_Conv F32 F64-> coerceFlt2Dbl x
+
+ MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
+ MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
+ MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
+ MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
+
+ -- Conversions which are a nop on sparc
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_S_Conv I32 to -> conversionNop to x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
+ MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
+ MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
+ MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
+
+ other_op -> panic "Unknown unary mach op"
+ where
+ -- XXX SLL/SRL?
+ integerExtend signed from to expr = do
+ (reg, e_code) <- getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ ((if signed then SRA else SRL)
+ reg (RIImm (ImmInt 0)) dst)
+ return (Any to code)
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt I32 -> condIntReg GTT x y
+ MO_U_Ge I32 -> condIntReg GE x y
+ MO_U_Lt I32 -> condIntReg LTT x y
+ MO_U_Le I32 -> condIntReg LE x y
+
+ MO_U_Gt I16 -> condIntReg GU x y
+ MO_U_Ge I16 -> condIntReg GEU x y
+ MO_U_Lt I16 -> condIntReg LU x y
+ MO_U_Le I16 -> condIntReg LEU x y
+
+ MO_Add I32 -> trivialCode I32 (ADD False False) x y
+ MO_Sub I32 -> trivialCode I32 (SUB False False) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+{-
+ -- ToDo: teach about V8+ SPARC div instructions
+ MO_S_Quot I32 -> idiv FSLIT(".div") x y
+ MO_S_Rem I32 -> idiv FSLIT(".rem") x y
+ MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
+ MO_U_Rem I32 -> idiv FSLIT(".urem") x y
+-}
+ MO_Add F32 -> trivialFCode F32 FADD x y
+ MO_Sub F32 -> trivialFCode F32 FSUB x y
+ MO_Mul F32 -> trivialFCode F32 FMUL x y
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+
+ MO_Add F64 -> trivialFCode F64 FADD x y
+ MO_Sub F64 -> trivialFCode F64 FSUB x y
+ MO_Mul F64 -> trivialFCode F64 FMUL x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+
+ MO_And rep -> trivialCode rep (AND False) x y
+ MO_Or rep -> trivialCode rep (OR False) x y
+ MO_Xor rep -> trivialCode rep (XOR False) x y
+
+ MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+ MO_Shl rep -> trivialCode rep SLL x y
+ MO_U_Shr rep -> trivialCode rep SRL x y
+ MO_S_Shr rep -> trivialCode rep SRA x y
+
+{-
+ MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [promote x, promote y])
+ where promote x = CmmMachOp MO_F32_to_Dbl [x]
+ MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [x, y])
+-}
+ other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
+
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getSomeReg a
+ (b_reg, b_code) <- getSomeReg b
+ res_lo <- getNewRegNat I32
+ res_hi <- getNewRegNat I32
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+ code dst = a_code `appOL` b_code `appOL`
+ toOL [
+ SMUL False a_reg (RIReg b_reg) res_lo,
+ RDY res_hi,
+ SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+ SUB False False res_lo (RIReg res_hi) dst
+ ]
+ return (Any I32 code)
+
+getRegister (CmmLoad mem pk) = do
+ Amode src code <- getAmode mem
+ let
+ code__2 dst = code `snocOL` LD pk src dst
+ return (Any pk code__2)
+
+getRegister (CmmLit (CmmInt i _))
+ | fits13Bits i
+ = let
+ src = ImmInt (fromInteger i)
+ code dst = unitOL (OR False g0 (RIImm src) dst)
+ in
+ return (Any I32 code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ SETHI (HI imm) dst,
+ OR False dst (RIImm (LO imm)) dst]
+ in return (Any I32 code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ Amode addr addr_code <- getAmode mem
+ let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
+ addr_code `snocOL` LD pk dst addr
+ return (Any pk code)
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_Not rep -> trivialUCode rep NOT x
+
+ MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+
+ MO_S_Conv from to
+ | from == to -> conversionNop to x
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_S_Conv I32 to -> conversionNop to x
+ MO_S_Conv I16 I8 -> conversionNop I8 x
+ MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
+ MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
+
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_U_Conv I16 I8 -> conversionNop I8 x
+ MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
+ MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
+
+ MO_S_Neg F32 -> trivialUCode F32 FNEG x
+ MO_S_Neg F64 -> trivialUCode F64 FNEG x
+ MO_S_Neg rep -> trivialUCode rep NEG x
+
+ where
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
+ MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
+ MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
+ MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
+
+ MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
+ MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
+ MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
+ MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+ MO_Add I32 ->
+ case y of
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
+ -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
+ CmmLit lit
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code dst = srcCode `appOL` toOL [
+ ADDIS dst src (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ return (Any I32 code)
+ _ -> trivialCode I32 True ADD x y
+
+ MO_Add rep -> trivialCode rep True ADD x y
+ MO_Sub rep ->
+      case y of    -- subfi ('subtract from' with immediate) doesn't exist
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+ -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+ _ -> trivialCodeNoImm rep SUBF y x
+
+ MO_Mul rep -> trivialCode rep True MULLW x y
+
+ MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
+
+ MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
+ MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+ MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_And rep -> trivialCode rep False AND x y
+ MO_Or rep -> trivialCode rep False OR x y
+ MO_Xor rep -> trivialCode rep False XOR x y
+
+ MO_Shl rep -> trivialCode rep False SLW x y
+ MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+ MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+
+getRegister (CmmLit (CmmInt i rep))
+ | Just imm <- makeImmediate rep True i
+ = let
+ code dst = unitOL (LI dst imm)
+ in
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f frep)]
+ `consOL` (addr_code `snocOL` LD frep dst addr)
+ return (Any frep code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ LIS dst (HI imm),
+ OR dst dst (RIImm (LO imm))
+ ]
+ in return (Any rep code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+ -- extend?Rep: wrap integer expression of type rep
+ -- in a conversion to I32
+extendSExpr I32 x = x
+extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
+extendUExpr I32 x = x
+extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
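+
+{-
+A minimal sketch of that discipline, using combinators from this file
+(illustrative only: the x86-style LEA and the use of wordRep below are
+assumptions, not a fixed interface):
+
+	do Amode addr code <- getAmode expr
+	   tmp <- getNewRegNat wordRep
+	   -- materialise the effective address before anything else runs
+	   let code' = code `snocOL` LEA wordRep (OpAddr addr) (OpReg tmp)
+	   -- ... other computation ...
+	   -- ... then address memory via (EABaseReg tmp) ...
+-}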
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (-(fromInteger i))
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (fromInteger i)
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+ | isJust imm
+ = return (Amode (AddrImm imm__2) id)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+getAmode other
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister other `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ in
+ return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- This is all just ridiculous, since it carefully undoes
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (-(fromInteger i))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (fromInteger i)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+ b@(CmmLit _)])
+ = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+ [y, CmmLit (CmmInt shift _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ = do (x_reg, x_code) <- getNonClobberedReg x
+ -- x must be in a temp, because it has to stay live over y_code
+	-- we could compare x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code
+ base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
+ code)
+
+getAmode (CmmLit lit) | not (is64BitLit lit)
+ = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+ (reg,code) <- getSomeReg expr
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (-(fromInteger i))
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits i
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (fromInteger i)
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ let
+ code = codeX `appOL` codeY
+ return (Amode (AddrRegReg regX regY) code)
+
+-- XXX Is this the same as "leaf" in Stix?
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let
+ code = unitOL (SETHI (HI imm__2) tmp)
+ return (Amode (AddrRegImm tmp (LO imm__2)) code)
+ where
+ imm__2 = litToImm lit
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#ifdef powerpc_TARGET_ARCH
+getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True i
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
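+ -- The HA/LO split below is the usual PowerPC convention: for example an
+ -- offset of 0x12348000 becomes HA = 0x1235 and LO = -0x8000, because the
+ -- low 16 bits are sign-extended by the instruction that consumes them.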
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
+ = do
+ tmp <- getNewRegNat I32
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let imm = litToImm lit
+ code = unitOL (LIS tmp (HA imm))
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add I32) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat wordRep
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+ unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+ return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getOperand e = do
+ (reg, code) <- getSomeReg e
+ return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit) = not (is64BitLit lit)
+ || isSuitableFloatingPointLit lit
+isOperand _ = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getRegOrMem e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = is64BitInteger i
+ -- assume that labels are in the range 0-2^31-1: this assumes the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is64BitLit x = False
+#endif
+
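+-- (True just when the value does not fit in a signed 32-bit immediate.)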
+is64BitInteger :: Integer -> Bool
+is64BitInteger i = i > 0x7fffffff || i < -0x80000000
+
+-- -----------------------------------------------------------------------------
+-- The 'CondCode' type: Condition codes passed up the tree.
+
+data CondCode = CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+getCondCode = panic "MachCode.getCondCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+ = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
+ case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+ other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
+
+getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+#elif powerpc_TARGET_ARCH
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bits first
+
+getCondCode (CmmMachOp mop [x, y])
+ = case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+#endif
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if alpha_TARGET_ARCH
+condIntCode = panic "MachCode.condIntCode: not on Alphas"
+condFltCode = panic "MachCode.condFltCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
+ Amode x_addr x_code <- getAmode x
+ let
+ imm = litToImm lit
+ code = x_code `snocOL`
+ CMP pk (OpImm imm) (OpAddr x_addr)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST pk (OpReg x_reg) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- in
+ return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
+ let
+ code = y_code `appOL`
+ x_code `snocOL`
+ CMP (cmmExprRep x) (OpReg y_reg) x_op
+ -- in
+ return (CondCode False cond code)
+#endif
+
+#if i386_TARGET_ARCH
+condFltCode cond x y
+ = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ src2 = ImmInt (fromInteger y)
+ code' = code `snocOL` SUB False True src1 (RIImm src2) g0
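+	-- SUB with the cc flag set and %g0 as the destination is SPARC's
+	-- compare: the result is discarded and only the condition codes remain.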
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code__2 = code1 `appOL` code2 `snocOL`
+ SUB False True src1 (RIReg src2) g0
+ return (CondCode False cond code__2)
+
+-----------
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ FCMP True pk1 src1 src2
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ FCMP True F64 tmp src2
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ FCMP True F64 src1 tmp
+ return (CondCode True cond code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ code' = code `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
+ return (CondCode False cond code')
+
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
+ code'' = case cond of -- twiddle CR to handle unordered case
+ GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+ LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+ _ -> code'
+ where
+ ltbit = 0 ; eqbit = 2 ; gtbit = 1
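+	-- (CR field bit numbering: 0 = LT, 1 = GT, 2 = EQ, 3 = SO/unordered)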
+ return (CondCode True cond code'')
+
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
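+--
+-- In terms of the 'Register' type used throughout this file: an
+-- 'Any rep code' result lets us feed the destination register straight
+-- into 'code', whereas a 'Fixed rep reg code' result costs an extra
+-- register-to-register move (compare 'anyReg' above).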
+
+assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+assignIntCode pk (CmmLoad dst _) src
+ = getNewRegNat IntRep `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignIntCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- integer assignment to memory
+assignMem_IntCode pk addr src = do
+ Amode addr code_addr <- getAmode addr
+ (code_src, op_src) <- get_op_RI src
+ let
+ code = code_src `appOL`
+ code_addr `snocOL`
+ MOV pk op_src (OpAddr addr)
+ -- NOTE: op_src is stable, so it will still be valid
+ -- after code_addr. This may involve the introduction
+ -- of an extra MOV to a temporary register, but we hope
+ -- the register allocator will get rid of it.
+ --
+ return code
+ where
+ get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
+ = return (nilOL, OpImm (litToImm lit))
+ get_op_RI op
+ = do (reg,code) <- getNonClobberedReg op
+ return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+ load_code <- intLoadCode (MOV pk) src
+ return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+ code <- getAnyReg src
+ return (code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+assignReg_IntCode pk reg src = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+	Any _ code         -> code dst
+	Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ where
+ dst = getRegisterReg reg
+
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src
+ = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+ where
+ dst = getRegisterReg reg
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Floating-point assignments
+
+#if alpha_TARGET_ARCH
+
+assignFltCode pk (CmmLoad dst _) src
+ = getNewRegNat pk `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignFltCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (FMOV src__2 dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ (src_reg, src_code) <- getNonClobberedReg src
+ Amode addr addr_code <- getAmode addr
+ let
+ code = src_code `appOL`
+ addr_code `snocOL`
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
+ return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+ src_code <- getAnyReg src
+ return (src_code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ Amode dst__2 code1 <- getAmode addr
+ (src__2, code2) <- getSomeReg src
+ tmp1 <- getNewRegNat pk
+ let
+ pk__2 = cmmExprRep src
+ code__2 = code1 `appOL` code2 `appOL`
+ if pk == pk__2
+ then unitOL (ST pk src__2 dst__2)
+ else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
+ return code__2
+
+-- Floating point assignment to a register/temporary
+-- ToDo: Verify correctness
+assignReg_FltCode pk reg src = do
+ r <- getRegister src
+ v1 <- getNewRegNat pk
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
+ where
+ dst = getRegisterReg reg
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genJump (CmmLabel lbl)
+ | isAsmTemp lbl = returnInstr (BR target)
+ | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = getRegister tree `thenNat` \ register ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let
+ dst = registerName register pv
+ code = registerCode register pv
+ target = registerName register pv
+ in
+ if isFixed register then
+ returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+ else
+ return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+genJump (CmmLoad mem pk) = do
+ Amode target code <- getAmode mem
+ return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+ return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+ (reg,code) <- getSomeReg expr
+ return (code `snocOL` JMP (OpReg reg))
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (toOL [CALL (Left target) 0 True, NOP])
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = do
+ (target, code) <- getSomeReg tree
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+genJump (CmmLit (CmmLabel lbl))
+ = return (unitOL $ JMP lbl)
+
+genJump tree
+ = do
+ (target,code) <- getSomeReg tree
+ return (code `snocOL` MTCTR target `snocOL` BCTR [])
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genCondJump id (StPrim op [x, StInt 0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ returnSeq code [BI (cmpOp op) value target]
+ where
+ cmpOp CharGtOp = GTT
+ cmpOp CharGeOp = GE
+ cmpOp CharEqOp = EQQ
+ cmpOp CharNeOp = NE
+ cmpOp CharLtOp = LTT
+ cmpOp CharLeOp = LE
+ cmpOp IntGtOp = GTT
+ cmpOp IntGeOp = GE
+ cmpOp IntEqOp = EQQ
+ cmpOp IntNeOp = NE
+ cmpOp IntLtOp = LTT
+ cmpOp IntLeOp = LE
+ cmpOp WordGtOp = NE
+ cmpOp WordGeOp = ALWAYS
+ cmpOp WordEqOp = EQQ
+ cmpOp WordNeOp = NE
+ cmpOp WordLtOp = NEVER
+ cmpOp WordLeOp = EQQ
+ cmpOp AddrGtOp = NE
+ cmpOp AddrGeOp = ALWAYS
+ cmpOp AddrEqOp = EQQ
+ cmpOp AddrNeOp = NE
+ cmpOp AddrLtOp = NEVER
+ cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF (cmpOp op) value target))
+ where
+ cmpOp FloatGtOp = GTT
+ cmpOp FloatGeOp = GE
+ cmpOp FloatEqOp = EQQ
+ cmpOp FloatNeOp = NE
+ cmpOp FloatLtOp = LTT
+ cmpOp FloatLeOp = LE
+ cmpOp DoubleGtOp = GTT
+ cmpOp DoubleGeOp = GE
+ cmpOp DoubleEqOp = EQQ
+ cmpOp DoubleNeOp = NE
+ cmpOp DoubleLtOp = LTT
+ cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+ | fltCmpOp op
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF cond result target))
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+ fltCmpOp op = case op of
+ FloatGtOp -> True
+ FloatGeOp -> True
+ FloatEqOp -> True
+ FloatNeOp -> True
+ FloatLtOp -> True
+ FloatLeOp -> True
+ DoubleGtOp -> True
+ DoubleGeOp -> True
+ DoubleEqOp -> True
+ DoubleNeOp -> True
+ DoubleLtOp -> True
+ DoubleLeOp -> True
+ _ -> False
+ (instr, cond) = case op of
+ FloatGtOp -> (FCMP TF LE, EQQ)
+ FloatGeOp -> (FCMP TF LTT, EQQ)
+ FloatEqOp -> (FCMP TF EQQ, NE)
+ FloatNeOp -> (FCMP TF EQQ, EQQ)
+ FloatLtOp -> (FCMP TF LTT, NE)
+ FloatLeOp -> (FCMP TF LE, NE)
+ DoubleGtOp -> (FCMP TF LE, EQQ)
+ DoubleGeOp -> (FCMP TF LTT, EQQ)
+ DoubleEqOp -> (FCMP TF EQQ, NE)
+ DoubleNeOp -> (FCMP TF EQQ, EQQ)
+ DoubleLtOp -> (FCMP TF LTT, NE)
+ DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+ = trivialCode instr x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BI cond result target))
+ where
+ (instr, cond) = case op of
+ CharGtOp -> (CMP LE, EQQ)
+ CharGeOp -> (CMP LTT, EQQ)
+ CharEqOp -> (CMP EQQ, NE)
+ CharNeOp -> (CMP EQQ, EQQ)
+ CharLtOp -> (CMP LTT, NE)
+ CharLeOp -> (CMP LE, NE)
+ IntGtOp -> (CMP LE, EQQ)
+ IntGeOp -> (CMP LTT, EQQ)
+ IntEqOp -> (CMP EQQ, NE)
+ IntNeOp -> (CMP EQQ, EQQ)
+ IntLtOp -> (CMP LTT, NE)
+ IntLeOp -> (CMP LE, NE)
+ WordGtOp -> (CMP ULE, EQQ)
+ WordGeOp -> (CMP ULT, EQQ)
+ WordEqOp -> (CMP EQQ, NE)
+ WordNeOp -> (CMP EQQ, EQQ)
+ WordLtOp -> (CMP ULT, NE)
+ WordLeOp -> (CMP ULE, NE)
+ AddrGtOp -> (CMP ULE, EQQ)
+ AddrGeOp -> (CMP ULT, EQQ)
+ AddrEqOp -> (CMP EQQ, NE)
+ AddrNeOp -> (CMP EQQ, EQQ)
+ AddrLtOp -> (CMP ULT, NE)
+ AddrLeOp -> (CMP ULE, NE)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` JXX cond id)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
+ return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genCondJump (BlockId id) bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (
+ code `appOL`
+ toOL (
+ if is_float
+ then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ )
+ )
+
+#endif /* sparc_TARGET_ARCH */
+
+
+#if powerpc_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (code `snocOL` BCC cond id)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> [(CmmReg,MachHint)] -- where to put the result
+ -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
+ -> Maybe [GlobalReg] -- volatile regs to save
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+ccallResultRegs =
+
+genCCall fn cconv result_regs args
+ = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+ `thenNat` \ ((unused,_), argCode) ->
+ let
+ nRegs = length allArgRegs - length unused
+ code = asmSeqThen (map ($ []) argCode)
+ in
+ returnSeq code [
+ LDA pv (AddrImm (ImmLab (ptext fn))),
+ JSR ra (AddrReg pv) nRegs,
+ LDGP gp (AddrReg ra)]
+ where
+ ------------------------
+ {- Try to get a value into a specific register (or registers) for
+ a call. The first 6 arguments go into the appropriate
+ argument register (separate registers for integer and floating
+ point arguments, but used in lock-step), and the remaining
+ arguments are dumped to the stack, beginning at 0(sp). Our
+ first argument is a pair of the list of remaining argument
+ registers to be assigned for this call and the next stack
+ offset to use for overflowing arguments. This way,
+	@get_arg@ can be applied to all of a call's arguments using
+ @mapAccumLNat@.
+ -}
+ get_arg
+ :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
+ -> StixTree -- Current argument
+ -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+ -- We have to use up all of our argument registers first...
+
+ get_arg ((iDst,fDst):dsts, offset) arg
+ = getRegister arg `thenNat` \ register ->
+ let
+ reg = if isFloatingRep pk then fDst else iDst
+ code = registerCode register reg
+ src = registerName register reg
+ pk = registerRep register
+ in
+ return (
+ if isFloatingRep pk then
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (FMOV src fDst)
+ else code)
+ else
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (OR src (RIReg src) iDst)
+ else code))
+
+ -- Once we have run out of argument registers, we move to the
+ -- stack...
+
+ get_arg ([], offset) arg
+ = getRegister arg `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ pk = registerRep register
+ sz = primRepToSize pk
+ in
+ return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [(r,_)] args vols = do
+ case op of
+ MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
+ MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
+
+ MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
+ MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
+
+ MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
+ MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
+
+ MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
+ MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
+
+ other_op -> outOfLineFloatOp op r args vols
+ where
+ actuallyInlineFloatOp rep instr [(x,_)]
+ = do res <- trivialUFCode rep instr x
+ any <- anyReg res
+ return (any (getRegisterReg r))
+
+genCCall target dest_regs args vols = do
+ let
+ sizes = map (arg_size . cmmExprRep . fst) (reverse args)
+#if !darwin_TARGET_OS
+ tot_arg_size = sum sizes
+#else
+ raw_arg_size = sum sizes
+ tot_arg_size = roundTo 16 raw_arg_size
+ arg_pad_size = tot_arg_size - raw_arg_size
+ delta0 <- getDeltaNat
+ setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+ push_codes <- mapM push_arg (reverse args)
+ delta <- getDeltaNat
+
+ -- in
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) []), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
+ ASSERT(dyn_rep == I32)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
+
+ let push_code
+#if darwin_TARGET_OS
+ | arg_pad_size /= 0
+ = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+ DELTA (delta0 - arg_pad_size)]
+ `appOL` concatOL push_codes
+ | otherwise
+#endif
+ = concatOL push_codes
+ call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
+ [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + tot_arg_size)]
+ )
+ -- in
+ setDeltaNat (delta + tot_arg_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
+ MOV I32 (OpReg edx) (OpReg r_dest_hi)]
+ F32 -> unitOL (GMOV fake0 r_dest)
+ F64 -> unitOL (GMOV fake0 r_dest)
+ rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
+ where
+ r_dest_hi = getHiVRegFromLo r_dest
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (push_code `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size F64 = 8
+ arg_size F32 = 4
+ arg_size I64 = 8
+ arg_size _ = 4
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
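+	-- (roundTo rounds x up to the next multiple of a,
+	--  e.g. roundTo 16 24 == 32)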
+
+
+ push_arg :: (CmmExpr,MachHint){-current argument-}
+ -> NatM InstrBlock -- code
+
+ push_arg (arg,_hint) -- we don't need the hints on x86
+ | arg_rep == I64 = do
+ ChildCode64 code r_lo <- iselExpr64 arg
+ delta <- getDeltaNat
+ setDeltaNat (delta - 8)
+ let
+ r_hi = getHiVRegFromLo r_lo
+ -- in
+ return ( code `appOL`
+ toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
+ PUSH I32 (OpReg r_lo), DELTA (delta - 8),
+ DELTA (delta-8)]
+ )
+
+ | otherwise = do
+ (code, reg, sz) <- get_op arg
+ delta <- getDeltaNat
+ let size = arg_size sz
+ setDeltaNat (delta-size)
+ if (case sz of F64 -> True; F32 -> True; _ -> False)
+ then return (code `appOL`
+ toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
+ DELTA (delta-size),
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0))]
+ )
+ else return (code `snocOL`
+ PUSH I32 (OpReg reg) `snocOL`
+ DELTA (delta-size)
+ )
+ where
+ arg_rep = cmmExprRep arg
+
+ ------------
+ get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
+ get_op op = do
+ (reg,code) <- getSomeReg op
+ return (code, reg, cmmExprRep op)
+
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
+ -> Maybe [GlobalReg] -> NatM InstrBlock
+outOfLineFloatOp mop res args vols
+ = do
+ targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+ let target = CmmForeignCall targetExpr CCallConv
+
+ if cmmRegRep res == F64
+ then
+ stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
+ else do
+ uq <- getUniqueNat
+ let
+ tmp = CmmLocal (LocalReg uq F64)
+ -- in
+ code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
+ code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
+ return (code1 `appOL` code2)
+ where
+ lbl = mkForeignLabel fn Nothing True
+
+ fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrtf")
+ MO_F32_Sin -> FSLIT("sinf")
+ MO_F32_Cos -> FSLIT("cosf")
+ MO_F32_Tan -> FSLIT("tanf")
+ MO_F32_Exp -> FSLIT("expf")
+ MO_F32_Log -> FSLIT("logf")
+
+ MO_F32_Asin -> FSLIT("asinf")
+ MO_F32_Acos -> FSLIT("acosf")
+ MO_F32_Atan -> FSLIT("atanf")
+
+ MO_F32_Sinh -> FSLIT("sinhf")
+ MO_F32_Cosh -> FSLIT("coshf")
+ MO_F32_Tanh -> FSLIT("tanhf")
+ MO_F32_Pwr -> FSLIT("powf")
+
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
+ MO_F64_Exp -> FSLIT("exp")
+ MO_F64_Log -> FSLIT("log")
+
+ MO_F64_Asin -> FSLIT("asin")
+ MO_F64_Acos -> FSLIT("acos")
+ MO_F64_Atan -> FSLIT("atan")
+
+ MO_F64_Sinh -> FSLIT("sinh")
+ MO_F64_Cosh -> FSLIT("cosh")
+ MO_F64_Tanh -> FSLIT("tanh")
+ MO_F64_Pwr -> FSLIT("pow")
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols =
+ outOfLineFloatOp op r args vols
+
+genCCall target dest_regs args vols = do
+
+ -- load up the register arguments
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
+
+ let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = int_regs_used ++ fp_regs_used
+ -- for annotating the call instruction with
+
+ sse_regs = length fp_regs_used
+
+ tot_arg_size = arg_size * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+	-- need to make sure we push a multiple of 16 bytes of args,
+ -- plus the return address, to get the correct alignment.
+ -- Urg, this is hard. We need to feed the delta back into
+ -- the arg pushing code.
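+	-- A worked case: three stack args give tot_arg_size = 24, which is
+	-- not a multiple of 16, so we drop %rsp by a further 8; the 32 bytes
+	-- of argument pushes plus the 8-byte return address then leave %rsp
+	-- at a 16-byte boundary +8 on entry to the callee, as required.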
+ (real_size, adjust_rsp) <-
+ if tot_arg_size `rem` 16 == 0
+ then return (tot_arg_size, nilOL)
+ else do -- we need to adjust...
+ delta <- getDeltaNat
+ setDeltaNat (delta-8)
+ return (tot_arg_size+8, toOL [
+ SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+ DELTA (delta-8)
+ ])
+
+ -- push the stack args, right to left
+ push_code <- push_args (reverse stack_args) nilOL
+ delta <- getDeltaNat
+
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_r, dyn_c) <- getSomeReg expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+ let
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
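+      -- (For example, a call passing two F64 arguments in %xmm0 and %xmm1
+      -- has sse_regs = 2, so assign_eax emits roughly  movl $2, %eax  just
+      -- before the CALL.)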
+ assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+ where
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [(CmmExpr,MachHint)]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((arg,hint) : rest) aregs fregs code
+ | isFloatingRep arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprRep arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((arg,hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((arg,hint):rest) code
+ | isFloatingRep arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [
+ MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
+ SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
+ ASSERT(arg_rep == I64) return ()
+ (arg_op, arg_code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [PUSH I64 arg_op,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+ where
+ arg_rep = cmmExprRep arg
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+{-
+ The SPARC calling convention is an absolute
+   nightmare.  The first six 32-bit words of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+   the word 16 words above the stack pointer is reserved for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+-}
+
+genCCall target dest_regs argsAndHints vols = do
+ let
+ args = map fst argsAndHints
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let
+ (argcodes, vregss) = unzip argcode_and_vregs
+ n_argRegs = length allArgRegs
+ n_argRegs_used = min (length vregs) n_argRegs
+ vregs = concat vregss
+ -- deal with static vs dynamic call targets
+ callinsns <- (case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ CmmForeignCall expr conv -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ CmmPrim mop -> do
+ (res, reduce) <- outOfLineFloatOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
+
+ )
+ let
+ argcode = concatOL argcodes
+ (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+ transfer_code
+ = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+ return (argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up)
+ where
+ -- move args from the integer vregs into which they have been
+ -- marshalled, into %o0 .. %o5, and the rest onto the stack.
+ move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+ move_final [] _ offset -- all args done
+ = []
+
+ move_final (v:vs) [] offset -- out of aregs; move to stack
+ = ST I32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+ move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
+
+ -- generate code to calculate an argument, and move it into one
+ -- or two integer vregs.
+ arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+ arg_to_int_vregs arg
+ | (cmmExprRep arg) == I64
+ = do
+ (ChildCode64 code r_lo) <- iselExpr64 arg
+ let
+ r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+ | otherwise
+ = do
+ (src, code) <- getSomeReg arg
+ tmp <- getNewRegNat (cmmExprRep arg)
+ let
+ pk = cmmExprRep arg
+ case pk of
+ F64 -> do
+ v1 <- getNewRegNat I32
+ v2 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ FMOV F64 src f0 `snocOL`
+ ST F32 f0 (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1 `snocOL`
+ ST F32 (fPair f0) (spRel 16) `snocOL`
+ LD I32 (spRel 16) v2
+ ,
+ [v1,v2]
+ )
+ F32 -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ ST F32 src (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1
+ ,
+ [v1]
+ )
+ other -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL` OR False g0 (RIReg src) v1
+ ,
+ [v1]
+ )
+outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (reduce, functionName) = case mop of
+ MO_F32_Exp -> (True, FSLIT("exp"))
+ MO_F32_Log -> (True, FSLIT("log"))
+ MO_F32_Sqrt -> (True, FSLIT("sqrt"))
+
+ MO_F32_Sin -> (True, FSLIT("sin"))
+ MO_F32_Cos -> (True, FSLIT("cos"))
+ MO_F32_Tan -> (True, FSLIT("tan"))
+
+ MO_F32_Asin -> (True, FSLIT("asin"))
+ MO_F32_Acos -> (True, FSLIT("acos"))
+ MO_F32_Atan -> (True, FSLIT("atan"))
+
+ MO_F32_Sinh -> (True, FSLIT("sinh"))
+ MO_F32_Cosh -> (True, FSLIT("cosh"))
+ MO_F32_Tanh -> (True, FSLIT("tanh"))
+
+ MO_F64_Exp -> (False, FSLIT("exp"))
+ MO_F64_Log -> (False, FSLIT("log"))
+ MO_F64_Sqrt -> (False, FSLIT("sqrt"))
+
+ MO_F64_Sin -> (False, FSLIT("sin"))
+ MO_F64_Cos -> (False, FSLIT("cos"))
+ MO_F64_Tan -> (False, FSLIT("tan"))
+
+ MO_F64_Asin -> (False, FSLIT("asin"))
+ MO_F64_Acos -> (False, FSLIT("acos"))
+ MO_F64_Atan -> (False, FSLIT("atan"))
+
+ MO_F64_Sinh -> (False, FSLIT("sinh"))
+ MO_F64_Cosh -> (False, FSLIT("cosh"))
+ MO_F64_Tanh -> (False, FSLIT("tanh"))
+
+ other -> pprPanic "outOfLineFloatOp(sparc) "
+ (pprCallishMachOp mop)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+ The PowerPC calling convention for Darwin/Mac OS X
+ is described in Apple's document
+ "Inside Mac OS X - Mach-O Runtime Architecture".
+
+ PowerPC Linux uses the System V Release 4 Calling Convention
+ for PowerPC. It is described in the
+ "System V Application Binary Interface PowerPC Processor Supplement".
+
+ Both conventions are similar:
+ Parameters may be passed in general-purpose registers starting at r3, in
+ floating point registers starting at f1, or on the stack.
+
+ But there are substantial differences:
+ * The number of registers used for parameter passing and the exact set of
+     nonvolatile registers differ (see MachRegs.lhs).
+ * On Darwin, stack space is always reserved for parameters, even if they are
+ passed in registers. The called routine may choose to save parameters from
+ registers to the corresponding space on the stack.
+   * On Darwin, a corresponding number of GPRs is skipped when a floating point
+ parameter is passed in an FPR.
+ * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+ starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+ Darwin just treats an I64 like two separate I32s (high word first).
+ * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
+
+   According to both conventions, the parameter area should be part of the
+ caller's stack frame, allocated in the caller's prologue code (large enough
+ to hold the parameter lists for all called routines). The NCG already
+ uses the stack for register spilling, leaving 64 bytes free at the top.
+   If we need a larger parameter area than that, we allocate a new stack
+   frame just before the ccall.
+-}
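+
+-- Worked example (Darwin, using stackDelta below): a call with four I32
+-- arguments needs max 32 bytes of parameter space, plus the 24-byte linkage
+-- area, giving 56, which rounds up to 64.  That fits in the 64 bytes the NCG
+-- already leaves free, so move_sp_down/move_sp_up emit nothing; only a
+-- larger argument list allocates a fresh stack frame around the call.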
+
+genCCall target dest_regs argsAndHints vols
+ = ASSERT (not $ any (`elem` [I8,I16]) argReps)
+ -- we rely on argument promotion in the codeGen
+ do
+ (finalStack,passArgumentsCode,usedRegs) <- passArguments
+ (zip args argReps)
+ allArgRegs allFPArgRegs
+ initialStackOffset
+ (toOL []) []
+
+ (labelOrExpr, reduceToF32) <- case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+ CmmForeignCall expr conv -> return (Right expr, False)
+ CmmPrim mop -> outOfLineFloatOp mop
+
+ let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+ codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
+
+ case labelOrExpr of
+ Left lbl -> do
+ return ( codeBefore
+ `snocOL` BL lbl usedRegs
+ `appOL` codeAfter)
+ Right dyn -> do
+ (dynReg, dynCode) <- getSomeReg dyn
+ return ( dynCode
+ `snocOL` MTCTR dynReg
+ `appOL` codeBefore
+ `snocOL` BCTRL usedRegs
+ `appOL` codeAfter)
+ where
+#if darwin_TARGET_OS
+ initialStackOffset = 24
+ -- size of linkage area + size of arguments, in bytes
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+ map machRepByteWidth argReps
+#elif linux_TARGET_OS
+ initialStackOffset = 8
+ stackDelta finalStack = roundTo 16 finalStack
+#endif
+ args = map fst argsAndHints
+ argReps = map cmmExprRep args
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
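+        -- e.g. roundTo 8 12 == 16, roundTo 8 16 == 16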
+
+ move_sp_down finalStack
+ | delta > 64 =
+ toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
+ DELTA (-delta)]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+ move_sp_up finalStack
+ | delta > 64 =
+ toOL [ADD sp sp (RIImm (ImmInt delta)),
+ DELTA 0]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+
+
+ passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+ passArguments ((arg,I64):args) gprs fprs stackOffset
+ accumCode accumUsed =
+ do
+ ChildCode64 code vr_lo <- iselExpr64 arg
+ let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+ passArguments args
+ (drop 2 gprs)
+ fprs
+ (stackOffset+8)
+ (accumCode `appOL` code
+ `snocOL` storeWord vr_hi gprs stackOffset
+ `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+ ((take 2 gprs) ++ accumUsed)
+ where
+ storeWord vr (gpr:_) offset = MR gpr vr
+ storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+ regCode hireg loreg =
+ accumCode `appOL` code
+ `snocOL` MR hireg vr_hi
+ `snocOL` MR loreg vr_lo
+
+ case gprs of
+ hireg : loreg : regs | even (length gprs) ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _skipped : hireg : loreg : regs ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _ -> -- only one or no regs left
+ passArguments args [] fprs (stackOffset'+8)
+ stackCode accumUsed
+#endif
+
+ passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+ | reg : _ <- regs = do
+ register <- getRegister arg
+ let code = case register of
+ Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+ Any _ acode -> acode reg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we reserve stack slots for register parameters
+ (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ stackOffset
+#endif
+ (accumCode `appOL` code)
+ (reg : accumUsed)
+ | otherwise = do
+ (vr, code) <- getSomeReg arg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+ (stackOffset' + stackBytes)
+ (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
+ accumUsed
+ where
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | rep == F64 = roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
+ (nGprs, nFprs, stackBytes, regs) = case rep of
+ I32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+ -- we use the FPRs.
+ F32 -> (1, 1, 4, fprs)
+ F64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ F32 -> (0, 1, 4, fprs)
+ F64 -> (0, 1, 8, fprs)
+#endif
+
+ moveResult reduceToF32 =
+ case dest_regs of
+ [] -> nilOL
+ [(dest, _hint)]
+ | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
+ | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
+ | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
+ MR r_dest r4]
+ | otherwise -> unitOL (MR r_dest r3)
+ where rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+
+ outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (functionName, reduce) = case mop of
+ MO_F32_Exp -> (FSLIT("exp"), True)
+ MO_F32_Log -> (FSLIT("log"), True)
+ MO_F32_Sqrt -> (FSLIT("sqrt"), True)
+
+ MO_F32_Sin -> (FSLIT("sin"), True)
+ MO_F32_Cos -> (FSLIT("cos"), True)
+ MO_F32_Tan -> (FSLIT("tan"), True)
+
+ MO_F32_Asin -> (FSLIT("asin"), True)
+ MO_F32_Acos -> (FSLIT("acos"), True)
+ MO_F32_Atan -> (FSLIT("atan"), True)
+
+ MO_F32_Sinh -> (FSLIT("sinh"), True)
+ MO_F32_Cosh -> (FSLIT("cosh"), True)
+ MO_F32_Tanh -> (FSLIT("tanh"), True)
+ MO_F32_Pwr -> (FSLIT("pow"), True)
+
+ MO_F64_Exp -> (FSLIT("exp"), False)
+ MO_F64_Log -> (FSLIT("log"), False)
+ MO_F64_Sqrt -> (FSLIT("sqrt"), False)
+
+ MO_F64_Sin -> (FSLIT("sin"), False)
+ MO_F64_Cos -> (FSLIT("cos"), False)
+ MO_F64_Tan -> (FSLIT("tan"), False)
+
+ MO_F64_Asin -> (FSLIT("asin"), False)
+ MO_F64_Acos -> (FSLIT("acos"), False)
+ MO_F64_Atan -> (FSLIT("atan"), False)
+
+ MO_F64_Sinh -> (FSLIT("sinh"), False)
+ MO_F64_Cosh -> (FSLIT("cosh"), False)
+ MO_F64_Tanh -> (FSLIT("tanh"), False)
+ MO_F64_Pwr -> (FSLIT("pow"), False)
+ other -> pprPanic "genCCall(ppc): unknown callish op"
+ (pprCallishMachOp other)
+
+#endif /* darwin_TARGET_OS || linux_TARGET_OS */
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ ADD wordRep op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ JMP_TBL op [ id | Just id <- ids ]
+ ]
+ -- in
+ return code
+#elif powerpc_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ LD I32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ ADDIS tmp tmp (HA (ImmCLbl lbl)),
+ LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+#else
+genSwitch expr ids = panic "ToDo: genSwitch"
+#endif
+
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+-- -----------------------------------------------------------------------------
+-- Support bits
+-- -----------------------------------------------------------------------------
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+condIntReg = panic "MachCode.condIntReg (not on Alpha)"
+condFltReg = panic "MachCode.condFltReg (not on Alpha)"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+condIntReg cond x y = do
+ CondCode _ cond cond_code <- condIntCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if i386_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
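+        -- (Worked through: for NE an unordered compare sets ZF and PF, so
+        --  SETCC NE gives 0 but SETCC PARITY gives 1 and or_unordered
+        --  correctly returns True; for EQQ, and_ordered masks the SETCC
+        --  result with NOTPARITY, so a NaN operand yields False.)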
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg EQQ x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condIntCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+condFltReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condFltCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ NOP,
+ BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+condReg getCond = do
+ lbl1 <- getBlockIdNat
+ lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- getCond
+ let
+{- code dst = cond_code `appOL` toOL [
+ BCC cond lbl1,
+ LI dst (ImmInt 0),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl1,
+ LI dst (ImmInt 1),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl2
+ ]-}
+ code dst = cond_code
+ `appOL` negate_code
+ `appOL` toOL [
+ MFCR dst,
+ RLWINM dst dst (bit + 1) 31 31
+ ]
+
+ negate_code | do_negate = unitOL (CRNOR bit bit bit)
+ | otherwise = nilOL
+
+ (bit, do_negate) = case cond of
+ LTT -> (0, False)
+ LE -> (1, True)
+ EQQ -> (2, False)
+ GE -> (0, True)
+ GTT -> (1, False)
+
+ NE -> (2, True)
+
+ LU -> (0, False)
+ LEU -> (1, True)
+ GEU -> (0, True)
+ GU -> (1, False)
+
+ return (Any I32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
+ ,)))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+#endif
+
+trivialUCode
+ :: MachRep
+ -> IF_ARCH_alpha((RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
+ ,IF_ARCH_sparc((RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
+ ,)))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialUFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((Reg -> Reg -> Instr)
+ ,))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+trivialCode instr x (StInt y)
+ | fits8Bits y
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src1 = registerName register tmp
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+ in
+ return (Any IntRep code__2)
+
+trivialCode instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat IntRep `thenNat` \ tmp1 ->
+ getNewRegNat IntRep `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1 []
+ src1 = registerName register1 tmp1
+ code2 = registerCode register2 tmp2 []
+ src2 = registerName register2 tmp2
+ code__2 dst = asmSeqThen [code1, code2] .
+ mkSeqInstr (instr src1 (RIReg src2) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat F64 `thenNat` \ tmp1 ->
+ getNewRegNat F64 `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1
+ src1 = registerName register1 tmp1
+
+ code2 = registerCode register2 tmp2
+ src2 = registerName register2 tmp2
+
+ code__2 dst = asmSeqThen [code1 [], code2 []] .
+ mkSeqInstr (instr src1 src2 dst)
+ in
+ return (Any F64 code__2)
+
+trivialUFCode _ instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr src dst)
+ in
+ return (Any F64 code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+ it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not
+ subsequently change the contents of that fixed reg. If you
+ want to do so, first copy the value either to a temporary
+ or into dst. You are free to modify dst even if it happens
+ to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+ arbitrary computation. The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from
+ each other and from all other regs, and stay live over
+ arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+ code which places the result in any register, period. Even if that
+ register happens to be read during the computation.
+
+ Corollary #1: this means that if you are generating code for an
+ operation with two arbitrary operands, you cannot assign the result
+ of the first operand into the destination register before computing
+ the second operand. The second operand might require the old value
+ of the destination register.
+
+ Corollary #2: A function might be able to generate more efficient
+ code if it knows the destination register is a new temporary (and
+ therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ (c) known registers (eg. %ecx is used by shifts)
+ In particular, it may *not* modify global registers, unless the global
+ register happens to be the destination register.
+-}
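+
+-- A hypothetical call site, for illustration only: an integer add would use
+-- something like  trivialCode I32 (ADD I32) (Just (ADD I32)) x y,  supplying
+-- the reversed instruction because ADD is commutative; the
+-- constant-on-the-left clause below can then avoid loading the literal into
+-- a register first.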
+
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
+ b_code <- getAnyReg b
+ let
+ code dst
+ = b_code dst `snocOL`
+ revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ -- in
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode pk instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr pk x_reg y_reg dst
+ -- in
+ return (Any pk code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
+
+#endif
+
+-------------
+
+trivialUFCode rep instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any rep code)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+ return (Any I32 code__2)
+
+trivialCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `snocOL`
+ instr src1 (RIReg src2) dst
+ return (Any I32 code__2)
+
+------------
+trivialFCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat (cmmExprRep x)
+ tmp2 <- getNewRegNat (cmmExprRep y)
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 dst =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ instr pk src1 src2 dst
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ instr F64 tmp src2 dst
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ instr F64 src1 tmp dst
+ return (Any (if pk1 == pk2 then pk1 else F64) code__2)
+
+------------
+trivialUCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr (RIReg src) dst
+ return (Any pk code__2)
+
+-------------
+trivialUFCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr src dst
+ return (Any pk code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ It may *not* modify global registers, unless the global
+ register happens to be the destination register.
+ It may not clobber any other registers. In fact, only ccalls clobber any
+ fixed registers.
+ Also, it may not modify the counter register (used by genCCall).
+
+ Corollary: If a getRegister for a subexpression returns Fixed, you need
+ not move it to a fresh temporary before evaluating the next subexpression.
+ The Fixed register won't be modified.
+ Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+ the value of the destination register.
+-}
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate rep signed y
+ = do
+ (src1, code1) <- getSomeReg x
+ let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+ return (Any rep code)
+
+trivialCode rep signed instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+ return (Any rep code)
+
+trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm rep instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+ return (Any rep code)
+
+trivialUCode rep instr x = do
+ (src, code) <- getSomeReg x
+ let code' dst = code `snocOL` instr dst src
+ return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
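+-- In effect:  x `rem` y  =  x - (x `quot` y) * y,  built from div, MULLW, SUBF.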
+
+remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `appOL` toOL [
+ div dst src1 src2,
+ MULLW dst dst (RIReg src2),
+ SUBF dst dst src1
+ ]
+ return (Any rep code)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Coercing to/from integer/floating-point...
+
+-- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
+-- conversions. We have to store temporaries in memory to move
+-- between the integer and the floating point register sets.
+
+-- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
+-- pretend, on sparc at least, that double and float regs are separate
+-- kinds, so the value has to be computed into one kind before being
+-- explicitly "converted" to live in the other kind.
+
+coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
+coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
+
+#if sparc_TARGET_ARCH
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ reg ->
+ let
+ code = registerCode register reg
+ src = registerName register reg
+
+ code__2 dst = code . mkSeqInstrs [
+ ST Q src (spRel 0),
+ LD TF dst (spRel 0),
+ CVTxy Q TF dst dst]
+ in
+ return (Any F64 code__2)
+
+-------------
+coerceFP2Int x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ CVTxy TF Q src tmp,
+ ST TF tmp (spRel 0),
+ LD Q dst (spRel 0)]
+ in
+ return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> GITOF; F64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+------------
+
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of F32 -> GFTOI; F64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+coerceInt2FP pk1 pk2 x = do
+ (src, code) <- getSomeReg x
+ let
+ code__2 dst = code `appOL` toOL [
+ ST pk1 src (spRel (-2)),
+ LD pk1 (spRel (-2)) dst,
+ FxTOy pk1 pk2 dst dst]
+ return (Any pk2 code__2)
+
+------------
+coerceFP2Int pk fprep x = do
+ (src, code) <- getSomeReg x
+ reg <- getNewRegNat fprep
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = ASSERT(fprep == F64 || fprep == F32)
+ code `appOL` toOL [
+ FxTOy fprep pk src tmp,
+ ST pk tmp (spRel (-2)),
+ LD pk (spRel (-2)) dst]
+ return (Any pk code__2)
+
+------------
+coerceDbl2Flt x = do
+ (src, code) <- getSomeReg x
+ return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
+
+------------
+coerceFlt2Dbl x = do
+ (src, code) <- getSomeReg x
+ return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+coerceInt2FP fromRep toRep x = do
+ (src, code) <- getSomeReg x
+ lbl <- getNewLabelNat
+ itmp <- getNewRegNat I32
+ ftmp <- getNewRegNat F64
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x43300000 I32),
+ CmmStaticLit (CmmInt 0x80000000 I32)],
+ XORIS itmp src (ImmInt 0x8000),
+ ST I32 itmp (spRel 3),
+ LIS itmp (ImmInt 0x4330),
+ ST I32 itmp (spRel 2),
+ LD F64 ftmp (spRel 2)
+ ] `appOL` addr_code `appOL` toOL [
+ LD F64 dst addr,
+ FSUB F64 dst ftmp dst
+ ] `appOL` maybe_frsp dst
+
+ maybe_exts = case fromRep of
+ I8 -> unitOL $ EXTS I8 src src
+ I16 -> unitOL $ EXTS I16 src src
+ I32 -> nilOL
+ maybe_frsp dst = case toRep of
+ F32 -> unitOL $ FRSP dst dst
+ F64 -> nilOL
+ return (Any toRep code')
+
+coerceFP2Int fromRep toRep x = do
+ -- the reps don't really matter: F*->F64 and I32->I* are no-ops
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat F64
+ let
+ code' dst = code `appOL` toOL [
+ -- convert to int in FP reg
+ FCTIWZ tmp src,
+ -- store value (64bit) from FP to stack
+ ST F64 tmp (spRel 2),
+ -- read low word of value (high word is undefined)
+ LD I32 dst (spRel 3)]
+ return (Any toRep code')
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from MachInstrs (SDM):
+
+#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+ = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
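+-- (On sparc this is word 23, i.e. [%sp + 92], which matches the stack
+-- argument area described in the sparc genCCall comment above.)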
+#endif
+
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
new file mode 100644
index 0000000000..0f718d3cea
--- /dev/null
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -0,0 +1,722 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "nativeGen/NCG.h"
+
+module MachInstrs (
+ -- * Cmm instantiations
+ NatCmm, NatCmmTop, NatBasicBlock,
+
+ -- * Machine instructions
+ Instr(..),
+ Cond(..), condUnsigned, condToSigned, condToUnsigned,
+
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
+ Size(..), machRepSize,
+#endif
+ RI(..),
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ Operand(..),
+#endif
+#if i386_TARGET_ARCH
+ i386_insert_ffrees,
+#endif
+#if sparc_TARGET_ARCH
+ riZero, fpRelEA, moveSp, fPair,
+#endif
+ ) where
+
+#include "HsVersions.h"
+
+import MachRegs
+import Cmm
+import MachOp ( MachRep(..) )
+import CLabel ( CLabel, pprCLabel )
+import Panic ( panic )
+import Outputable
+import FastString
+import Constants ( wORD_SIZE )
+
+import GLAEXTS
+
+
+-- -----------------------------------------------------------------------------
+-- Our flavours of the Cmm types
+
+-- Type synonyms for Cmm populated with native code
+type NatCmm = GenCmm CmmStatic Instr
+type NatCmmTop = GenCmmTop CmmStatic Instr
+type NatBasicBlock = GenBasicBlock Instr
+
+-- -----------------------------------------------------------------------------
+-- Conditions on this architecture
+
+data Cond
+#if alpha_TARGET_ARCH
+ = ALWAYS -- For BI (same as BR)
+ | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
+ | GE -- For BI only
+ | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
+ | LE -- For CMP and BI
+ | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
+ | NE -- For BI only
+ | NEVER -- For BI (null instruction)
+ | ULE -- For CMP only
+ | ULT -- For CMP only
+#endif
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+#endif
+#if sparc_TARGET_ARCH
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | NEVER
+ | POS
+ | VC
+ | VS
+#endif
+#if powerpc_TARGET_ARCH
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+#endif
+ deriving Eq -- to make an assertion work
+
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
+
+-- -----------------------------------------------------------------------------
+-- Sizes on this architecture
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+-- here.  I've removed them from the x86 version; we'll see what happens --SDM
+
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
+data Size
+#if alpha_TARGET_ARCH
+ = B -- byte
+ | Bu
+-- | W -- word (2 bytes): UNUSED
+-- | Wu -- : UNUSED
+ | L -- longword (4 bytes)
+ | Q -- quadword (8 bytes)
+-- | FF -- VAX F-style floating pt: UNUSED
+-- | GF -- VAX G-style floating pt: UNUSED
+-- | DF -- VAX D-style floating pt: UNUSED
+-- | SF -- IEEE single-precision floating pt: UNUSED
+ | TF -- IEEE double-precision floating pt
+#endif
+#if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
+ = B -- byte (signed)
+ | Bu -- byte (unsigned)
+ | H -- halfword (signed, 2 bytes)
+ | Hu -- halfword (unsigned, 2 bytes)
+ | W -- word (4 bytes)
+ | F -- IEEE single-precision floating pt
+	| DF	-- IEEE double-precision floating pt
+#endif
+ deriving Eq
+
+machRepSize :: MachRep -> Size
+machRepSize I8 = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
+machRepSize I16 = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, ))
+machRepSize I32 = IF_ARCH_alpha(L, IF_ARCH_sparc(W, ))
+machRepSize I64 = panic "machRepSize: I64"
+machRepSize I128 = panic "machRepSize: I128"
+machRepSize F32 = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
+machRepSize F64 = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Register or immediate (a handy type on some platforms)
+
+data RI = RIReg Reg
+ | RIImm Imm
+
+
+-- -----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+data Instr
+ = COMMENT FastString -- comment pseudo-op
+
+ | LDATA Section [CmmStatic] -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+
+ | NEWBLOCK BlockId -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+
+ | DELTA Int -- specify current stack offset for
+ -- benefit of subsequent passes
+
+-- -----------------------------------------------------------------------------
+-- Alpha instructions
+
+#if alpha_TARGET_ARCH
+
+-- data Instr continues...
+
+-- Loads and stores.
+ | LD Size Reg AddrMode -- size, dst, src
+ | LDA Reg AddrMode -- dst, src
+ | LDAH Reg AddrMode -- dst, src
+ | LDGP Reg AddrMode -- dst, src
+ | LDI Size Reg Imm -- size, dst, src
+ | ST Size Reg AddrMode -- size, src, dst
+
+-- Int Arithmetic.
+ | CLR Reg -- dst
+ | ABS Size RI Reg -- size, src, dst
+ | NEG Size Bool RI Reg -- size, overflow, src, dst
+ | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
+ | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
+ | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
+ | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
+
+-- Simple bit-twiddling.
+ | NOT RI Reg
+ | AND Reg RI Reg
+ | ANDNOT Reg RI Reg
+ | OR Reg RI Reg
+ | ORNOT Reg RI Reg
+ | XOR Reg RI Reg
+ | XORNOT Reg RI Reg
+ | SLL Reg RI Reg
+ | SRL Reg RI Reg
+ | SRA Reg RI Reg
+
+ | ZAP Reg RI Reg
+ | ZAPNOT Reg RI Reg
+
+ | NOP
+
+-- Comparison
+ | CMP Cond Reg RI Reg
+
+-- Float Arithmetic.
+ | FCLR Reg
+ | FABS Reg Reg
+ | FNEG Size Reg Reg
+ | FADD Size Reg Reg Reg
+ | FDIV Size Reg Reg Reg
+ | FMUL Size Reg Reg Reg
+ | FSUB Size Reg Reg Reg
+ | CVTxy Size Size Reg Reg
+ | FCMP Size Cond Reg Reg Reg
+ | FMOV Reg Reg
+
+-- Jumping around.
+ | BI Cond Reg Imm
+ | BF Cond Reg Imm
+ | BR Imm
+ | JMP Reg AddrMode Int
+ | BSR Imm Int
+ | JSR Reg AddrMode Int
+
+-- Alpha-specific pseudo-ops.
+ | FUNBEGIN CLabel
+ | FUNEND CLabel
+
+data RI
+ = RIReg Reg
+ | RIImm Imm
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Intel x86 instructions
+
+{-
+Intel, in their infinite wisdom, selected a stack model for floating
+point registers on x86. That might have made sense back in 1979 --
+nowadays we can see it for the nonsense it really is. A stack model
+fits poorly with the existing nativeGen infrastructure, which assumes
+flat integer and FP register sets. Prior to this commit, nativeGen
+could not generate correct x86 FP code -- to do so would have meant
+somehow working the register-stack paradigm into the register
+allocator and spiller, which sounds very difficult.
+
+We have decided to cheat, and go for a simple fix which requires no
+infrastructure modifications, at the expense of generating ropey but
+correct FP code. All notions of the x86 FP stack and its insns have
+been removed. Instead, we pretend (to the instruction selector and
+register allocator) that x86 has six floating point registers, %fake0
+.. %fake5, which can be used in the usual flat manner. We further
+claim that x86 has floating point instructions very similar to SPARC
+and Alpha, that is, a simple 3-operand register-register arrangement.
+Code generation and register allocation proceed on this basis.
+
+When we come to print out the final assembly, our convenient fiction
+is converted to dismal reality. Each fake instruction is
+independently converted to a series of real x86 instructions.
+%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
+arithmetic operations, the two operands are pushed onto the top of the
+FP stack, the operation done, and the result copied back into the
+relevant register. There are only six %fake registers because 2 are
+needed for the translation, and x86 has 8 in total.
+
+The translation is inefficient but is simple and it works. A cleverer
+translation would handle a sequence of insns, simulating the FP stack
+contents, would not impose a fixed mapping from %fake to %st regs, and
+hopefully could avoid most of the redundant reg-reg moves of the
+current translation.
+
+We might as well make use of whatever unique FP facilities Intel have
+chosen to bless us with (let's not be churlish, after all).
+Hence GLDZ and GLD1. Bwahahahahahahaha!
+-}
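+-- Purely as an illustration (the real translation is done when the assembly
+-- is printed, in PprMach): a fake three-operand insn such as
+--     GADD F64 %fake0 %fake1 %fake2
+-- becomes a short real-x86 sequence that loads the %st slots holding %fake0
+-- and %fake1 onto the FP stack, adds them, and stores the result back into
+-- %fake2's slot.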
+
+{-
+MORE FLOATING POINT MUSINGS...
+
+Intel's internal floating point registers are by default 80 bit
+extended precision. This means that all operations done on values in
+registers are done at 80 bits, and unless the intermediate values are
+truncated to the appropriate size (32 or 64 bits) by storing in
+memory, calculations in registers will give different results from
+calculations which pass intermediate values in memory (eg. via
+function calls).
+
+One solution is to set the FPU into 64 bit precision mode. Some OSs
+do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
+that this will only affect 64-bit precision arithmetic; 32-bit
+calculations will still be done at 64-bit precision in registers. So
+it doesn't solve the whole problem.
+
+There's also the issue of what the C library is expecting in terms of
+precision. It seems to be the case that glibc on Linux expects the
+FPU to be set to 80 bit precision, so setting it to 64 bit could have
+unexpected effects. Changing the default could have undesirable
+effects on other 3rd-party library code too, so the right thing would
+be to save/restore the FPU control word across Haskell code if we were
+to do this.
+
+gcc's -ffloat-store gives consistent results by always storing the
+results of floating-point calculations in memory, which works for both
+32 and 64-bit precision. However, it only affects the values of
+user-declared floating point variables in C, not intermediate results.
+GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
+flag).
+
+Another problem is how to spill floating point registers in the
+register allocator. Should we spill the whole 80 bits, or just 64?
+On an OS which is set to 64 bit precision, spilling 64 is fine. On
+Linux, spilling 64 bits will round the results of some operations.
+This is what gcc does. Spilling at 80 bits requires taking up a full
+128 bit slot (so we get alignment). We spill at 80-bits and ignore
+the alignment problems.
+
+In the future, we'll use the SSE registers for floating point. This
+requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
+precision float ops), which means P4 or Xeon and above. Using SSE
+will solve all these problems, because the SSE registers use fixed 32
+bit or 64 bit precision.
+
+--SDM 1/2003
+-}
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- data Instr continues...
+
+-- Moves.
+ | MOV MachRep Operand Operand
+ | MOVZxL MachRep Operand Operand -- size is the size of operand 1
+ | MOVSxL MachRep Operand Operand -- size is the size of operand 1
+ -- x86_64 note: plain mov into a 32-bit register always zero-extends
+ -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
+ -- don't affect the high bits of the register.
+
+-- Load effective address (also a very useful three-operand add instruction :-)
+ | LEA MachRep Operand Operand
+
+-- Int Arithmetic.
+ | ADD MachRep Operand Operand
+ | ADC MachRep Operand Operand
+ | SUB MachRep Operand Operand
+
+ | MUL MachRep Operand Operand
+ | IMUL MachRep Operand Operand -- signed int mul
+ | IMUL2 MachRep Operand -- %edx:%eax = operand * %eax
+
+	| DIV         MachRep Operand	-- eax := edx:eax/op, edx := edx:eax%op
+ | IDIV MachRep Operand -- ditto, but signed
+
+-- Simple bit-twiddling.
+ | AND MachRep Operand Operand
+ | OR MachRep Operand Operand
+ | XOR MachRep Operand Operand
+ | NOT MachRep Operand
+ | NEGI MachRep Operand -- NEG instruction (name clash with Cond)
+
+-- Shifts (amount may be immediate or %cl only)
+ | SHL MachRep Operand{-amount-} Operand
+ | SAR MachRep Operand{-amount-} Operand
+ | SHR MachRep Operand{-amount-} Operand
+
+ | BT MachRep Imm Operand
+ | NOP
+
+#if i386_TARGET_ARCH
+-- Float Arithmetic.
+
+-- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
+-- as single instructions right up until we spit them out.
+ -- all the 3-operand fake fp insns are src1 src2 dst
+ -- and furthermore are constrained to be fp regs only.
+ -- IMPORTANT: keep is_G_insn up to date with any changes here
+ | GMOV Reg Reg -- src(fpreg), dst(fpreg)
+ | GLD MachRep AddrMode Reg -- src, dst(fpreg)
+ | GST MachRep Reg AddrMode -- src(fpreg), dst
+
+ | GLDZ Reg -- dst(fpreg)
+ | GLD1 Reg -- dst(fpreg)
+
+ | GFTOI Reg Reg -- src(fpreg), dst(intreg)
+ | GDTOI Reg Reg -- src(fpreg), dst(intreg)
+
+ | GITOF Reg Reg -- src(intreg), dst(fpreg)
+ | GITOD Reg Reg -- src(intreg), dst(fpreg)
+
+ | GADD MachRep Reg Reg Reg -- src1, src2, dst
+ | GDIV MachRep Reg Reg Reg -- src1, src2, dst
+ | GSUB MachRep Reg Reg Reg -- src1, src2, dst
+ | GMUL MachRep Reg Reg Reg -- src1, src2, dst
+
+ -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
+ -- Compare src1 with src2; set the Zero flag iff the numbers are
+ -- comparable and the comparison is True. Subsequent code must
+ -- test the %eflags zero flag regardless of the supplied Cond.
+ | GCMP Cond Reg Reg -- src1, src2
+
+ | GABS MachRep Reg Reg -- src, dst
+ | GNEG MachRep Reg Reg -- src, dst
+ | GSQRT MachRep Reg Reg -- src, dst
+ | GSIN MachRep Reg Reg -- src, dst
+ | GCOS MachRep Reg Reg -- src, dst
+ | GTAN MachRep Reg Reg -- src, dst
+
+ | GFREE -- do ffree on all x86 regs; an ugly hack
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 floating point: we use a restricted set of the available SSE2
+-- instructions for floating-point.
+
+ -- use MOV for moving (either movss or movsd (movlpd better?))
+
+ | CVTSS2SD Reg Reg -- F32 to F64
+ | CVTSD2SS Reg Reg -- F64 to F32
+ | CVTSS2SI Operand Reg -- F32 to I32/I64 (with rounding)
+ | CVTSD2SI Operand Reg -- F64 to I32/I64 (with rounding)
+ | CVTSI2SS Operand Reg -- I32/I64 to F32
+ | CVTSI2SD Operand Reg -- I32/I64 to F64
+
+ -- use ADD & SUB for arithmetic. In both cases, operands
+ -- are Operand Reg.
+
+ -- SSE2 floating-point division:
+ | FDIV MachRep Operand Operand -- divisor, dividend(dst)
+
+ -- use CMP for comparisons. ucomiss and ucomisd instructions
+ -- compare single/double prec floating point respectively.
+
+ | SQRT MachRep Operand Reg -- src, dst
+#endif
+
+-- Comparison
+ | TEST MachRep Operand Operand
+ | CMP MachRep Operand Operand
+ | SETCC Cond Operand
+
+-- Stack Operations.
+ | PUSH MachRep Operand
+ | POP MachRep Operand
+ -- both unused (SDM):
+ -- | PUSHA
+ -- | POPA
+
+-- Jumping around.
+ | JMP Operand
+ | JXX Cond BlockId -- includes unconditional branches
+ | JMP_TBL Operand [BlockId] -- table jump
+ | CALL (Either Imm Reg) [Reg]
+
+-- Other things.
+ | CLTD MachRep -- sign extend %eax into %edx:%eax
+
+ | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
+ -- pretty-prints as
+ -- call 1f
+ -- 1: popl %reg
+ -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
+ | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
+ -- pretty-prints as
+ -- call 1f
+ -- 1: popl %reg
+
+
+data Operand
+ = OpReg Reg -- register
+ | OpImm Imm -- immediate value
+ | OpAddr AddrMode -- memory reference
+
+#endif /* i386 or x86_64 */
+
+#if i386_TARGET_ARCH
+i386_insert_ffrees :: [Instr] -> [Instr]
+i386_insert_ffrees insns
+ | any is_G_instr insns
+ = concatMap ffree_before_nonlocal_transfers insns
+ | otherwise
+ = insns
+
+ffree_before_nonlocal_transfers insn
+ = case insn of
+ CALL _ _ -> [GFREE, insn]
+ JMP _ -> [GFREE, insn]
+ other -> [insn]
+
+
+-- if you ever add a new FP insn to the fake x86 FP insn set,
+-- you must update this too
+is_G_instr :: Instr -> Bool
+is_G_instr instr
+ = case instr of
+ GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
+ GLDZ _ -> True; GLD1 _ -> True
+ GFTOI _ _ -> True; GDTOI _ _ -> True
+ GITOF _ _ -> True; GITOD _ _ -> True
+ GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
+ GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
+ GCMP _ _ _ -> True; GABS _ _ _ -> True
+ GNEG _ _ _ -> True; GSQRT _ _ _ -> True
+ GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
+ GFREE -> panic "is_G_instr: GFREE (!)"
+ other -> False
+#endif /* i386_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Sparc instructions
+
+#if sparc_TARGET_ARCH
+
+-- data Instr continues...
+
+-- Loads and stores.
+ | LD MachRep AddrMode Reg -- size, src, dst
+ | ST MachRep Reg AddrMode -- size, src, dst
+
+-- Int Arithmetic.
+ | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+ | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+ | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
+ | RDY Reg -- move contents of Y register to reg
+
+-- Simple bit-twiddling.
+ | AND Bool Reg RI Reg -- cc?, src1, src2, dst
+ | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
+ | OR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
+ | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SLL Reg RI Reg -- src1, src2, dst
+ | SRL Reg RI Reg -- src1, src2, dst
+ | SRA Reg RI Reg -- src1, src2, dst
+ | SETHI Imm Reg -- src, dst
+ | NOP -- Really SETHI 0, %g0, but worth an alias
+
+-- Float Arithmetic.
+
+-- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
+-- instructions right up until we spit them out.
+ | FABS MachRep Reg Reg -- src dst
+ | FADD MachRep Reg Reg Reg -- src1, src2, dst
+	| FCMP        MachRep Bool Reg Reg     -- exception?, src1, src2
+ | FDIV MachRep Reg Reg Reg -- src1, src2, dst
+ | FMOV MachRep Reg Reg -- src, dst
+ | FMUL MachRep Reg Reg Reg -- src1, src2, dst
+ | FNEG MachRep Reg Reg -- src, dst
+ | FSQRT MachRep Reg Reg -- src, dst
+ | FSUB MachRep Reg Reg Reg -- src1, src2, dst
+ | FxTOy MachRep MachRep Reg Reg -- src, dst
+
+-- Jumping around.
+ | BI Cond Bool Imm -- cond, annul?, target
+ | BF Cond Bool Imm -- cond, annul?, target
+
+ | JMP AddrMode -- target
+ | CALL (Either Imm Reg) Int Bool -- target, args, terminal
+
+riZero :: RI -> Bool
+
+riZero (RIImm (ImmInt 0)) = True
+riZero (RIImm (ImmInteger 0)) = True
+riZero (RIReg (RealReg 0)) = True
+riZero _ = False
+
+-- Calculate the effective address which would be used by the
+-- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
+-- alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+ = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+-- Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+ = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
+-- Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Reg
+fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
+fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
+#endif /* sparc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- PowerPC instructions
+
+#ifdef powerpc_TARGET_ARCH
+-- data Instr continues...
+
+-- Loads and stores.
+ | LD MachRep Reg AddrMode -- Load size, dst, src
+ | LA MachRep Reg AddrMode -- Load arithmetic size, dst, src
+ | ST MachRep Reg AddrMode -- Store size, src, dst
+ | STU MachRep Reg AddrMode -- Store with Update size, src, dst
+ | LIS Reg Imm -- Load Immediate Shifted dst, src
+ | LI Reg Imm -- Load Immediate dst, src
+ | MR Reg Reg -- Move Register dst, src -- also for fmr
+
+ | CMP MachRep Reg RI --- size, src1, src2
+ | CMPL MachRep Reg RI --- size, src1, src2
+
+ | BCC Cond BlockId
+ | JMP CLabel -- same as branch,
+ -- but with CLabel instead of block ID
+ | MTCTR Reg
+ | BCTR [BlockId] -- with list of local destinations
+ | BL CLabel [Reg] -- with list of argument regs
+ | BCTRL [Reg]
+
+ | ADD Reg Reg RI -- dst, src1, src2
+ | ADDC Reg Reg Reg -- (carrying) dst, src1, src2
+ | ADDE Reg Reg Reg -- (extend) dst, src1, src2
+ | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
+ | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
+ | MULLW Reg Reg RI
+ | DIVW Reg Reg Reg
+ | DIVWU Reg Reg Reg
+
+ | MULLW_MayOflo Reg Reg Reg
+ -- dst = 1 if src1 * src2 overflows
+ -- pseudo-instruction; pretty-printed as:
+ -- mullwo. dst, src1, src2
+ -- mfxer dst
+ -- rlwinm dst, dst, 2, 31,31
+
+ | AND Reg Reg RI -- dst, src1, src2
+ | OR Reg Reg RI -- dst, src1, src2
+ | XOR Reg Reg RI -- dst, src1, src2
+ | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
+
+ | EXTS MachRep Reg Reg
+
+ | NEG Reg Reg
+ | NOT Reg Reg
+
+ | SLW Reg Reg RI -- shift left word
+ | SRW Reg Reg RI -- shift right word
+ | SRAW Reg Reg RI -- shift right arithmetic word
+
+ -- Rotate Left Word Immediate then AND with Mask
+ | RLWINM Reg Reg Int Int Int
+
+ | FADD MachRep Reg Reg Reg
+ | FSUB MachRep Reg Reg Reg
+ | FMUL MachRep Reg Reg Reg
+ | FDIV MachRep Reg Reg Reg
+ | FNEG Reg Reg -- negate is the same for single and double prec.
+
+ | FCMP Reg Reg
+
+ | FCTIWZ Reg Reg -- convert to integer word
+ | FRSP Reg Reg -- reduce to single precision
+ -- (but destination is a FP register)
+
+ | CRNOR Int Int Int -- condition register nor
+ | MFCR Reg -- move from condition register
+
+ | MFLR Reg -- move from link register
+ | FETCHPC Reg -- pseudo-instruction:
+ -- bcl to next insn, mflr reg
+
+#endif /* powerpc_TARGET_ARCH */
diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs
new file mode 100644
index 0000000000..bffb723d1b
--- /dev/null
+++ b/compiler/nativeGen/MachRegs.lhs
@@ -0,0 +1,1437 @@
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 1994-2004
+--
+-- Machine-specific info about registers.
+--
+-- Also includes stuff about immediate operands, which are
+-- often/usually quite entangled with registers.
+--
+-- (Immediates could be untangled from registers at some cost in tangled
+-- modules --- the pleasure has been foregone.)
+--
+-- -----------------------------------------------------------------------------
+
+\begin{code}
+#include "nativeGen/NCG.h"
+
+module MachRegs (
+
+ -- * Immediate values
+ Imm(..), strImmLit, litToImm,
+
+ -- * Addressing modes
+ AddrMode(..),
+ addrOffset,
+
+ -- * The 'Reg' type
+ RegNo,
+ Reg(..), isRealReg, isVirtualReg,
+ RegClass(..), regClass,
+ getHiVRegFromLo,
+ mkVReg,
+
+ -- * Global registers
+ get_GlobalReg_reg_or_addr,
+ callerSaves, callerSaveVolatileRegs,
+
+ -- * Machine-dependent register-related stuff
+ allocatableRegs, argRegs, allArgRegs, callClobberedRegs,
+ freeReg,
+ spRel,
+
+#if alpha_TARGET_ARCH
+ fits8Bits,
+ fReg,
+ gp, pv, ra, sp, t9, t10, t11, t12, v0, f0, zeroh,
+#endif
+#if i386_TARGET_ARCH
+ EABase(..), EAIndex(..),
+ eax, ebx, ecx, edx, esi, edi, ebp, esp,
+ fake0, fake1, fake2, fake3, fake4, fake5,
+ addrModeRegs,
+#endif
+#if x86_64_TARGET_ARCH
+ EABase(..), EAIndex(..), ripRel,
+ rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
+ eax, ebx, ecx, edx, esi, edi, ebp, esp,
+ r8, r9, r10, r11, r12, r13, r14, r15,
+ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+ xmm,
+ addrModeRegs, allFPArgRegs,
+#endif
+#if sparc_TARGET_ARCH
+ fits13Bits,
+ fpRel, gReg, iReg, lReg, oReg, largeOffsetError,
+ fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
+#endif
+#if powerpc_TARGET_ARCH
+ allFPArgRegs,
+ makeImmediate,
+ sp,
+ r3, r4, r27, r28,
+ f1, f20, f21,
+#endif
+ ) where
+
+#include "HsVersions.h"
+
+#if i386_TARGET_ARCH
+# define STOLEN_X86_REGS 4
+-- HACK: go for the max
+#endif
+
+#include "../includes/MachRegs.h"
+
+import Cmm
+import MachOp ( MachRep(..) )
+
+import CLabel ( CLabel, mkMainCapabilityLabel )
+import Pretty
+import Outputable ( Outputable(..), pprPanic, panic )
+import qualified Outputable
+import Unique
+import Constants
+import FastTypes
+
+#if powerpc_TARGET_ARCH
+#if __GLASGOW_HASKELL__ >= 504
+import Data.Word ( Word8, Word16, Word32 )
+import Data.Int ( Int8, Int16, Int32 )
+#else
+import Word ( Word8, Word16, Word32 )
+import Int ( Int8, Int16, Int32 )
+#endif
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Immediates
+
+data Imm
+ = ImmInt Int
+ | ImmInteger Integer -- Sigh.
+ | ImmCLbl CLabel -- AbstractC Label (with baggage)
+ | ImmLit Doc -- Simple string
+ | ImmIndex CLabel Int
+ | ImmFloat Rational
+ | ImmDouble Rational
+ | ImmConstantSum Imm Imm
+ | ImmConstantDiff Imm Imm
+#if sparc_TARGET_ARCH
+ | LO Imm {- Possible restrictions... -}
+ | HI Imm
+#endif
+#if powerpc_TARGET_ARCH
+ | LO Imm
+ | HI Imm
+ | HA Imm {- high halfword adjusted -}
+#endif
+strImmLit s = ImmLit (text s)
+
+litToImm :: CmmLit -> Imm
+litToImm (CmmInt i _) = ImmInteger i
+litToImm (CmmFloat f F32) = ImmFloat f
+litToImm (CmmFloat f F64) = ImmDouble f
+litToImm (CmmLabel l) = ImmCLbl l
+litToImm (CmmLabelOff l off) = ImmIndex l off
+litToImm (CmmLabelDiffOff l1 l2 off)
+ = ImmConstantSum
+ (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
+ (ImmInt off)
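+
+-- For example (a couple of illustrative reductions, not an exhaustive list):
+--
+--    litToImm (CmmInt 42 I32)       ==>  ImmInteger 42
+--    litToImm (CmmLabelOff lbl 8)   ==>  ImmIndex lbl 8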
+
+-- -----------------------------------------------------------------------------
+-- Addressing modes
+
+data AddrMode
+#if alpha_TARGET_ARCH
+ = AddrImm Imm
+ | AddrReg Reg
+ | AddrRegImm Reg Imm
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ = AddrBaseIndex EABase EAIndex Displacement
+ | ImmAddr Imm Int
+
+data EABase = EABaseNone | EABaseReg Reg | EABaseRip
+data EAIndex = EAIndexNone | EAIndex Reg Int
+type Displacement = Imm
+#endif
+
+#if sparc_TARGET_ARCH
+ = AddrRegReg Reg Reg
+ | AddrRegImm Reg Imm
+#endif
+
+#if powerpc_TARGET_ARCH
+ = AddrRegReg Reg Reg
+ | AddrRegImm Reg Imm
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+addrModeRegs :: AddrMode -> [Reg]
+addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs
+ where
+ b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
+ i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }
+addrModeRegs _ = []
+#endif
+
+
+addrOffset :: AddrMode -> Int -> Maybe AddrMode
+
+addrOffset addr off
+ = case addr of
+#if alpha_TARGET_ARCH
+ _ -> panic "MachMisc.addrOffset not defined for Alpha"
+#endif
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ ImmAddr i off0 -> Just (ImmAddr i (off0 + off))
+
+ AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off)))
+ AddrBaseIndex r i (ImmInteger n)
+ -> Just (AddrBaseIndex r i (ImmInt (fromInteger (n + toInteger off))))
+
+ AddrBaseIndex r i (ImmCLbl lbl)
+ -> Just (AddrBaseIndex r i (ImmIndex lbl off))
+
+ AddrBaseIndex r i (ImmIndex lbl ix)
+ -> Just (AddrBaseIndex r i (ImmIndex lbl (ix+off)))
+
+ _ -> Nothing -- in theory, shouldn't happen
+#endif
+#if sparc_TARGET_ARCH
+ AddrRegImm r (ImmInt n)
+ | fits13Bits n2 -> Just (AddrRegImm r (ImmInt n2))
+ | otherwise -> Nothing
+ where n2 = n + off
+
+ AddrRegImm r (ImmInteger n)
+ | fits13Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2)))
+ | otherwise -> Nothing
+ where n2 = n + toInteger off
+
+ AddrRegReg r (RealReg 0)
+ | fits13Bits off -> Just (AddrRegImm r (ImmInt off))
+ | otherwise -> Nothing
+
+ _ -> Nothing
+#endif /* sparc */
+#if powerpc_TARGET_ARCH
+ AddrRegImm r (ImmInt n)
+ | fits16Bits n2 -> Just (AddrRegImm r (ImmInt n2))
+ | otherwise -> Nothing
+ where n2 = n + off
+
+ AddrRegImm r (ImmInteger n)
+ | fits16Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2)))
+ | otherwise -> Nothing
+ where n2 = n + toInteger off
+
+ _ -> Nothing
+#endif /* powerpc */
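+
+-- For example, on a 32-bit x86 target (where wORD_SIZE is 4), bumping a
+-- stack-relative address by one word behaves like this:
+--
+--    addrOffset (spRel 0) wORD_SIZE
+--      ==>  Just (AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt 4))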
+
+-----------------
+#if alpha_TARGET_ARCH
+
+fits8Bits :: Integer -> Bool
+fits8Bits i = i >= -256 && i < 256
+
+#endif
+
+#if sparc_TARGET_ARCH
+
+{-# SPECIALIZE fits13Bits :: Int -> Bool, Integer -> Bool #-}
+fits13Bits :: Integral a => a -> Bool
+fits13Bits x = x >= -4096 && x < 4096
+
+-----------------
+largeOffsetError i
+ = error ("ERROR: SPARC native-code generator cannot handle large offset ("
+ ++show i++");\nprobably because of large constant data structures;" ++
+ "\nworkaround: use -fvia-C on this module.\n")
+
+#endif /* sparc */
+
+#if powerpc_TARGET_ARCH
+fits16Bits :: Integral a => a -> Bool
+fits16Bits x = x >= -32768 && x < 32768
+
+makeImmediate :: Integral a => MachRep -> Bool -> a -> Maybe Imm
+
+makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
+ where
+ narrow I32 False = fromIntegral (fromIntegral x :: Word32)
+ narrow I16 False = fromIntegral (fromIntegral x :: Word16)
+ narrow I8 False = fromIntegral (fromIntegral x :: Word8)
+ narrow I32 True = fromIntegral (fromIntegral x :: Int32)
+ narrow I16 True = fromIntegral (fromIntegral x :: Int16)
+ narrow I8 True = fromIntegral (fromIntegral x :: Int8)
+
+ narrowed = narrow rep signed
+
+ toI16 I32 True
+ | narrowed >= -32768 && narrowed < 32768 = Just narrowed
+ | otherwise = Nothing
+ toI16 I32 False
+ | narrowed >= 0 && narrowed < 65536 = Just narrowed
+ | otherwise = Nothing
+ toI16 _ _ = Just narrowed
+#endif
+
+
+-- @spRel@ gives us a stack relative addressing mode for volatile
+-- temporaries and for excess call arguments. @fpRel@, where
+-- applicable, is the same but for the frame pointer.
+
+spRel :: Int -- desired stack offset in words, positive or negative
+ -> AddrMode
+
+spRel n
+#if defined(i386_TARGET_ARCH)
+ = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE))
+#elif defined(x86_64_TARGET_ARCH)
+ = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE))
+#else
+ = AddrRegImm sp (ImmInt (n * wORD_SIZE))
+#endif
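+
+-- For example, instruction selection (in MachCodeGen, say) could use spRel
+-- to build a word-sized spill to the n'th stack slot on x86:
+--
+--    spillToSlot :: Reg -> Int -> Instr
+--    spillToSlot reg slot = MOV I32 (OpReg reg) (OpAddr (spRel slot))
+--
+-- (MOV, OpReg and OpAddr come from MachInstrs; spillToSlot itself is only
+-- an illustration, not a function defined anywhere in the NCG.)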
+
+#if sparc_TARGET_ARCH
+fpRel :: Int -> AddrMode
+ -- Duznae work for offsets greater than 13 bits; we just hope for
+ -- the best
+fpRel n
+ = AddrRegImm fp (ImmInt (n * wORD_SIZE))
+#endif
+
+#if x86_64_TARGET_ARCH
+ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Global registers
+
+-- We map STG registers onto appropriate CmmExprs. Either they map
+-- to real machine registers, or they are stored as offsets from BaseReg.
+-- Given a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it. get_GlobalReg_addr always
+-- produces the register table address for it.
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_addr :: GlobalReg -> CmmExpr
+get_Regtable_addr_from_offset :: MachRep -> Int -> CmmExpr
+
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
+
+get_GlobalReg_addr BaseReg = regTableOffset 0
+get_GlobalReg_addr mid = get_Regtable_addr_from_offset
+ (globalRegRep mid) (baseRegOffset mid)
+
+-- Calculate a literal representing an offset into the register table.
+-- Used when we don't have an actual BaseReg to offset from.
+regTableOffset n =
+ CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
+
+get_Regtable_addr_from_offset rep offset
+ = case globalRegMaybe BaseReg of
+ Nothing -> regTableOffset offset
+ Just _ -> CmmRegOff (CmmGlobal BaseReg) offset
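+
+-- For example, a hypothetical caller that wants the current value of a
+-- global register as a CmmExpr, wherever it happens to live, could write:
+--
+--    globalRegValue :: GlobalReg -> CmmExpr
+--    globalRegValue greg
+--      = case get_GlobalReg_reg_or_addr greg of
+--          Left  _real -> CmmReg (CmmGlobal greg)           -- in a machine register
+--          Right addr  -> CmmLoad addr (globalRegRep greg)  -- read from the register table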
+
+-- -----------------------------------------------------------------------------
+-- caller-save registers
+
+-- Here we generate the sequence of saves/restores required around a
+-- foreign call instruction.
+
+callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
+callerSaveVolatileRegs vols = (caller_save, caller_load)
+ where
+ caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
+ caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
+
+ system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery,
+ {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
+
+ regs_to_save = system_regs ++ vol_list
+
+ vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
+
+ all_of_em = [ VanillaReg n | n <- [0..mAX_Vanilla_REG] ]
+ ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
+ ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
+ ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
+
+ callerSaveGlobalReg reg next
+ | callerSaves reg =
+ CmmStore (get_GlobalReg_addr reg)
+ (CmmReg (CmmGlobal reg)) : next
+ | otherwise = next
+
+ callerRestoreGlobalReg reg next
+ | callerSaves reg =
+ CmmAssign (CmmGlobal reg)
+ (CmmLoad (get_GlobalReg_addr reg) (globalRegRep reg))
+ : next
+ | otherwise = next
+
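+-- For example, a hypothetical wrapper that protects a foreign call would
+-- splice the save and restore sequences around the call statement:
+--
+--    wrapForeignCall :: Maybe [GlobalReg] -> CmmStmt -> [CmmStmt]
+--    wrapForeignCall vols call
+--      = let (saves, restores) = callerSaveVolatileRegs vols
+--        in  saves ++ [call] ++ restores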
+
+-- ---------------------------------------------------------------------------
+-- Registers
+
+-- RealRegs are machine regs which are available for allocation, in
+-- the usual way. We know what class they are, because that's part of
+-- the processor's architecture.
+
+-- VirtualRegs are virtual registers. The register allocator will
+-- eventually have to map them into RealRegs, or into spill slots.
+-- VirtualRegs are allocated on the fly, usually to represent a single
+-- value in the abstract assembly code (i.e. dynamic registers are
+-- usually single assignment). With the new register allocator, the
+-- single assignment restriction isn't necessary to get correct code,
+-- although a better register allocation will result if single
+-- assignment is used -- because the allocator maps a VirtualReg into
+-- a single RealReg, even if the VirtualReg has multiple live ranges.
+
+-- Virtual regs can be of either class, so that info is attached.
+
+-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
+-- when supplied with the vreg for the lower-half of the quantity.
+-- (NB. Not reversible).
+getHiVRegFromLo (VirtualRegI u)
+ = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
+getHiVRegFromLo other
+ = pprPanic "getHiVRegFromLo" (ppr other)
+
+data RegClass
+ = RcInteger
+ | RcFloat
+ | RcDouble
+ deriving Eq
+
+type RegNo = Int
+
+data Reg
+ = RealReg {-# UNPACK #-} !RegNo
+ | VirtualRegI {-# UNPACK #-} !Unique
+ | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
+ | VirtualRegF {-# UNPACK #-} !Unique
+ | VirtualRegD {-# UNPACK #-} !Unique
+ deriving (Eq,Ord)
+
+-- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
+-- in the register allocator.
+instance Uniquable Reg where
+ getUnique (RealReg i) = mkUnique 'C' i
+ getUnique (VirtualRegI u) = u
+ getUnique (VirtualRegHi u) = u
+ getUnique (VirtualRegF u) = u
+ getUnique (VirtualRegD u) = u
+
+unRealReg (RealReg i) = i
+unRealReg vreg = pprPanic "unRealReg on VirtualReg" (ppr vreg)
+
+mkVReg :: Unique -> MachRep -> Reg
+mkVReg u rep
+ = case rep of
+#if sparc_TARGET_ARCH
+ F32 -> VirtualRegF u
+#else
+ F32 -> VirtualRegD u
+#endif
+ F64 -> VirtualRegD u
+ other -> VirtualRegI u
+
+isVirtualReg :: Reg -> Bool
+isVirtualReg (RealReg _) = False
+isVirtualReg (VirtualRegI _) = True
+isVirtualReg (VirtualRegHi _) = True
+isVirtualReg (VirtualRegF _) = True
+isVirtualReg (VirtualRegD _) = True
+
+isRealReg :: Reg -> Bool
+isRealReg = not . isVirtualReg
+
+instance Show Reg where
+ show (RealReg i) = showReg i
+ show (VirtualRegI u) = "%vI_" ++ show u
+ show (VirtualRegHi u) = "%vHi_" ++ show u
+ show (VirtualRegF u) = "%vF_" ++ show u
+ show (VirtualRegD u) = "%vD_" ++ show u
+
+instance Outputable Reg where
+ ppr r = Outputable.text (show r)
+
+
+-- -----------------------------------------------------------------------------
+-- Machine-specific register stuff
+
+-- The Alpha has 64 registers of interest; 32 integer registers and 32 floating
+-- point registers. The mapping of STG registers to alpha machine registers
+-- is defined in StgRegs.h. We are, of course, prepared for any eventuality.
+
+#if alpha_TARGET_ARCH
+fReg :: Int -> RegNo
+fReg x = (32 + x)
+
+v0, f0, ra, pv, gp, sp, zeroh :: Reg
+v0 = realReg 0
+f0 = realReg (fReg 0)
+ra = FixedReg ILIT(26)
+pv = t12
+gp = FixedReg ILIT(29)
+sp = FixedReg ILIT(30)
+zeroh = FixedReg ILIT(31) -- "zero" is used in 1.3 (MonadZero method)
+
+t9, t10, t11, t12 :: Reg
+t9 = realReg 23
+t10 = realReg 24
+t11 = realReg 25
+t12 = realReg 27
+#endif
+
+{-
+Intel x86 architecture:
+- All registers except 7 (esp) are available for use.
+- Only ebx, esi, edi and esp are available across a C call (they are callee-saves).
+- Registers 0-7 have 16-bit counterparts (ax, bx etc.)
+- Registers 0-3 have 8 bit counterparts (ah, bh etc.)
+- Registers 8-13 are fakes; we pretend x86 has 6 conventionally-addressable
+ fp registers, and 3-operand insns for them, and we translate this into
+ real stack-based x86 fp code after register allocation.
+
+The fp registers are all Double registers; we don't have any RcFloat class
+regs. @regClass@ barfs if you give it a VirtualRegF, and mkVReg above should
+never generate them.
+-}
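+
+{-
+For example, a double-precision add of two FP regs is expressed with the
+three-operand fake insn from MachInstrs, and only turned into real %st(i)
+stack code when the instructions are pretty-printed:
+
+   -- purely illustrative; not a function defined in the NCG
+   addDouble :: Reg -> Reg -> Reg -> Instr
+   addDouble src1 src2 dst = GADD F64 src1 src2 dst
+-}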
+
+#if i386_TARGET_ARCH
+
+fake0, fake1, fake2, fake3, fake4, fake5,
+ eax, ebx, ecx, edx, esp, ebp, esi, edi :: Reg
+eax = RealReg 0
+ebx = RealReg 1
+ecx = RealReg 2
+edx = RealReg 3
+esi = RealReg 4
+edi = RealReg 5
+ebp = RealReg 6
+esp = RealReg 7
+fake0 = RealReg 8
+fake1 = RealReg 9
+fake2 = RealReg 10
+fake3 = RealReg 11
+fake4 = RealReg 12
+fake5 = RealReg 13
+
+-- On x86, we might want to have an 8-bit RegClass, which would
+-- contain just regs 1-4 (the others don't have 8-bit versions).
+-- However, we can get away without this at the moment because the
+-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
+regClass (RealReg i) = if i < 8 then RcInteger else RcDouble
+regClass (VirtualRegI u) = RcInteger
+regClass (VirtualRegHi u) = RcInteger
+regClass (VirtualRegD u) = RcDouble
+regClass (VirtualRegF u) = pprPanic "regClass(x86):VirtualRegF"
+ (ppr (VirtualRegF u))
+
+regNames
+ = ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp",
+ "%fake0", "%fake1", "%fake2", "%fake3", "%fake4", "%fake5", "%fake6"]
+
+showReg :: RegNo -> String
+showReg n
+ = if n >= 0 && n < 14
+ then regNames !! n
+ else "%unknown_x86_real_reg_" ++ show n
+
+#endif
+
+{-
+AMD x86_64 architecture:
+- Registers 0-15 have 32-bit counterparts (eax, ebx etc.)
+- Registers 0-7 have 16-bit counterparts (ax, bx etc.)
+- Registers 0-3 have 8 bit counterparts (ah, bh etc.)
+
+-}
+
+#if x86_64_TARGET_ARCH
+
+rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi,
+ r8, r9, r10, r11, r12, r13, r14, r15,
+ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg
+
+rax = RealReg 0
+rbx = RealReg 1
+rcx = RealReg 2
+rdx = RealReg 3
+rsi = RealReg 4
+rdi = RealReg 5
+rbp = RealReg 6
+rsp = RealReg 7
+r8 = RealReg 8
+r9 = RealReg 9
+r10 = RealReg 10
+r11 = RealReg 11
+r12 = RealReg 12
+r13 = RealReg 13
+r14 = RealReg 14
+r15 = RealReg 15
+xmm0 = RealReg 16
+xmm1 = RealReg 17
+xmm2 = RealReg 18
+xmm3 = RealReg 19
+xmm4 = RealReg 20
+xmm5 = RealReg 21
+xmm6 = RealReg 22
+xmm7 = RealReg 23
+xmm8 = RealReg 24
+xmm9 = RealReg 25
+xmm10 = RealReg 26
+xmm11 = RealReg 27
+xmm12 = RealReg 28
+xmm13 = RealReg 29
+xmm14 = RealReg 30
+xmm15 = RealReg 31
+
+ -- so we can re-use some x86 code:
+eax = rax
+ebx = rbx
+ecx = rcx
+edx = rdx
+esi = rsi
+edi = rdi
+ebp = rbp
+esp = rsp
+
+xmm n = RealReg (16+n)
+
+-- On x86, we might want to have an 8-bit RegClass, which would
+-- contain just regs 1-4 (the others don't have 8-bit versions).
+-- However, we can get away without this at the moment because the
+-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
+regClass (RealReg i) = if i < 16 then RcInteger else RcDouble
+regClass (VirtualRegI u) = RcInteger
+regClass (VirtualRegHi u) = RcInteger
+regClass (VirtualRegD u) = RcDouble
+regClass (VirtualRegF u) = pprPanic "regClass(x86_64):VirtualRegF"
+ (ppr (VirtualRegF u))
+
+regNames
+ = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]
+
+showReg :: RegNo -> String
+showReg n
+ | n >= 16 = "%xmm" ++ show (n-16)
+ | n >= 8 = "%r" ++ show n
+ | otherwise = regNames !! n
+
+#endif
+
+{-
+The SPARC has 64 registers of interest; 32 integer registers and 32
+floating point registers. The mapping of STG registers to SPARC
+machine registers is defined in StgRegs.h. We are, of course,
+prepared for any eventuality.
+
+The whole fp-register pairing thing on sparcs is a huge nuisance. See
+fptools/ghc/includes/MachRegs.h for a description of what's going on
+here.
+-}
+
+#if sparc_TARGET_ARCH
+
+gReg,lReg,iReg,oReg,fReg :: Int -> RegNo
+gReg x = x
+oReg x = (8 + x)
+lReg x = (16 + x)
+iReg x = (24 + x)
+fReg x = (32 + x)
+
+nCG_FirstFloatReg :: RegNo
+nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
+
+regClass (VirtualRegI u) = RcInteger
+regClass (VirtualRegF u) = RcFloat
+regClass (VirtualRegD u) = RcDouble
+regClass (RealReg i) | i < 32 = RcInteger
+ | i < nCG_FirstFloatReg = RcDouble
+ | otherwise = RcFloat
+
+showReg :: RegNo -> String
+showReg n
+ | n >= 0 && n < 8 = "%g" ++ show n
+ | n >= 8 && n < 16 = "%o" ++ show (n-8)
+ | n >= 16 && n < 24 = "%l" ++ show (n-16)
+ | n >= 24 && n < 32 = "%i" ++ show (n-24)
+ | n >= 32 && n < 64 = "%f" ++ show (n-32)
+ | otherwise = "%unknown_sparc_real_reg_" ++ show n
+
+g0, g1, g2, fp, sp, o0, o1, f0, f1, f6, f8, f22, f26, f27 :: Reg
+
+f6 = RealReg (fReg 6)
+f8 = RealReg (fReg 8)
+f22 = RealReg (fReg 22)
+f26 = RealReg (fReg 26)
+f27 = RealReg (fReg 27)
+
+
+-- g0 is useful for codegen; it is always zero, and writes to it vanish.
+g0 = RealReg (gReg 0)
+g1 = RealReg (gReg 1)
+g2 = RealReg (gReg 2)
+
+-- FP, SP, int and float return (from C) regs.
+fp = RealReg (iReg 6)
+sp = RealReg (oReg 6)
+o0 = RealReg (oReg 0)
+o1 = RealReg (oReg 1)
+f0 = RealReg (fReg 0)
+f1 = RealReg (fReg 1)
+
+#endif
+
+{-
+The PowerPC has 64 registers of interest; 32 integer registers and 32 floating
+point registers.
+-}
+
+#if powerpc_TARGET_ARCH
+fReg :: Int -> RegNo
+fReg x = (32 + x)
+
+regClass (VirtualRegI u) = RcInteger
+regClass (VirtualRegHi u) = RcInteger
+regClass (VirtualRegF u) = pprPanic "regClass(ppc):VirtualRegF"
+ (ppr (VirtualRegF u))
+regClass (VirtualRegD u) = RcDouble
+regClass (RealReg i) | i < 32 = RcInteger
+ | otherwise = RcDouble
+
+showReg :: RegNo -> String
+showReg n
+ | n >= 0 && n <= 31 = "%r" ++ show n
+ | n >= 32 && n <= 63 = "%f" ++ show (n - 32)
+ | otherwise = "%unknown_powerpc_real_reg_" ++ show n
+
+sp = RealReg 1
+r3 = RealReg 3
+r4 = RealReg 4
+r27 = RealReg 27
+r28 = RealReg 28
+f1 = RealReg $ fReg 1
+f20 = RealReg $ fReg 20
+f21 = RealReg $ fReg 21
+#endif
+
+{-
+Redefine the literals used for machine-registers with non-numeric
+names in the header files. Gag me with a spoon, eh?
+-}
+
+#if alpha_TARGET_ARCH
+#define f0 32
+#define f1 33
+#define f2 34
+#define f3 35
+#define f4 36
+#define f5 37
+#define f6 38
+#define f7 39
+#define f8 40
+#define f9 41
+#define f10 42
+#define f11 43
+#define f12 44
+#define f13 45
+#define f14 46
+#define f15 47
+#define f16 48
+#define f17 49
+#define f18 50
+#define f19 51
+#define f20 52
+#define f21 53
+#define f22 54
+#define f23 55
+#define f24 56
+#define f25 57
+#define f26 58
+#define f27 59
+#define f28 60
+#define f29 61
+#define f30 62
+#define f31 63
+#endif
+#if i386_TARGET_ARCH
+#define eax 0
+#define ebx 1
+#define ecx 2
+#define edx 3
+#define esi 4
+#define edi 5
+#define ebp 6
+#define esp 7
+#define fake0 8
+#define fake1 9
+#define fake2 10
+#define fake3 11
+#define fake4 12
+#define fake5 13
+#endif
+
+#if x86_64_TARGET_ARCH
+#define rax 0
+#define rbx 1
+#define rcx 2
+#define rdx 3
+#define rsi 4
+#define rdi 5
+#define rbp 6
+#define rsp 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define xmm0 16
+#define xmm1 17
+#define xmm2 18
+#define xmm3 19
+#define xmm4 20
+#define xmm5 21
+#define xmm6 22
+#define xmm7 23
+#define xmm8 24
+#define xmm9 25
+#define xmm10 26
+#define xmm11 27
+#define xmm12 28
+#define xmm13 29
+#define xmm14 30
+#define xmm15 31
+#endif
+
+#if sparc_TARGET_ARCH
+#define g0 0
+#define g1 1
+#define g2 2
+#define g3 3
+#define g4 4
+#define g5 5
+#define g6 6
+#define g7 7
+#define o0 8
+#define o1 9
+#define o2 10
+#define o3 11
+#define o4 12
+#define o5 13
+#define o6 14
+#define o7 15
+#define l0 16
+#define l1 17
+#define l2 18
+#define l3 19
+#define l4 20
+#define l5 21
+#define l6 22
+#define l7 23
+#define i0 24
+#define i1 25
+#define i2 26
+#define i3 27
+#define i4 28
+#define i5 29
+#define i6 30
+#define i7 31
+
+#define f0 32
+#define f1 33
+#define f2 34
+#define f3 35
+#define f4 36
+#define f5 37
+#define f6 38
+#define f7 39
+#define f8 40
+#define f9 41
+#define f10 42
+#define f11 43
+#define f12 44
+#define f13 45
+#define f14 46
+#define f15 47
+#define f16 48
+#define f17 49
+#define f18 50
+#define f19 51
+#define f20 52
+#define f21 53
+#define f22 54
+#define f23 55
+#define f24 56
+#define f25 57
+#define f26 58
+#define f27 59
+#define f28 60
+#define f29 61
+#define f30 62
+#define f31 63
+#endif
+
+#if powerpc_TARGET_ARCH
+#define r0 0
+#define r1 1
+#define r2 2
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define r16 16
+#define r17 17
+#define r18 18
+#define r19 19
+#define r20 20
+#define r21 21
+#define r22 22
+#define r23 23
+#define r24 24
+#define r25 25
+#define r26 26
+#define r27 27
+#define r28 28
+#define r29 29
+#define r30 30
+#define r31 31
+
+#ifdef darwin_TARGET_OS
+#define f0 32
+#define f1 33
+#define f2 34
+#define f3 35
+#define f4 36
+#define f5 37
+#define f6 38
+#define f7 39
+#define f8 40
+#define f9 41
+#define f10 42
+#define f11 43
+#define f12 44
+#define f13 45
+#define f14 46
+#define f15 47
+#define f16 48
+#define f17 49
+#define f18 50
+#define f19 51
+#define f20 52
+#define f21 53
+#define f22 54
+#define f23 55
+#define f24 56
+#define f25 57
+#define f26 58
+#define f27 59
+#define f28 60
+#define f29 61
+#define f30 62
+#define f31 63
+#else
+#define fr0 32
+#define fr1 33
+#define fr2 34
+#define fr3 35
+#define fr4 36
+#define fr5 37
+#define fr6 38
+#define fr7 39
+#define fr8 40
+#define fr9 41
+#define fr10 42
+#define fr11 43
+#define fr12 44
+#define fr13 45
+#define fr14 46
+#define fr15 47
+#define fr16 48
+#define fr17 49
+#define fr18 50
+#define fr19 51
+#define fr20 52
+#define fr21 53
+#define fr22 54
+#define fr23 55
+#define fr24 56
+#define fr25 57
+#define fr26 58
+#define fr27 59
+#define fr28 60
+#define fr29 61
+#define fr30 62
+#define fr31 63
+#endif
+#endif
+
+
+-- allMachRegNos is the complete set of machine reg numbers.
+allMachRegNos :: [RegNo]
+allMachRegNos
+ = IF_ARCH_alpha( [0..63],
+ IF_ARCH_i386( [0..13],
+ IF_ARCH_x86_64( [0..31],
+ IF_ARCH_sparc( ([0..31]
+ ++ [f0,f2 .. nCG_FirstFloatReg-1]
+ ++ [nCG_FirstFloatReg .. f31]),
+ IF_ARCH_powerpc([0..63],
+ )))))
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs to which we are prepared to let the
+-- register allocator map VRegs.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
+
+-- these are the regs which we cannot assume stay alive over a
+-- C call.
+callClobberedRegs :: [Reg]
+callClobberedRegs
+ =
+#if alpha_TARGET_ARCH
+ [0, 1, 2, 3, 4, 5, 6, 7, 8,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ fReg 0, fReg 1, fReg 10, fReg 11, fReg 12, fReg 13, fReg 14, fReg 15,
+ fReg 16, fReg 17, fReg 18, fReg 19, fReg 20, fReg 21, fReg 22, fReg 23,
+ fReg 24, fReg 25, fReg 26, fReg 27, fReg 28, fReg 29, fReg 30]
+#endif /* alpha_TARGET_ARCH */
+#if i386_TARGET_ARCH
+ -- caller-saves registers
+ map RealReg [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
+#endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+ -- caller-saves registers
+ map RealReg ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..31])
+ -- all xmm regs are caller-saves
+#endif /* x86_64_TARGET_ARCH */
+#if sparc_TARGET_ARCH
+ map RealReg
+ ( oReg 7 :
+ [oReg i | i <- [0..5]] ++
+ [gReg i | i <- [1..7]] ++
+ [fReg i | i <- [0..31]] )
+#endif /* sparc_TARGET_ARCH */
+#if powerpc_TARGET_ARCH
+#if darwin_TARGET_OS
+ map RealReg (0:[2..12] ++ map fReg [0..13])
+#elif linux_TARGET_OS
+ map RealReg (0:[2..13] ++ map fReg [0..13])
+#endif
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- argRegs is the set of regs which are read for an n-argument call to C.
+-- For archs which pass all args on the stack (x86), it is empty.
+-- Sparc passes up to the first 6 args in regs.
+-- Dunno about Alpha.
+argRegs :: RegNo -> [Reg]
+
+#if i386_TARGET_ARCH
+argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
+#endif
+
+#if x86_64_TARGET_ARCH
+argRegs _ = panic "MachRegs.argRegs(x86_64): should not be used!"
+#endif
+
+#if alpha_TARGET_ARCH
+argRegs 0 = []
+argRegs 1 = freeMappedRegs [16, fReg 16]
+argRegs 2 = freeMappedRegs [16, 17, fReg 16, fReg 17]
+argRegs 3 = freeMappedRegs [16, 17, 18, fReg 16, fReg 17, fReg 18]
+argRegs 4 = freeMappedRegs [16, 17, 18, 19, fReg 16, fReg 17, fReg 18, fReg 19]
+argRegs 5 = freeMappedRegs [16, 17, 18, 19, 20, fReg 16, fReg 17, fReg 18, fReg 19, fReg 20]
+argRegs 6 = freeMappedRegs [16, 17, 18, 19, 20, 21, fReg 16, fReg 17, fReg 18, fReg 19, fReg 20, fReg 21]
+argRegs _ = panic "MachRegs.argRegs(alpha): don't know about >6 arguments!"
+#endif /* alpha_TARGET_ARCH */
+
+#if sparc_TARGET_ARCH
+argRegs 0 = []
+argRegs 1 = map (RealReg . oReg) [0]
+argRegs 2 = map (RealReg . oReg) [0,1]
+argRegs 3 = map (RealReg . oReg) [0,1,2]
+argRegs 4 = map (RealReg . oReg) [0,1,2,3]
+argRegs 5 = map (RealReg . oReg) [0,1,2,3,4]
+argRegs 6 = map (RealReg . oReg) [0,1,2,3,4,5]
+argRegs _ = panic "MachRegs.argRegs(sparc): don't know about >6 arguments!"
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+argRegs 0 = []
+argRegs 1 = map RealReg [3]
+argRegs 2 = map RealReg [3,4]
+argRegs 3 = map RealReg [3..5]
+argRegs 4 = map RealReg [3..6]
+argRegs 5 = map RealReg [3..7]
+argRegs 6 = map RealReg [3..8]
+argRegs 7 = map RealReg [3..9]
+argRegs 8 = map RealReg [3..10]
+argRegs _ = panic "MachRegs.argRegs(powerpc): don't know about >8 arguments!"
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- all of the arg regs ??
+#if alpha_TARGET_ARCH
+allArgRegs :: [(Reg, Reg)]
+allArgRegs = [(realReg i, realReg (fReg i)) | i <- [16..21]]
+#endif /* alpha_TARGET_ARCH */
+
+#if sparc_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = map RealReg [oReg i | i <- [0..5]]
+#endif /* sparc_TARGET_ARCH */
+
+#if i386_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = panic "MachRegs.allArgRegs(x86): should not be used!"
+#endif
+
+#if x86_64_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = map RealReg [rdi,rsi,rdx,rcx,r8,r9]
+allFPArgRegs :: [Reg]
+allFPArgRegs = map RealReg [xmm0 .. xmm7]
+#endif
+
+#if powerpc_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = map RealReg [3..10]
+allFPArgRegs :: [Reg]
+#if darwin_TARGET_OS
+allFPArgRegs = map (RealReg . fReg) [1..13]
+#elif linux_TARGET_OS
+allFPArgRegs = map (RealReg . fReg) [1..8]
+#endif
+#endif /* powerpc_TARGET_ARCH */
+\end{code}
+
+\begin{code}
+freeReg :: RegNo -> FastBool
+
+#if alpha_TARGET_ARCH
+freeReg 26 = fastBool False -- return address (ra)
+freeReg 28 = fastBool False -- reserved for the assembler (at)
+freeReg 29 = fastBool False -- global pointer (gp)
+freeReg 30 = fastBool False -- stack pointer (sp)
+freeReg 31 = fastBool False -- always zero (zeroh)
+freeReg 63 = fastBool False -- always zero (f31)
+#endif
+
+#if i386_TARGET_ARCH
+freeReg esp = fastBool False -- %esp is the C stack pointer
+#endif
+
+#if x86_64_TARGET_ARCH
+freeReg rsp = fastBool False -- %rsp is the C stack pointer
+#endif
+
+#if sparc_TARGET_ARCH
+freeReg g0 = fastBool False -- %g0 is always 0.
+freeReg g5 = fastBool False -- %g5 is reserved (ABI).
+freeReg g6 = fastBool False -- %g6 is reserved (ABI).
+freeReg g7 = fastBool False -- %g7 is reserved (ABI).
+freeReg i6 = fastBool False -- %i6 is our frame pointer.
+freeReg i7 = fastBool False -- %i7 tends to have ret-addr-ish things
+freeReg o6 = fastBool False -- %o6 is our stack pointer.
+freeReg o7 = fastBool False -- %o7 holds ret addrs (???)
+freeReg f0 = fastBool False -- %f0/%f1 are the C fp return registers.
+freeReg f1 = fastBool False
+#endif
+
+#if powerpc_TARGET_ARCH
+freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns, but it's actually free
+freeReg 1 = fastBool False -- The Stack Pointer
+#if !darwin_TARGET_OS
+ -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
+freeReg 2 = fastBool False
+#endif
+#endif
+
+#ifdef REG_Base
+freeReg REG_Base = fastBool False
+#endif
+#ifdef REG_R1
+freeReg REG_R1 = fastBool False
+#endif
+#ifdef REG_R2
+freeReg REG_R2 = fastBool False
+#endif
+#ifdef REG_R3
+freeReg REG_R3 = fastBool False
+#endif
+#ifdef REG_R4
+freeReg REG_R4 = fastBool False
+#endif
+#ifdef REG_R5
+freeReg REG_R5 = fastBool False
+#endif
+#ifdef REG_R6
+freeReg REG_R6 = fastBool False
+#endif
+#ifdef REG_R7
+freeReg REG_R7 = fastBool False
+#endif
+#ifdef REG_R8
+freeReg REG_R8 = fastBool False
+#endif
+#ifdef REG_F1
+freeReg REG_F1 = fastBool False
+#endif
+#ifdef REG_F2
+freeReg REG_F2 = fastBool False
+#endif
+#ifdef REG_F3
+freeReg REG_F3 = fastBool False
+#endif
+#ifdef REG_F4
+freeReg REG_F4 = fastBool False
+#endif
+#ifdef REG_D1
+freeReg REG_D1 = fastBool False
+#endif
+#ifdef REG_D2
+freeReg REG_D2 = fastBool False
+#endif
+#ifdef REG_Sp
+freeReg REG_Sp = fastBool False
+#endif
+#ifdef REG_Su
+freeReg REG_Su = fastBool False
+#endif
+#ifdef REG_SpLim
+freeReg REG_SpLim = fastBool False
+#endif
+#ifdef REG_Hp
+freeReg REG_Hp = fastBool False
+#endif
+#ifdef REG_HpLim
+freeReg REG_HpLim = fastBool False
+#endif
+freeReg n = fastBool True
+
+
+-- -----------------------------------------------------------------------------
+-- Information about global registers
+
+baseRegOffset :: GlobalReg -> Int
+
+baseRegOffset (VanillaReg 1) = oFFSET_StgRegTable_rR1
+baseRegOffset (VanillaReg 2) = oFFSET_StgRegTable_rR2
+baseRegOffset (VanillaReg 3) = oFFSET_StgRegTable_rR3
+baseRegOffset (VanillaReg 4) = oFFSET_StgRegTable_rR4
+baseRegOffset (VanillaReg 5) = oFFSET_StgRegTable_rR5
+baseRegOffset (VanillaReg 6) = oFFSET_StgRegTable_rR6
+baseRegOffset (VanillaReg 7) = oFFSET_StgRegTable_rR7
+baseRegOffset (VanillaReg 8) = oFFSET_StgRegTable_rR8
+baseRegOffset (VanillaReg 9) = oFFSET_StgRegTable_rR9
+baseRegOffset (VanillaReg 10) = oFFSET_StgRegTable_rR10
+baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
+baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
+baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
+baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
+baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
+baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
+baseRegOffset Sp = oFFSET_StgRegTable_rSp
+baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
+baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
+baseRegOffset Hp = oFFSET_StgRegTable_rHp
+baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
+baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
+baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
+baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
+baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
+baseRegOffset GCFun = oFFSET_stgGCFun
+#ifdef DEBUG
+baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
+baseRegOffset _ = panic "baseRegOffset:other"
+#endif
+
+
+-- | Returns 'True' if this global register is stored in a caller-saves
+-- machine register.
+
+callerSaves :: GlobalReg -> Bool
+
+#ifdef CALLER_SAVES_Base
+callerSaves BaseReg = True
+#endif
+#ifdef CALLER_SAVES_R1
+callerSaves (VanillaReg 1) = True
+#endif
+#ifdef CALLER_SAVES_R2
+callerSaves (VanillaReg 2) = True
+#endif
+#ifdef CALLER_SAVES_R3
+callerSaves (VanillaReg 3) = True
+#endif
+#ifdef CALLER_SAVES_R4
+callerSaves (VanillaReg 4) = True
+#endif
+#ifdef CALLER_SAVES_R5
+callerSaves (VanillaReg 5) = True
+#endif
+#ifdef CALLER_SAVES_R6
+callerSaves (VanillaReg 6) = True
+#endif
+#ifdef CALLER_SAVES_R7
+callerSaves (VanillaReg 7) = True
+#endif
+#ifdef CALLER_SAVES_R8
+callerSaves (VanillaReg 8) = True
+#endif
+#ifdef CALLER_SAVES_F1
+callerSaves (FloatReg 1) = True
+#endif
+#ifdef CALLER_SAVES_F2
+callerSaves (FloatReg 2) = True
+#endif
+#ifdef CALLER_SAVES_F3
+callerSaves (FloatReg 3) = True
+#endif
+#ifdef CALLER_SAVES_F4
+callerSaves (FloatReg 4) = True
+#endif
+#ifdef CALLER_SAVES_D1
+callerSaves (DoubleReg 1) = True
+#endif
+#ifdef CALLER_SAVES_D2
+callerSaves (DoubleReg 2) = True
+#endif
+#ifdef CALLER_SAVES_L1
+callerSaves (LongReg 1) = True
+#endif
+#ifdef CALLER_SAVES_Sp
+callerSaves Sp = True
+#endif
+#ifdef CALLER_SAVES_SpLim
+callerSaves SpLim = True
+#endif
+#ifdef CALLER_SAVES_Hp
+callerSaves Hp = True
+#endif
+#ifdef CALLER_SAVES_HpLim
+callerSaves HpLim = True
+#endif
+#ifdef CALLER_SAVES_CurrentTSO
+callerSaves CurrentTSO = True
+#endif
+#ifdef CALLER_SAVES_CurrentNursery
+callerSaves CurrentNursery = True
+#endif
+callerSaves _ = False
+
+
+-- | Returns 'Nothing' if this global register is not stored
+-- in a real machine register, otherwise returns @'Just' reg@, where
+-- reg is the machine register it is stored in.
+
+globalRegMaybe :: GlobalReg -> Maybe Reg
+
+#ifdef REG_Base
+globalRegMaybe BaseReg = Just (RealReg REG_Base)
+#endif
+#ifdef REG_R1
+globalRegMaybe (VanillaReg 1) = Just (RealReg REG_R1)
+#endif
+#ifdef REG_R2
+globalRegMaybe (VanillaReg 2) = Just (RealReg REG_R2)
+#endif
+#ifdef REG_R3
+globalRegMaybe (VanillaReg 3) = Just (RealReg REG_R3)
+#endif
+#ifdef REG_R4
+globalRegMaybe (VanillaReg 4) = Just (RealReg REG_R4)
+#endif
+#ifdef REG_R5
+globalRegMaybe (VanillaReg 5) = Just (RealReg REG_R5)
+#endif
+#ifdef REG_R6
+globalRegMaybe (VanillaReg 6) = Just (RealReg REG_R6)
+#endif
+#ifdef REG_R7
+globalRegMaybe (VanillaReg 7) = Just (RealReg REG_R7)
+#endif
+#ifdef REG_R8
+globalRegMaybe (VanillaReg 8) = Just (RealReg REG_R8)
+#endif
+#ifdef REG_R9
+globalRegMaybe (VanillaReg 9) = Just (RealReg REG_R9)
+#endif
+#ifdef REG_R10
+globalRegMaybe (VanillaReg 10) = Just (RealReg REG_R10)
+#endif
+#ifdef REG_F1
+globalRegMaybe (FloatReg 1) = Just (RealReg REG_F1)
+#endif
+#ifdef REG_F2
+globalRegMaybe (FloatReg 2) = Just (RealReg REG_F2)
+#endif
+#ifdef REG_F3
+globalRegMaybe (FloatReg 3) = Just (RealReg REG_F3)
+#endif
+#ifdef REG_F4
+globalRegMaybe (FloatReg 4) = Just (RealReg REG_F4)
+#endif
+#ifdef REG_D1
+globalRegMaybe (DoubleReg 1) = Just (RealReg REG_D1)
+#endif
+#ifdef REG_D2
+globalRegMaybe (DoubleReg 2) = Just (RealReg REG_D2)
+#endif
+#ifdef REG_Sp
+globalRegMaybe Sp = Just (RealReg REG_Sp)
+#endif
+#ifdef REG_Lng1
+globalRegMaybe (LongReg 1) = Just (RealReg REG_Lng1)
+#endif
+#ifdef REG_Lng2
+globalRegMaybe (LongReg 2) = Just (RealReg REG_Lng2)
+#endif
+#ifdef REG_SpLim
+globalRegMaybe SpLim = Just (RealReg REG_SpLim)
+#endif
+#ifdef REG_Hp
+globalRegMaybe Hp = Just (RealReg REG_Hp)
+#endif
+#ifdef REG_HpLim
+globalRegMaybe HpLim = Just (RealReg REG_HpLim)
+#endif
+#ifdef REG_CurrentTSO
+globalRegMaybe CurrentTSO = Just (RealReg REG_CurrentTSO)
+#endif
+#ifdef REG_CurrentNursery
+globalRegMaybe CurrentNursery = Just (RealReg REG_CurrentNursery)
+#endif
+globalRegMaybe _ = Nothing
+
+
+\end{code}
diff --git a/compiler/nativeGen/NCG.h b/compiler/nativeGen/NCG.h
new file mode 100644
index 0000000000..b17f682e71
--- /dev/null
+++ b/compiler/nativeGen/NCG.h
@@ -0,0 +1,108 @@
+/* -----------------------------------------------------------------------------
+
+ (c) The University of Glasgow, 1994-2004
+
+ Native-code generator header file - just useful macros for now.
+
+ -------------------------------------------------------------------------- */
+
+#ifndef NCG_H
+#define NCG_H
+
+#include "ghc_boot_platform.h"
+
+#define COMMA ,
+
+-- - - - - - - - - - - - - - - - - - - - - -
+#if alpha_TARGET_ARCH
+# define IF_ARCH_alpha(x,y) x
+#else
+# define IF_ARCH_alpha(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH
+# define IF_ARCH_i386(x,y) x
+#else
+# define IF_ARCH_i386(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+# define IF_ARCH_x86_64(x,y) x
+#else
+# define IF_ARCH_x86_64(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if freebsd_TARGET_OS
+# define IF_OS_freebsd(x,y) x
+#else
+# define IF_OS_freebsd(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if netbsd_TARGET_OS
+# define IF_OS_netbsd(x,y) x
+#else
+# define IF_OS_netbsd(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if openbsd_TARGET_OS
+# define IF_OS_openbsd(x,y) x
+#else
+# define IF_OS_openbsd(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if linux_TARGET_OS
+# define IF_OS_linux(x,y) x
+#else
+# define IF_OS_linux(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if linuxaout_TARGET_OS
+# define IF_OS_linuxaout(x,y) x
+#else
+# define IF_OS_linuxaout(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if bsdi_TARGET_OS
+# define IF_OS_bsdi(x,y) x
+#else
+# define IF_OS_bsdi(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if cygwin32_TARGET_OS
+# define IF_OS_cygwin32(x,y) x
+#else
+# define IF_OS_cygwin32(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if sparc_TARGET_ARCH
+# define IF_ARCH_sparc(x,y) x
+#else
+# define IF_ARCH_sparc(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if sunos4_TARGET_OS
+# define IF_OS_sunos4(x,y) x
+#else
+# define IF_OS_sunos4(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+-- NB: this will catch i386-*-solaris2, too
+#if solaris2_TARGET_OS
+# define IF_OS_solaris2(x,y) x
+#else
+# define IF_OS_solaris2(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if powerpc_TARGET_ARCH
+# define IF_ARCH_powerpc(x,y) x
+#else
+# define IF_ARCH_powerpc(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
+#if darwin_TARGET_OS
+# define IF_OS_darwin(x,y) x
+#else
+# define IF_OS_darwin(x,y) y
+#endif
+---------------------------------------------
+#endif
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
new file mode 100644
index 0000000000..8fdcd44024
--- /dev/null
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -0,0 +1,111 @@
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-- The native code generator's monad.
+--
+-- -----------------------------------------------------------------------------
+
+module NCGMonad (
+ NatM_State(..), mkNatM_State,
+
+ NatM, -- instance Monad
+ initNat, addImportNat, getUniqueNat,
+ mapAccumLNat, setDeltaNat, getDeltaNat,
+ getBlockIdNat, getNewLabelNat, getNewRegNat, getNewRegPairNat,
+ getPicBaseMaybeNat, getPicBaseNat
+ ) where
+
+#include "HsVersions.h"
+
+import Cmm ( BlockId(..) )
+import CLabel ( CLabel, mkAsmTempLabel )
+import MachRegs
+import MachOp ( MachRep )
+import UniqSupply
+import Unique ( Unique )
+
+
+data NatM_State = NatM_State {
+ natm_us :: UniqSupply,
+ natm_delta :: Int,
+ natm_imports :: [(CLabel)],
+ natm_pic :: Maybe Reg
+ }
+
+newtype NatM result = NatM (NatM_State -> (result, NatM_State))
+
+unNat (NatM a) = a
+
+mkNatM_State :: UniqSupply -> Int -> NatM_State
+mkNatM_State us delta = NatM_State us delta [] Nothing
+
+initNat :: NatM_State -> NatM a -> (a, NatM_State)
+initNat init_st m = case unNat m init_st of { (r,st) -> (r,st) }
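+
+-- For example, a hypothetical caller could run a NatM computation like this,
+-- threading in a fresh unique supply and a starting stack delta of 0
+-- (getBlockIdNat is defined further down this module):
+--
+--    runExample :: UniqSupply -> (BlockId, [CLabel])
+--    runExample us
+--      = case initNat (mkNatM_State us 0) getBlockIdNat of
+--          (blockId, st) -> (blockId, natm_imports st)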
+
+instance Monad NatM where
+ (>>=) = thenNat
+ return = returnNat
+
+thenNat :: NatM a -> (a -> NatM b) -> NatM b
+thenNat expr cont
+ = NatM $ \st -> case unNat expr st of
+ (result, st') -> unNat (cont result) st'
+
+returnNat :: a -> NatM a
+returnNat result = NatM $ \st -> (result, st)
+
+mapAccumLNat :: (acc -> x -> NatM (acc, y))
+ -> acc
+ -> [x]
+ -> NatM (acc, [y])
+
+mapAccumLNat f b []
+ = return (b, [])
+mapAccumLNat f b (x:xs)
+ = do (b__2, x__2) <- f b x
+ (b__3, xs__2) <- mapAccumLNat f b__2 xs
+ return (b__3, x__2:xs__2)
+
+getUniqueNat :: NatM Unique
+getUniqueNat = NatM $ \ (NatM_State us delta imports pic) ->
+ case splitUniqSupply us of
+ (us1,us2) -> (uniqFromSupply us1, (NatM_State us2 delta imports pic))
+
+getDeltaNat :: NatM Int
+getDeltaNat = NatM $ \ st -> (natm_delta st, st)
+
+setDeltaNat :: Int -> NatM ()
+setDeltaNat delta = NatM $ \ (NatM_State us _ imports pic) ->
+ ((), NatM_State us delta imports pic)
+
+addImportNat :: CLabel -> NatM ()
+addImportNat imp = NatM $ \ (NatM_State us delta imports pic) ->
+ ((), NatM_State us delta (imp:imports) pic)
+
+getBlockIdNat :: NatM BlockId
+getBlockIdNat = do u <- getUniqueNat; return (BlockId u)
+
+getNewLabelNat :: NatM CLabel
+getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u)
+
+getNewRegNat :: MachRep -> NatM Reg
+getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep)
+
+getNewRegPairNat :: MachRep -> NatM (Reg,Reg)
+getNewRegPairNat rep = do
+ u <- getUniqueNat
+ let lo = mkVReg u rep; hi = getHiVRegFromLo lo
+ return (lo,hi)
+
+getPicBaseMaybeNat :: NatM (Maybe Reg)
+getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state))
+
+getPicBaseNat :: MachRep -> NatM Reg
+getPicBaseNat rep = do
+ mbPicBase <- getPicBaseMaybeNat
+ case mbPicBase of
+ Just picBase -> return picBase
+ Nothing -> do
+ reg <- getNewRegNat rep
+ NatM (\state -> (reg, state { natm_pic = Just reg }))
diff --git a/compiler/nativeGen/NOTES b/compiler/nativeGen/NOTES
new file mode 100644
index 0000000000..9068a7fc2c
--- /dev/null
+++ b/compiler/nativeGen/NOTES
@@ -0,0 +1,41 @@
+TODO in new NCG
+~~~~~~~~~~~~~~~
+
+- Are we being careful enough about narrowing those out-of-range CmmInts?
+
+- Register allocator:
+ - fixup code
+ - keep track of free stack slots
+
+ Optimisations:
+
+ - picking the assignment on entry to a block: better to defer this
+ until we know all the assignments. In a loop, we should pick
+ the assignment from the looping jump (fixpointing?), so that any
+ fixup code ends up *outside* the loop. Otherwise, we should
+ pick the assignment that results in the least fixup code.
+
+- splitting?
+
+-- -----------------------------------------------------------------------------
+-- x86 ToDos
+
+- x86 genCCall needs to tack on the @size for stdcalls (might not be in the
+ foreignlabel).
+
+- x86: should really clean up that IMUL64 stuff, and tell the code gen about
+ Intel imul instructions.
+
+- x86: we're not careful enough about making sure that we only use
+ byte-addressable registers in byte instructions. Should we do it this
+ way, or stick to using 32-bit registers everywhere?
+
+- Use SSE for floating point, optionally.
+
+------------------------------------------------------------------------------
+-- Further optimisations:
+
+- We might be able to extend the scope of the inlining phase so it can
+ skip over more statements that don't affect the value of the inlined
+ expr.
+
diff --git a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PositionIndependentCode.hs
new file mode 100644
index 0000000000..0daccb6530
--- /dev/null
+++ b/compiler/nativeGen/PositionIndependentCode.hs
@@ -0,0 +1,605 @@
+module PositionIndependentCode (
+ cmmMakeDynamicReference,
+ needImportedSymbols,
+ pprImportedSymbol,
+ pprGotDeclaration,
+ initializePicBase
+ ) where
+
+{-
+ This module handles generation of position independent code and
+ dynamic-linking related issues for the native code generator.
+
+ Things outside this module which are related to this:
+
+ + module CLabel
+ - PIC base label (pretty printed as local label 1)
+ - DynamicLinkerLabels - several kinds:
+ CodeStub, SymbolPtr, GotSymbolPtr, GotSymbolOffset
+ - labelDynamic predicate
+ + module Cmm
+ - The GlobalReg datatype has a PicBaseReg constructor
+ - The CmmLit datatype has a CmmLabelDiffOff constructor
+ + codeGen & RTS
+ - When tablesNextToCode, no absolute addresses are stored in info tables
+ any more. Instead, offsets from the info label are used.
+ - For Win32 only, SRTs might contain addresses of __imp_ symbol pointers
+ because Win32 doesn't support external references in data sections.
+ TODO: make sure this still works, it might be bitrotted
+ + NCG
+ - The cmmToCmm pass in AsmCodeGen calls cmmMakeDynamicReference for all
+ labels.
+ - nativeCodeGen calls pprImportedSymbol and pprGotDeclaration to output
+ all the necessary stuff for imported symbols.
+ - The NCG monad keeps track of a list of imported symbols.
+ - MachCodeGen invokes initializePicBase to generate code to initialize
+ the PIC base register when needed.
+ - MachCodeGen calls cmmMakeDynamicReference whenever it uses a CLabel
+ that wasn't in the original Cmm code (e.g. floating point literals).
+ + The Mangler
+ - The mangler converts absolute refs to relative refs in info tables
+ - Symbol pointers, stub code and PIC calculations that are generated
+ by GCC are left intact by the mangler (so far only on ppc-darwin
+ and ppc-linux).
+-}
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+import Cmm
+import MachOp ( MachOp(MO_Add), wordRep )
+import CLabel ( CLabel, pprCLabel,
+ mkDynamicLinkerLabel, DynamicLinkerLabelInfo(..),
+ dynamicLinkerLabelInfo, mkPicBaseLabel,
+ labelDynamic, externallyVisibleCLabel )
+
+#if linux_TARGET_OS
+import CLabel ( mkForeignLabel )
+#endif
+
+import MachRegs
+import MachInstrs
+import NCGMonad ( NatM, getNewRegNat, getNewLabelNat )
+
+import StaticFlags ( opt_PIC, opt_Static )
+
+import Pretty
+import qualified Outputable
+
+import Panic ( panic )
+
+
+-- The most important function here is cmmMakeDynamicReference.
+
+-- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
+-- code. It does The Right Thing(tm) to convert the CmmLabel into a
+-- position-independent, dynamic-linking-aware reference to the thing
+-- in question.
+-- Note that this also has to be called from MachCodeGen in order to
+-- access static data like floating point literals (labels that were
+-- created after the cmmToCmm pass).
+-- The function must run in a monad that can keep track of imported symbols
+-- A function for recording an imported symbol must be passed in:
+-- - addImportCmmOpt for the CmmOptM monad
+-- - addImportNat for the NatM monad.
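+--
+-- A hedged usage sketch (illustrative only): in the NatM monad, a single
+-- label reference would be rewritten roughly as
+--
+--     do expr <- cmmMakeDynamicReference addImportNat False lbl
+--        -- ... and 'expr' is then used in place of CmmLit (CmmLabel lbl)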
+
+cmmMakeDynamicReference
+ :: Monad m => (CLabel -> m ()) -- a monad & a function
+ -- used for recording imported symbols
+ -> Bool -- whether this is the target of a jump
+ -> CLabel -- the label
+ -> m CmmExpr
+
+cmmMakeDynamicReference addImport isJumpTarget lbl
+ | Just _ <- dynamicLinkerLabelInfo lbl
+ = return $ CmmLit $ CmmLabel lbl -- already processed it, pass through
+ | otherwise = case howToAccessLabel isJumpTarget lbl of
+ AccessViaStub -> do
+ let stub = mkDynamicLinkerLabel CodeStub lbl
+ addImport stub
+ return $ CmmLit $ CmmLabel stub
+ AccessViaSymbolPtr -> do
+ let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
+ addImport symbolPtr
+ return $ CmmLoad (cmmMakePicReference symbolPtr) wordRep
+ AccessDirectly
+ -- all currently supported processors support
+ -- a PC-relative branch instruction, so just jump there
+ | isJumpTarget -> return $ CmmLit $ CmmLabel lbl
+ -- for data, we might have to make some calculations:
+ | otherwise -> return $ cmmMakePicReference lbl
+
+-- -------------------------------------------------------------------
+
+-- Create a position independent reference to a label.
+-- (but do not bother with dynamic linking).
+-- We calculate the label's address by adding some (platform-dependent)
+-- offset to our base register; this offset is calculated by
+-- the function picRelative in the platform-dependent part below.
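+--
+-- As a hedged illustration of the Darwin case below, the reference built
+-- here looks roughly like
+--     CmmMachOp (MO_Add wordRep)
+--               [ CmmReg (CmmGlobal PicBaseReg)
+--               , CmmLit (CmmLabelDiffOff lbl mkPicBaseLabel 0) ]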
+
+cmmMakePicReference :: CLabel -> CmmExpr
+
+#if !mingw32_TARGET_OS
+ -- Windows doesn't need PIC,
+ -- everything gets relocated at runtime
+
+cmmMakePicReference lbl
+ | opt_PIC && absoluteLabel lbl = CmmMachOp (MO_Add wordRep) [
+ CmmReg (CmmGlobal PicBaseReg),
+ CmmLit $ picRelative lbl
+ ]
+ where
+ absoluteLabel lbl = case dynamicLinkerLabelInfo lbl of
+ Just (GotSymbolPtr, _) -> False
+ Just (GotSymbolOffset, _) -> False
+ _ -> True
+
+#endif
+cmmMakePicReference lbl = CmmLit $ CmmLabel lbl
+
+-- ===================================================================
+-- Platform dependent stuff
+-- ===================================================================
+
+-- Knowledge of what special dynamic linker labels like symbol
+-- pointers, code stubs and GOT offsets look like is located in the
+-- module CLabel.
+
+-- -------------------------------------------------------------------
+
+-- We have to decide which labels need to be accessed
+-- indirectly or via a piece of stub code.
+
+data LabelAccessStyle = AccessViaStub
+ | AccessViaSymbolPtr
+ | AccessDirectly
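+-- AccessViaStub: jump or call through a small piece of stub code
+-- AccessViaSymbolPtr: load the target's address from a symbol pointer first
+-- AccessDirectly: an ordinary direct reference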
+
+howToAccessLabel :: Bool -> CLabel -> LabelAccessStyle
+
+#if mingw32_TARGET_OS
+-- Windows
+--
+-- We need to access *exactly* those things that
+-- are imported from a DLL via an __imp_* label.
+-- There are no stubs for imported code.
+
+howToAccessLabel _ lbl | labelDynamic lbl = AccessViaSymbolPtr
+ | otherwise = AccessDirectly
+
+#elif darwin_TARGET_OS
+-- Mach-O (Darwin, Mac OS X)
+--
+-- Indirect access is required in the following cases:
+-- * things imported from a dynamic library
+-- * things from a different module, if we're generating PIC code
+-- It is always possible to access something indirectly,
+-- even when it's not necessary.
+
+howToAccessLabel True lbl
+ -- jumps to a dynamic library go via a symbol stub
+ | labelDynamic lbl = AccessViaStub
+ -- when generating PIC code, all cross-module references
+ -- must go via a symbol pointer, too.
+ -- Unfortunately, we don't know whether it's cross-module,
+ -- so we do it for all externally visible labels.
+ -- This is a slight waste of time and space, but otherwise
+ -- we'd need to pass the current Module all the way in to
+ -- this function.
+ | opt_PIC && externallyVisibleCLabel lbl = AccessViaStub
+howToAccessLabel False lbl
+ -- data access to a dynamic library goes via a symbol pointer
+ | labelDynamic lbl = AccessViaSymbolPtr
+ -- cross-module PIC references: same as above
+ | opt_PIC && externallyVisibleCLabel lbl = AccessViaSymbolPtr
+howToAccessLabel _ _ = AccessDirectly
+
+#elif linux_TARGET_OS && powerpc64_TARGET_ARCH
+-- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
+
+howToAccessLabel True lbl = AccessDirectly -- actually, .label instead of label
+howToAccessLabel _ lbl = AccessViaSymbolPtr
+
+#elif linux_TARGET_OS
+-- ELF (Linux)
+--
+-- ELF tries to pretend to the main application code that dynamic linking does
+-- not exist. While this may sound convenient, it tends to mess things up in
+-- very bad ways, so we have to be careful when we generate code for the main
+-- program (-dynamic but no -fPIC).
+--
+-- Indirect access is required for references to imported symbols
+-- from position independent code. It is also required from the main program
+-- when dynamic libraries containing Haskell code are used.
+
+howToAccessLabel isJump lbl
+ -- no PIC -> the dynamic linker does everything for us;
+ -- if we don't dynamically link to Haskell code,
+ -- it actually manages to do so without messing things up.
+ | not opt_PIC && opt_Static = AccessDirectly
+
+#if !i386_TARGET_ARCH
+-- for Intel, we temporarily disable the use of the
+-- Procedure Linkage Table, because PLTs on intel require the
+-- address of the GOT to be loaded into register %ebx before
+-- a jump through the PLT is made.
+-- TODO: make the i386 NCG ensure this before jumping to a
+-- CodeStub label, so we can remove this special case.
+
+ -- As long as we're in a shared library ourselves,
+ -- we can use the plt.
+ -- NOTE: We might want to disable this, because this
+ -- prevents -fPIC code from being linked statically.
+ | isJump && labelDynamic lbl && opt_PIC = AccessViaStub
+
+ -- TODO: it would be OK to access non-Haskell code via a stub
+-- | isJump && labelDynamic lbl && not isHaskellCode lbl = AccessViaStub
+
+ -- Using code stubs for jumps from the main program to an entry
+ -- label in a dynamic library is deadly; this will cause the dynamic
+ -- linker to replace all references (even data references) to that
+ -- label by references to the stub, so we won't find our info tables
+ -- any more.
+#endif
+
+ -- A dynamic label needs to be accessed via a symbol pointer.
+ -- NOTE: It would be OK to jump to foreign code via a PLT stub.
+ | labelDynamic lbl = AccessViaSymbolPtr
+
+#if powerpc_TARGET_ARCH
+ -- For PowerPC32 -fPIC, we have to access even static data
+ -- via a symbol pointer (see below for an explanation why
+ -- PowerPC32 Linux is especially broken).
+ | opt_PIC && not isJump = AccessViaSymbolPtr
+#endif
+
+ | otherwise = AccessDirectly
+
+#else
+--
+-- all other platforms
+--
+howToAccessLabel _ _
+ | not opt_PIC = AccessDirectly
+ | otherwise = panic "howToAccessLabel: PIC not defined for this platform"
+#endif
+
+-- -------------------------------------------------------------------
+
+-- What do we have to add to our 'PIC base register' in order to
+-- get the address of a label?
+
+picRelative :: CLabel -> CmmLit
+#if darwin_TARGET_OS
+-- Darwin:
+-- The PIC base register points to the PIC base label at the beginning
+-- of the current CmmTop. We just have to use a label difference to
+-- get the offset.
+-- We have already made sure that all labels that are not from the current
+-- module are accessed indirectly ('as' can't calculate differences between
+-- undefined labels).
+
+picRelative lbl
+ = CmmLabelDiffOff lbl mkPicBaseLabel 0
+
+#elif powerpc_TARGET_ARCH && linux_TARGET_OS
+-- PowerPC Linux:
+-- The PIC base register points to our fake GOT. Use a label difference
+-- to get the offset.
+-- We have made sure that *everything* is accessed indirectly, so this
+-- is only used for offsets from the GOT to symbol pointers inside the
+-- GOT.
+picRelative lbl
+ = CmmLabelDiffOff lbl gotLabel 0
+
+#elif linux_TARGET_OS
+-- Other Linux versions:
+-- The PIC base register points to the GOT. Use foo@got for symbol
+-- pointers, and foo@gotoff for everything else.
+
+picRelative lbl
+ | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
+ | otherwise
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+
+#else
+picRelative lbl = panic "PositionIndependentCode.picRelative"
+#endif
+
+-- -------------------------------------------------------------------
+
+-- What do we have to add to every assembly file we generate?
+
+-- utility function for pretty-printing asm-labels,
+-- copied from PprMach
+asmSDoc d = Outputable.withPprStyleDoc (
+ Outputable.mkCodeStyle Outputable.AsmStyle) d
+pprCLabel_asm l = asmSDoc (pprCLabel l)
+
+
+#if darwin_TARGET_OS
+
+needImportedSymbols = True
+
+-- We don't need to declare any offset tables.
+-- However, for PIC on x86, we need a small helper function.
+#if i386_TARGET_ARCH
+pprGotDeclaration
+ | opt_PIC
+ = vcat [
+ ptext SLIT(".section __TEXT,__textcoal_nt,coalesced,no_toc"),
+ ptext SLIT(".weak_definition ___i686.get_pc_thunk.ax"),
+ ptext SLIT(".private_extern ___i686.get_pc_thunk.ax"),
+ ptext SLIT("___i686.get_pc_thunk.ax:"),
+ ptext SLIT("\tmovl (%esp), %eax"),
+ ptext SLIT("\tret")
+ ]
+ | otherwise = Pretty.empty
+#else
+pprGotDeclaration = Pretty.empty
+#endif
+
+-- On Darwin, we have to generate our own stub code for lazy binding.
+-- For each processor architecture, there are two versions, one for PIC
+-- and one for non-PIC.
+pprImportedSymbol importedLbl
+#if powerpc_TARGET_ARCH
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
+ vcat [
+ ptext SLIT(".symbol_stub"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$stub:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\tlis r11,ha16(L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr)"),
+ ptext SLIT("\tlwz r12,lo16(L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr)(r11)"),
+ ptext SLIT("\tmtctr r12"),
+ ptext SLIT("\taddi r11,r11,lo16(L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr)"),
+ ptext SLIT("\tbctr")
+ ]
+ True ->
+ vcat [
+ ptext SLIT(".section __TEXT,__picsymbolstub1,")
+ <> ptext SLIT("symbol_stubs,pure_instructions,32"),
+ ptext SLIT("\t.align 2"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$stub:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\tmflr r0"),
+ ptext SLIT("\tbcl 20,31,L0$") <> pprCLabel_asm lbl,
+ ptext SLIT("L0$") <> pprCLabel_asm lbl <> char ':',
+ ptext SLIT("\tmflr r11"),
+ ptext SLIT("\taddis r11,r11,ha16(L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr-L0$") <> pprCLabel_asm lbl <> char ')',
+ ptext SLIT("\tmtlr r0"),
+ ptext SLIT("\tlwzu r12,lo16(L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr-L0$") <> pprCLabel_asm lbl
+ <> ptext SLIT(")(r11)"),
+ ptext SLIT("\tmtctr r12"),
+ ptext SLIT("\tbctr")
+ ]
+ $+$ vcat [
+ ptext SLIT(".lazy_symbol_pointer"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$lazy_ptr:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\t.long dyld_stub_binding_helper")
+ ]
+#elif i386_TARGET_ARCH
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
+ vcat [
+ ptext SLIT(".symbol_stub"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$stub:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\tjmp *L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr"),
+ ptext SLIT("L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$stub_binder:"),
+ ptext SLIT("\tpushl $L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr"),
+ ptext SLIT("\tjmp dyld_stub_binding_helper")
+ ]
+ True ->
+ vcat [
+ ptext SLIT(".section __TEXT,__picsymbolstub2,")
+ <> ptext SLIT("symbol_stubs,pure_instructions,25"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$stub:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\tcall ___i686.get_pc_thunk.ax"),
+ ptext SLIT("1:"),
+ ptext SLIT("\tmovl L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr-1b(%eax),%edx"),
+ ptext SLIT("\tjmp %edx"),
+ ptext SLIT("L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$stub_binder:"),
+ ptext SLIT("\tlea L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$lazy_ptr-1b(%eax),%eax"),
+ ptext SLIT("\tpushl %eax"),
+ ptext SLIT("\tjmp dyld_stub_binding_helper")
+ ]
+ $+$ vcat [ ptext SLIT(".section __DATA, __la_sym_ptr")
+ <> (if opt_PIC then int 2 else int 3)
+ <> ptext SLIT(",lazy_symbol_pointers"),
+ ptext SLIT("L") <> pprCLabel_asm lbl <> ptext SLIT("$lazy_ptr:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\t.long L") <> pprCLabel_asm lbl
+ <> ptext SLIT("$stub_binder")
+ ]
+#endif
+-- We also have to declare our symbol pointers ourselves:
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext SLIT(".non_lazy_symbol_pointer"),
+ char 'L' <> pprCLabel_asm lbl <> ptext SLIT("$non_lazy_ptr:"),
+ ptext SLIT("\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext SLIT("\t.long\t0")
+ ]
+
+ | otherwise = empty
+
+#elif linux_TARGET_OS && !powerpc64_TARGET_ARCH
+
+-- ELF / Linux
+--
+-- In theory, we don't need to generate any stubs or symbol pointers
+-- by hand for Linux.
+--
+-- Reality differs from this in two areas.
+--
+-- 1) If we just use a dynamically imported symbol directly in a read-only
+-- section of the main executable (as GCC does), ld generates R_*_COPY
+-- relocations, which are fundamentally incompatible with reversed info
+-- tables. Therefore, we need a table of imported addresses in a writable
+-- section.
+-- The "official" GOT mechanism (label@got) isn't intended to be used
+-- in position dependent code, so we have to create our own "fake GOT"
+-- when not opt_PIC && not opt_Static.
+--
+-- 2) PowerPC Linux is just plain broken.
+-- While it's theoretically possible to use GOT offsets larger
+-- than 16 bit, the standard crt*.o files don't, which leads to
+-- linker errors as soon as the GOT size exceeds 16 bit.
+-- Also, the assembler doesn't support @gotoff labels.
+-- In order to be able to use a larger GOT, we have to circumvent the
+-- entire GOT mechanism and do it ourselves (this is also what GCC does).
+
+
+-- When needImportedSymbols is defined,
+-- the NCG will keep track of all DynamicLinkerLabels it uses
+-- and output each of them using pprImportedSymbol.
+#if powerpc_TARGET_ARCH
+ -- PowerPC Linux: -fPIC or -dynamic
+needImportedSymbols = opt_PIC || not opt_Static
+#else
+ -- i386 (and others?): -dynamic but not -fPIC
+needImportedSymbols = not opt_Static && not opt_PIC
+#endif
+
+-- gotLabel
+-- The label used to refer to our "fake GOT" from
+-- position-independent code.
+gotLabel = mkForeignLabel -- HACK: it's not really foreign
+ FSLIT(".LCTOC1") Nothing False
+
+-- pprGotDeclaration
+-- Output whatever needs to be output once per .s file.
+-- The .LCTOC1 label is defined to point 32768 bytes into the table,
+-- to make the most of the PPC's 16-bit displacements.
+-- Only needed for PIC.
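+-- (A signed 16-bit displacement reaches -32768..32767 bytes, so anchoring
+-- .LCTOC1 32768 bytes into the table lets a single base register address
+-- up to 64KB of table.)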
+
+pprGotDeclaration
+ | not opt_PIC = Pretty.empty
+ | otherwise = vcat [
+ ptext SLIT(".section \".got2\",\"aw\""),
+ ptext SLIT(".LCTOC1 = .+32768")
+ ]
+
+-- We generate one .long literal for every symbol we import;
+-- the dynamic linker will relocate those addresses.
+
+pprImportedSymbol importedLbl
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext SLIT(".section \".got2\", \"aw\""),
+ ptext SLIT(".LC_") <> pprCLabel_asm lbl <> char ':',
+ ptext SLIT("\t.long") <+> pprCLabel_asm lbl
+ ]
+
+-- PLT code stubs are generated automatically by the dynamic linker.
+ | otherwise = empty
+
+#else
+
+-- For all other currently supported platforms, we don't need to do
+-- anything at all.
+
+needImportedSymbols = False
+pprGotDeclaration = Pretty.empty
+pprImportedSymbol _ = empty
+#endif
+
+-- -------------------------------------------------------------------
+
+-- Generate code to calculate the address that should be put in the
+-- PIC base register.
+-- This is called by MachCodeGen for every CmmProc that accessed the
+-- PIC base register. It adds the appropriate instructions to the
+-- top of the CmmProc.
+
+-- It is assumed that the first NatCmmTop in the input list is a Proc
+-- and the rest are CmmDatas.
+
+initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
+
+#if darwin_TARGET_OS
+
+-- Darwin is simple: just fetch the address of a local label.
+-- The FETCHPC pseudo-instruction is expanded to multiple instructions
+-- during pretty-printing so that we don't have to deal with the
+-- local label:
+
+-- PowerPC version:
+-- bcl 20,31,1f.
+-- 1: mflr picReg
+
+-- i386 version:
+-- call 1f
+-- 1: popl %picReg
+
+initializePicBase picReg (CmmProc info lab params blocks : statics)
+ = return (CmmProc info lab params (b':tail blocks) : statics)
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (FETCHPC picReg : insns)
+
+#elif powerpc_TARGET_ARCH && linux_TARGET_OS
+
+-- Get a pointer to our own fake GOT, which is defined on a per-module basis.
+-- This is exactly how GCC does it, and it's quite horrible:
+-- We first fetch the address of a local label (mkPicBaseLabel).
+-- Then we add a 16-bit offset to that to get the address of a .long that we
+-- define in .text space right next to the proc. This .long literal contains
+-- the (32-bit) offset from our local label to our global offset table
+-- (.LCTOC1 aka gotOffLabel).
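+--
+-- Roughly, the emitted prologue looks like this (a hedged sketch; the
+-- exact text comes from pprInstr for FETCHPC, LD and ADD):
+-- bcl 20,31,1f
+-- 1: mflr picReg
+-- lwz tmp, (gotOffLabel - 1b)(picReg)
+-- add picReg, picReg, tmp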
+initializePicBase picReg
+ (CmmProc info lab params blocks : statics)
+ = do
+ gotOffLabel <- getNewLabelNat
+ tmp <- getNewRegNat wordRep
+ let
+ gotOffset = CmmData Text [
+ CmmDataLabel gotOffLabel,
+ CmmStaticLit (CmmLabelDiffOff gotLabel
+ mkPicBaseLabel
+ 0)
+ ]
+ offsetToOffset = ImmConstantDiff (ImmCLbl gotOffLabel)
+ (ImmCLbl mkPicBaseLabel)
+ BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (FETCHPC picReg
+ : LD wordRep tmp
+ (AddrRegImm picReg offsetToOffset)
+ : ADD picReg picReg (RIReg tmp)
+ : insns)
+ return (CmmProc info lab params (b' : tail blocks) : gotOffset : statics)
+#elif i386_TARGET_ARCH && linux_TARGET_OS
+
+-- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
+-- which pretty-prints as:
+-- call 1f
+-- 1: popl %picReg
+-- addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %picReg
+-- (See PprMach.lhs)
+
+initializePicBase picReg (CmmProc info lab params blocks : statics)
+ = return (CmmProc info lab params (b':tail blocks) : statics)
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (FETCHGOT picReg : insns)
+
+#else
+initializePicBase picReg proc = panic "initializePicBase"
+
+-- mingw32_TARGET_OS: not needed, won't be called
+#endif
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
new file mode 100644
index 0000000000..afa5bcd872
--- /dev/null
+++ b/compiler/nativeGen/PprMach.hs
@@ -0,0 +1,2454 @@
+-----------------------------------------------------------------------------
+--
+-- Pretty-printing assembly language
+--
+-- (c) The University of Glasgow 1993-2005
+--
+-----------------------------------------------------------------------------
+
+-- We start with the @pprXXX@s with some cross-platform commonality
+-- (e.g., 'pprReg'); we conclude with the no-commonality monster,
+-- 'pprInstr'.
+
+#include "nativeGen/NCG.h"
+
+module PprMach (
+ pprNatCmmTop, pprBasicBlock,
+ pprInstr, pprSize, pprUserReg,
+ ) where
+
+
+#include "HsVersions.h"
+
+import Cmm
+import MachOp ( MachRep(..), wordRep, isFloatingRep )
+import MachRegs -- may differ per-platform
+import MachInstrs
+
+import CLabel ( CLabel, pprCLabel, externallyVisibleCLabel,
+ labelDynamic, mkAsmTempLabel, entryLblToInfoLbl )
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+import CLabel ( mkDeadStripPreventer )
+#endif
+
+import Panic ( panic )
+import Unique ( pprUnique )
+import Pretty
+import FastString
+import qualified Outputable
+
+import StaticFlags ( opt_PIC, opt_Static )
+
+#if __GLASGOW_HASKELL__ >= 504
+import Data.Array.ST
+import Data.Word ( Word8 )
+#else
+import MutableArray
+#endif
+
+import MONAD_ST
+import Char ( chr, ord )
+import Maybe ( isJust )
+
+#if powerpc_TARGET_ARCH || darwin_TARGET_OS
+import DATA_WORD(Word32)
+import DATA_BITS
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+asmSDoc d = Outputable.withPprStyleDoc (
+ Outputable.mkCodeStyle Outputable.AsmStyle) d
+pprCLabel_asm l = asmSDoc (pprCLabel l)
+
+pprNatCmmTop :: NatCmmTop -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ []) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl params blocks) =
+ pprSectionHeader Text $$
+ (if not (null info)
+ then
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ else empty) $$
+ (case blocks of
+ [] -> empty
+ (BasicBlock _ instrs : rest) ->
+ (if null info then pprLabel lbl else empty) $$
+ -- the first block doesn't get a label:
+ vcat (map pprInstr instrs) $$
+ vcat (map pprBasicBlock rest)
+ )
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+-- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+-- -----------------------------------------------------------------------------
+-- pprReg: print a 'Reg'
+
+-- For x86, the way we print a register name depends
+-- on which bit of it we care about. Yurgh.
+
+pprUserReg :: Reg -> Doc
+pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
+
+pprReg :: IF_ARCH_i386(MachRep ->,) IF_ARCH_x86_64(MachRep ->,) Reg -> Doc
+
+pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r
+ = case r of
+ RealReg i -> ppr_reg_no IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) i
+ VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u)
+ VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
+ VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
+ VirtualRegD u -> text "%vD_" <> asmSDoc (pprUnique u)
+ where
+#if alpha_TARGET_ARCH
+ ppr_reg_no :: Int -> Doc
+ ppr_reg_no i = ptext
+ (case i of {
+ 0 -> SLIT("$0"); 1 -> SLIT("$1");
+ 2 -> SLIT("$2"); 3 -> SLIT("$3");
+ 4 -> SLIT("$4"); 5 -> SLIT("$5");
+ 6 -> SLIT("$6"); 7 -> SLIT("$7");
+ 8 -> SLIT("$8"); 9 -> SLIT("$9");
+ 10 -> SLIT("$10"); 11 -> SLIT("$11");
+ 12 -> SLIT("$12"); 13 -> SLIT("$13");
+ 14 -> SLIT("$14"); 15 -> SLIT("$15");
+ 16 -> SLIT("$16"); 17 -> SLIT("$17");
+ 18 -> SLIT("$18"); 19 -> SLIT("$19");
+ 20 -> SLIT("$20"); 21 -> SLIT("$21");
+ 22 -> SLIT("$22"); 23 -> SLIT("$23");
+ 24 -> SLIT("$24"); 25 -> SLIT("$25");
+ 26 -> SLIT("$26"); 27 -> SLIT("$27");
+ 28 -> SLIT("$28"); 29 -> SLIT("$29");
+ 30 -> SLIT("$30"); 31 -> SLIT("$31");
+ 32 -> SLIT("$f0"); 33 -> SLIT("$f1");
+ 34 -> SLIT("$f2"); 35 -> SLIT("$f3");
+ 36 -> SLIT("$f4"); 37 -> SLIT("$f5");
+ 38 -> SLIT("$f6"); 39 -> SLIT("$f7");
+ 40 -> SLIT("$f8"); 41 -> SLIT("$f9");
+ 42 -> SLIT("$f10"); 43 -> SLIT("$f11");
+ 44 -> SLIT("$f12"); 45 -> SLIT("$f13");
+ 46 -> SLIT("$f14"); 47 -> SLIT("$f15");
+ 48 -> SLIT("$f16"); 49 -> SLIT("$f17");
+ 50 -> SLIT("$f18"); 51 -> SLIT("$f19");
+ 52 -> SLIT("$f20"); 53 -> SLIT("$f21");
+ 54 -> SLIT("$f22"); 55 -> SLIT("$f23");
+ 56 -> SLIT("$f24"); 57 -> SLIT("$f25");
+ 58 -> SLIT("$f26"); 59 -> SLIT("$f27");
+ 60 -> SLIT("$f28"); 61 -> SLIT("$f29");
+ 62 -> SLIT("$f30"); 63 -> SLIT("$f31");
+ _ -> SLIT("very naughty alpha register")
+ })
+#endif
+#if i386_TARGET_ARCH
+ ppr_reg_no :: MachRep -> Int -> Doc
+ ppr_reg_no I8 = ppr_reg_byte
+ ppr_reg_no I16 = ppr_reg_word
+ ppr_reg_no _ = ppr_reg_long
+
+ ppr_reg_byte i = ptext
+ (case i of {
+ 0 -> SLIT("%al"); 1 -> SLIT("%bl");
+ 2 -> SLIT("%cl"); 3 -> SLIT("%dl");
+ _ -> SLIT("very naughty I386 byte register")
+ })
+
+ ppr_reg_word i = ptext
+ (case i of {
+ 0 -> SLIT("%ax"); 1 -> SLIT("%bx");
+ 2 -> SLIT("%cx"); 3 -> SLIT("%dx");
+ 4 -> SLIT("%si"); 5 -> SLIT("%di");
+ 6 -> SLIT("%bp"); 7 -> SLIT("%sp");
+ _ -> SLIT("very naughty I386 word register")
+ })
+
+ ppr_reg_long i = ptext
+ (case i of {
+ 0 -> SLIT("%eax"); 1 -> SLIT("%ebx");
+ 2 -> SLIT("%ecx"); 3 -> SLIT("%edx");
+ 4 -> SLIT("%esi"); 5 -> SLIT("%edi");
+ 6 -> SLIT("%ebp"); 7 -> SLIT("%esp");
+ 8 -> SLIT("%fake0"); 9 -> SLIT("%fake1");
+ 10 -> SLIT("%fake2"); 11 -> SLIT("%fake3");
+ 12 -> SLIT("%fake4"); 13 -> SLIT("%fake5");
+ _ -> SLIT("very naughty I386 register")
+ })
+#endif
+
+#if x86_64_TARGET_ARCH
+ ppr_reg_no :: MachRep -> Int -> Doc
+ ppr_reg_no I8 = ppr_reg_byte
+ ppr_reg_no I16 = ppr_reg_word
+ ppr_reg_no I32 = ppr_reg_long
+ ppr_reg_no _ = ppr_reg_quad
+
+ ppr_reg_byte i = ptext
+ (case i of {
+ 0 -> SLIT("%al"); 1 -> SLIT("%bl");
+ 2 -> SLIT("%cl"); 3 -> SLIT("%dl");
+ 4 -> SLIT("%sil"); 5 -> SLIT("%dil"); -- new 8-bit regs!
+ 6 -> SLIT("%bpl"); 7 -> SLIT("%spl");
+ 8 -> SLIT("%r8b"); 9 -> SLIT("%r9b");
+ 10 -> SLIT("%r10b"); 11 -> SLIT("%r11b");
+ 12 -> SLIT("%r12b"); 13 -> SLIT("%r13b");
+ 14 -> SLIT("%r14b"); 15 -> SLIT("%r15b");
+ _ -> SLIT("very naughty x86_64 byte register")
+ })
+
+ ppr_reg_word i = ptext
+ (case i of {
+ 0 -> SLIT("%ax"); 1 -> SLIT("%bx");
+ 2 -> SLIT("%cx"); 3 -> SLIT("%dx");
+ 4 -> SLIT("%si"); 5 -> SLIT("%di");
+ 6 -> SLIT("%bp"); 7 -> SLIT("%sp");
+ 8 -> SLIT("%r8w"); 9 -> SLIT("%r9w");
+ 10 -> SLIT("%r10w"); 11 -> SLIT("%r11w");
+ 12 -> SLIT("%r12w"); 13 -> SLIT("%r13w");
+ 14 -> SLIT("%r14w"); 15 -> SLIT("%r15w");
+ _ -> SLIT("very naughty x86_64 word register")
+ })
+
+ ppr_reg_long i = ptext
+ (case i of {
+ 0 -> SLIT("%eax"); 1 -> SLIT("%ebx");
+ 2 -> SLIT("%ecx"); 3 -> SLIT("%edx");
+ 4 -> SLIT("%esi"); 5 -> SLIT("%edi");
+ 6 -> SLIT("%ebp"); 7 -> SLIT("%esp");
+ 8 -> SLIT("%r8d"); 9 -> SLIT("%r9d");
+ 10 -> SLIT("%r10d"); 11 -> SLIT("%r11d");
+ 12 -> SLIT("%r12d"); 13 -> SLIT("%r13d");
+ 14 -> SLIT("%r14d"); 15 -> SLIT("%r15d");
+ _ -> SLIT("very naughty x86_64 register")
+ })
+
+ ppr_reg_quad i = ptext
+ (case i of {
+ 0 -> SLIT("%rax"); 1 -> SLIT("%rbx");
+ 2 -> SLIT("%rcx"); 3 -> SLIT("%rdx");
+ 4 -> SLIT("%rsi"); 5 -> SLIT("%rdi");
+ 6 -> SLIT("%rbp"); 7 -> SLIT("%rsp");
+ 8 -> SLIT("%r8"); 9 -> SLIT("%r9");
+ 10 -> SLIT("%r10"); 11 -> SLIT("%r11");
+ 12 -> SLIT("%r12"); 13 -> SLIT("%r13");
+ 14 -> SLIT("%r14"); 15 -> SLIT("%r15");
+ 16 -> SLIT("%xmm0"); 17 -> SLIT("%xmm1");
+ 18 -> SLIT("%xmm2"); 19 -> SLIT("%xmm3");
+ 20 -> SLIT("%xmm4"); 21 -> SLIT("%xmm5");
+ 22 -> SLIT("%xmm6"); 23 -> SLIT("%xmm7");
+ 24 -> SLIT("%xmm8"); 25 -> SLIT("%xmm9");
+ 26 -> SLIT("%xmm10"); 27 -> SLIT("%xmm11");
+ 28 -> SLIT("%xmm12"); 29 -> SLIT("%xmm13");
+ 30 -> SLIT("%xmm14"); 31 -> SLIT("%xmm15");
+ _ -> SLIT("very naughty x86_64 register")
+ })
+#endif
+
+#if sparc_TARGET_ARCH
+ ppr_reg_no :: Int -> Doc
+ ppr_reg_no i = ptext
+ (case i of {
+ 0 -> SLIT("%g0"); 1 -> SLIT("%g1");
+ 2 -> SLIT("%g2"); 3 -> SLIT("%g3");
+ 4 -> SLIT("%g4"); 5 -> SLIT("%g5");
+ 6 -> SLIT("%g6"); 7 -> SLIT("%g7");
+ 8 -> SLIT("%o0"); 9 -> SLIT("%o1");
+ 10 -> SLIT("%o2"); 11 -> SLIT("%o3");
+ 12 -> SLIT("%o4"); 13 -> SLIT("%o5");
+ 14 -> SLIT("%o6"); 15 -> SLIT("%o7");
+ 16 -> SLIT("%l0"); 17 -> SLIT("%l1");
+ 18 -> SLIT("%l2"); 19 -> SLIT("%l3");
+ 20 -> SLIT("%l4"); 21 -> SLIT("%l5");
+ 22 -> SLIT("%l6"); 23 -> SLIT("%l7");
+ 24 -> SLIT("%i0"); 25 -> SLIT("%i1");
+ 26 -> SLIT("%i2"); 27 -> SLIT("%i3");
+ 28 -> SLIT("%i4"); 29 -> SLIT("%i5");
+ 30 -> SLIT("%i6"); 31 -> SLIT("%i7");
+ 32 -> SLIT("%f0"); 33 -> SLIT("%f1");
+ 34 -> SLIT("%f2"); 35 -> SLIT("%f3");
+ 36 -> SLIT("%f4"); 37 -> SLIT("%f5");
+ 38 -> SLIT("%f6"); 39 -> SLIT("%f7");
+ 40 -> SLIT("%f8"); 41 -> SLIT("%f9");
+ 42 -> SLIT("%f10"); 43 -> SLIT("%f11");
+ 44 -> SLIT("%f12"); 45 -> SLIT("%f13");
+ 46 -> SLIT("%f14"); 47 -> SLIT("%f15");
+ 48 -> SLIT("%f16"); 49 -> SLIT("%f17");
+ 50 -> SLIT("%f18"); 51 -> SLIT("%f19");
+ 52 -> SLIT("%f20"); 53 -> SLIT("%f21");
+ 54 -> SLIT("%f22"); 55 -> SLIT("%f23");
+ 56 -> SLIT("%f24"); 57 -> SLIT("%f25");
+ 58 -> SLIT("%f26"); 59 -> SLIT("%f27");
+ 60 -> SLIT("%f28"); 61 -> SLIT("%f29");
+ 62 -> SLIT("%f30"); 63 -> SLIT("%f31");
+ _ -> SLIT("very naughty sparc register")
+ })
+#endif
+#if powerpc_TARGET_ARCH
+#if darwin_TARGET_OS
+ ppr_reg_no :: Int -> Doc
+ ppr_reg_no i = ptext
+ (case i of {
+ 0 -> SLIT("r0"); 1 -> SLIT("r1");
+ 2 -> SLIT("r2"); 3 -> SLIT("r3");
+ 4 -> SLIT("r4"); 5 -> SLIT("r5");
+ 6 -> SLIT("r6"); 7 -> SLIT("r7");
+ 8 -> SLIT("r8"); 9 -> SLIT("r9");
+ 10 -> SLIT("r10"); 11 -> SLIT("r11");
+ 12 -> SLIT("r12"); 13 -> SLIT("r13");
+ 14 -> SLIT("r14"); 15 -> SLIT("r15");
+ 16 -> SLIT("r16"); 17 -> SLIT("r17");
+ 18 -> SLIT("r18"); 19 -> SLIT("r19");
+ 20 -> SLIT("r20"); 21 -> SLIT("r21");
+ 22 -> SLIT("r22"); 23 -> SLIT("r23");
+ 24 -> SLIT("r24"); 25 -> SLIT("r25");
+ 26 -> SLIT("r26"); 27 -> SLIT("r27");
+ 28 -> SLIT("r28"); 29 -> SLIT("r29");
+ 30 -> SLIT("r30"); 31 -> SLIT("r31");
+ 32 -> SLIT("f0"); 33 -> SLIT("f1");
+ 34 -> SLIT("f2"); 35 -> SLIT("f3");
+ 36 -> SLIT("f4"); 37 -> SLIT("f5");
+ 38 -> SLIT("f6"); 39 -> SLIT("f7");
+ 40 -> SLIT("f8"); 41 -> SLIT("f9");
+ 42 -> SLIT("f10"); 43 -> SLIT("f11");
+ 44 -> SLIT("f12"); 45 -> SLIT("f13");
+ 46 -> SLIT("f14"); 47 -> SLIT("f15");
+ 48 -> SLIT("f16"); 49 -> SLIT("f17");
+ 50 -> SLIT("f18"); 51 -> SLIT("f19");
+ 52 -> SLIT("f20"); 53 -> SLIT("f21");
+ 54 -> SLIT("f22"); 55 -> SLIT("f23");
+ 56 -> SLIT("f24"); 57 -> SLIT("f25");
+ 58 -> SLIT("f26"); 59 -> SLIT("f27");
+ 60 -> SLIT("f28"); 61 -> SLIT("f29");
+ 62 -> SLIT("f30"); 63 -> SLIT("f31");
+ _ -> SLIT("very naughty powerpc register")
+ })
+#else
+ ppr_reg_no :: Int -> Doc
+ ppr_reg_no i | i <= 31 = int i -- GPRs
+ | i <= 63 = int (i-32) -- FPRs
+ | otherwise = ptext SLIT("very naughty powerpc register")
+#endif
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- pprSize: print a 'Size'
+
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
+pprSize :: MachRep -> Doc
+#else
+pprSize :: Size -> Doc
+#endif
+
+pprSize x = ptext (case x of
+#if alpha_TARGET_ARCH
+ B -> SLIT("b")
+ Bu -> SLIT("bu")
+-- W -> SLIT("w") UNUSED
+-- Wu -> SLIT("wu") UNUSED
+ L -> SLIT("l")
+ Q -> SLIT("q")
+-- FF -> SLIT("f") UNUSED
+-- DF -> SLIT("d") UNUSED
+-- GF -> SLIT("g") UNUSED
+-- SF -> SLIT("s") UNUSED
+ TF -> SLIT("t")
+#endif
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ I8 -> SLIT("b")
+ I16 -> SLIT("w")
+ I32 -> SLIT("l")
+ I64 -> SLIT("q")
+#endif
+#if i386_TARGET_ARCH
+ F32 -> SLIT("s")
+ F64 -> SLIT("l")
+ F80 -> SLIT("t")
+#endif
+#if x86_64_TARGET_ARCH
+ F32 -> SLIT("ss") -- "scalar single-precision float" (SSE2)
+ F64 -> SLIT("sd") -- "scalar double-precision float" (SSE2)
+#endif
+#if sparc_TARGET_ARCH
+ I8 -> SLIT("sb")
+ I16 -> SLIT("sh")
+ I32 -> SLIT("")
+ F32 -> SLIT("")
+ F64 -> SLIT("d")
+ )
+pprStSize :: MachRep -> Doc
+pprStSize x = ptext (case x of
+ I8 -> SLIT("b")
+ I16 -> SLIT("h")
+ I32 -> SLIT("")
+ F32 -> SLIT("")
+ F64 -> SLIT("d")
+#endif
+#if powerpc_TARGET_ARCH
+ I8 -> SLIT("b")
+ I16 -> SLIT("h")
+ I32 -> SLIT("w")
+ F32 -> SLIT("fs")
+ F64 -> SLIT("fd")
+#endif
+ )
+
+-- -----------------------------------------------------------------------------
+-- pprCond: print a 'Cond'
+
+pprCond :: Cond -> Doc
+
+pprCond c = ptext (case c of {
+#if alpha_TARGET_ARCH
+ EQQ -> SLIT("eq");
+ LTT -> SLIT("lt");
+ LE -> SLIT("le");
+ ULT -> SLIT("ult");
+ ULE -> SLIT("ule");
+ NE -> SLIT("ne");
+ GTT -> SLIT("gt");
+ GE -> SLIT("ge")
+#endif
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ GEU -> SLIT("ae"); LU -> SLIT("b");
+ EQQ -> SLIT("e"); GTT -> SLIT("g");
+ GE -> SLIT("ge"); GU -> SLIT("a");
+ LTT -> SLIT("l"); LE -> SLIT("le");
+ LEU -> SLIT("be"); NE -> SLIT("ne");
+ NEG -> SLIT("s"); POS -> SLIT("ns");
+ CARRY -> SLIT("c"); OFLO -> SLIT("o");
+ PARITY -> SLIT("p"); NOTPARITY -> SLIT("np");
+ ALWAYS -> SLIT("mp") -- hack
+#endif
+#if sparc_TARGET_ARCH
+ ALWAYS -> SLIT(""); NEVER -> SLIT("n");
+ GEU -> SLIT("geu"); LU -> SLIT("lu");
+ EQQ -> SLIT("e"); GTT -> SLIT("g");
+ GE -> SLIT("ge"); GU -> SLIT("gu");
+ LTT -> SLIT("l"); LE -> SLIT("le");
+ LEU -> SLIT("leu"); NE -> SLIT("ne");
+ NEG -> SLIT("neg"); POS -> SLIT("pos");
+ VC -> SLIT("vc"); VS -> SLIT("vs")
+#endif
+#if powerpc_TARGET_ARCH
+ ALWAYS -> SLIT("");
+ EQQ -> SLIT("eq"); NE -> SLIT("ne");
+ LTT -> SLIT("lt"); GE -> SLIT("ge");
+ GTT -> SLIT("gt"); LE -> SLIT("le");
+ LU -> SLIT("lt"); GEU -> SLIT("ge");
+ GU -> SLIT("gt"); LEU -> SLIT("le");
+#endif
+ })
+
+
+-- -----------------------------------------------------------------------------
+-- pprImm: print an 'Imm'
+
+pprImm :: Imm -> Doc
+
+pprImm (ImmInt i) = int i
+pprImm (ImmInteger i) = integer i
+pprImm (ImmCLbl l) = pprCLabel_asm l
+pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
+pprImm (ImmLit s) = s
+
+pprImm (ImmFloat _) = ptext SLIT("naughty float immediate")
+pprImm (ImmDouble _) = ptext SLIT("naughty double immediate")
+
+pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
+#if sparc_TARGET_ARCH
+-- ToDo: This should really be fixed in the PIC support, but for now
+-- we only print 'a'.
+pprImm (ImmConstantDiff a b) = pprImm a
+#else
+pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
+ <> lparen <> pprImm b <> rparen
+#endif
+
+#if sparc_TARGET_ARCH
+pprImm (LO i)
+ = hcat [ pp_lo, pprImm i, rparen ]
+ where
+ pp_lo = text "%lo("
+
+pprImm (HI i)
+ = hcat [ pp_hi, pprImm i, rparen ]
+ where
+ pp_hi = text "%hi("
+#endif
+#if powerpc_TARGET_ARCH
+#if darwin_TARGET_OS
+pprImm (LO i)
+ = hcat [ pp_lo, pprImm i, rparen ]
+ where
+ pp_lo = text "lo16("
+
+pprImm (HI i)
+ = hcat [ pp_hi, pprImm i, rparen ]
+ where
+ pp_hi = text "hi16("
+
+pprImm (HA i)
+ = hcat [ pp_ha, pprImm i, rparen ]
+ where
+ pp_ha = text "ha16("
+
+#else
+pprImm (LO i)
+ = pprImm i <> text "@l"
+
+pprImm (HI i)
+ = pprImm i <> text "@h"
+
+pprImm (HA i)
+ = pprImm i <> text "@ha"
+#endif
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- pprAddr: print an 'AddrMode'
+
+pprAddr :: AddrMode -> Doc
+
+#if alpha_TARGET_ARCH
+pprAddr (AddrReg r) = parens (pprReg r)
+pprAddr (AddrImm i) = pprImm i
+pprAddr (AddrRegImm r1 i)
+ = (<>) (pprImm i) (parens (pprReg r1))
+#endif
+
+-------------------
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+pprAddr (ImmAddr imm off)
+ = let pp_imm = pprImm imm
+ in
+ if (off == 0) then
+ pp_imm
+ else if (off < 0) then
+ pp_imm <> int off
+ else
+ pp_imm <> char '+' <> int off
+
+pprAddr (AddrBaseIndex base index displacement)
+ = let
+ pp_disp = ppr_disp displacement
+ pp_off p = pp_disp <> char '(' <> p <> char ')'
+ pp_reg r = pprReg wordRep r
+ in
+ case (base,index) of
+ (EABaseNone, EAIndexNone) -> pp_disp
+ (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
+ (EABaseRip, EAIndexNone) -> pp_off (ptext SLIT("%rip"))
+ (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
+ (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
+ <> comma <> int i)
+ where
+ ppr_disp (ImmInt 0) = empty
+ ppr_disp imm = pprImm imm
+#endif
+
+-------------------
+
+#if sparc_TARGET_ARCH
+pprAddr (AddrRegReg r1 (RealReg 0)) = pprReg r1
+
+pprAddr (AddrRegReg r1 r2)
+ = hcat [ pprReg r1, char '+', pprReg r2 ]
+
+pprAddr (AddrRegImm r1 (ImmInt i))
+ | i == 0 = pprReg r1
+ | not (fits13Bits i) = largeOffsetError i
+ | otherwise = hcat [ pprReg r1, pp_sign, int i ]
+ where
+ pp_sign = if i > 0 then char '+' else empty
+
+pprAddr (AddrRegImm r1 (ImmInteger i))
+ | i == 0 = pprReg r1
+ | not (fits13Bits i) = largeOffsetError i
+ | otherwise = hcat [ pprReg r1, pp_sign, integer i ]
+ where
+ pp_sign = if i > 0 then char '+' else empty
+
+pprAddr (AddrRegImm r1 imm)
+ = hcat [ pprReg r1, char '+', pprImm imm ]
+#endif
+
+-------------------
+
+#if powerpc_TARGET_ARCH
+pprAddr (AddrRegReg r1 r2)
+ = pprReg r1 <+> ptext SLIT(", ") <+> pprReg r2
+
+pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- pprData: print a 'CmmStatic'
+
+pprSectionHeader Text
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.text\n\t.align 3") {-word boundary-}
+ ,IF_ARCH_sparc(SLIT(".text\n\t.align 4") {-word boundary-}
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".text\n\t.align 2"),
+ SLIT(".text\n\t.align 4,0x90"))
+ {-needs per-OS variation!-}
+ ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8") {-needs per-OS variation!-}
+ ,IF_ARCH_powerpc(SLIT(".text\n.align 2")
+ ,)))))
+pprSectionHeader Data
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
+ ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".data\n\t.align 2"),
+ SLIT(".data\n\t.align 4"))
+ ,IF_ARCH_x86_64(SLIT(".data\n\t.align 8")
+ ,IF_ARCH_powerpc(SLIT(".data\n.align 2")
+ ,)))))
+pprSectionHeader ReadOnlyData
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
+ ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".const\n.align 2"),
+ SLIT(".section .rodata\n\t.align 4"))
+ ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8")
+ ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 2"),
+ SLIT(".section .rodata\n\t.align 2"))
+ ,)))))
+pprSectionHeader RelocatableReadOnlyData
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
+ ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".const_data\n.align 2"),
+ SLIT(".section .rodata\n\t.align 4"))
+ ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8")
+ ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
+ SLIT(".data\n\t.align 2"))
+ ,)))))
+pprSectionHeader UninitialisedData
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.bss\n\t.align 3")
+ ,IF_ARCH_sparc(SLIT(".bss\n\t.align 8") {-<8 will break double constants -}
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".const_data\n\t.align 2"),
+ SLIT(".section .bss\n\t.align 4"))
+ ,IF_ARCH_x86_64(SLIT(".section .bss\n\t.align 8")
+ ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
+ SLIT(".section .bss\n\t.align 2"))
+ ,)))))
+pprSectionHeader ReadOnlyData16
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.data\n\t.align 4")
+ ,IF_ARCH_sparc(SLIT(".data\n\t.align 16")
+ ,IF_ARCH_i386(IF_OS_darwin(SLIT(".const\n.align 4"),
+ SLIT(".section .rodata\n\t.align 16"))
+ ,IF_ARCH_x86_64(SLIT(".section .rodata.cst16\n\t.align 16")
+ ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 4"),
+ SLIT(".section .rodata\n\t.align 4"))
+ ,)))))
+
+pprSectionHeader (OtherSection sec)
+ = panic "PprMach.pprSectionHeader: unknown section"
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext SLIT(".space ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc(SLIT(".global "),
+ SLIT(".globl ")) <>
+ pprCLabel_asm lbl
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext SLIT("\t.byte\t") <> int (fromIntegral w)
+
+pprAlign bytes =
+ IF_ARCH_alpha(ptext SLIT(".align ") <> int pow2,
+ IF_ARCH_i386(ptext SLIT(".align ") <> int IF_OS_darwin(pow2,bytes),
+ IF_ARCH_x86_64(ptext SLIT(".align ") <> int bytes,
+ IF_ARCH_sparc(ptext SLIT(".align ") <> int bytes,
+ IF_ARCH_powerpc(ptext SLIT(".align ") <> int pow2,)))))
+ where
+ pow2 = log2 bytes
+
+ log2 :: Int -> Int -- cache the common ones
+ log2 1 = 0
+ log2 2 = 1
+ log2 4 = 2
+ log2 8 = 3
+ log2 n = 1 + log2 (n `quot` 2)
+
+
+pprDataItem :: CmmLit -> Doc
+pprDataItem lit
+ = vcat (ppr_item (cmmLitRep lit) lit)
+ where
+ imm = litToImm lit
+
+ -- These seem to be common:
+ ppr_item I8 x = [ptext SLIT("\t.byte\t") <> pprImm imm]
+ ppr_item I32 x = [ptext SLIT("\t.long\t") <> pprImm imm]
+ ppr_item F32 (CmmFloat r _)
+ = let bs = floatToBytes (fromRational r)
+ in map (\b -> ptext SLIT("\t.byte\t") <> pprImm (ImmInt b)) bs
+ ppr_item F64 (CmmFloat r _)
+ = let bs = doubleToBytes (fromRational r)
+ in map (\b -> ptext SLIT("\t.byte\t") <> pprImm (ImmInt b)) bs
+
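+ -- Floats and doubles are emitted byte by byte (via floatToBytes and
+ -- doubleToBytes) rather than as assembler float literals; presumably this
+ -- sidesteps differences in assembler float syntax between platforms.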
+#if sparc_TARGET_ARCH
+ -- copy n paste of x86 version
+ ppr_item I16 x = [ptext SLIT("\t.short\t") <> pprImm imm]
+ ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm]
+#endif
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ ppr_item I16 x = [ptext SLIT("\t.word\t") <> pprImm imm]
+#endif
+#if i386_TARGET_ARCH && darwin_TARGET_OS
+ ppr_item I64 (CmmInt x _) =
+ [ptext SLIT("\t.long\t")
+ <> int (fromIntegral (fromIntegral x :: Word32)),
+ ptext SLIT("\t.long\t")
+ <> int (fromIntegral
+ (fromIntegral (x `shiftR` 32) :: Word32))]
+#endif
+#if i386_TARGET_ARCH
+ ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm]
+#endif
+#if x86_64_TARGET_ARCH
+ -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
+ -- type, which means we can't do pc-relative 64-bit addresses.
+ -- Fortunately we're assuming the small memory model, in which
+ -- all such offsets will fit into 32 bits, so we have to stick
+ -- to 32-bit offset fields and modify the RTS appropriately
+ -- (see InfoTables.h).
+ --
+ ppr_item I64 x
+ | isRelativeReloc x =
+ [ptext SLIT("\t.long\t") <> pprImm imm,
+ ptext SLIT("\t.long\t0")]
+ | otherwise =
+ [ptext SLIT("\t.quad\t") <> pprImm imm]
+ where
+ isRelativeReloc (CmmLabelOff _ _) = True
+ isRelativeReloc (CmmLabelDiffOff _ _ _) = True
+ isRelativeReloc _ = False
+#endif
+#if powerpc_TARGET_ARCH
+ ppr_item I16 x = [ptext SLIT("\t.short\t") <> pprImm imm]
+ ppr_item I64 (CmmInt x _) =
+ [ptext SLIT("\t.long\t")
+ <> int (fromIntegral
+ (fromIntegral (x `shiftR` 32) :: Word32)),
+ ptext SLIT("\t.long\t")
+ <> int (fromIntegral (fromIntegral x :: Word32))]
+#endif
+
+-- fall through to rest of (machine-specific) pprInstr...
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+pprInstr :: Instr -> Doc
+
+--pprInstr (COMMENT s) = empty -- nuke 'em
+pprInstr (COMMENT s)
+ = IF_ARCH_alpha( ((<>) (ptext SLIT("\t# ")) (ftext s))
+ ,IF_ARCH_sparc( ((<>) (ptext SLIT("! ")) (ftext s))
+ ,IF_ARCH_i386( ((<>) (ptext SLIT("# ")) (ftext s))
+ ,IF_ARCH_x86_64( ((<>) (ptext SLIT("# ")) (ftext s))
+ ,IF_ARCH_powerpc( IF_OS_linux(
+ ((<>) (ptext SLIT("# ")) (ftext s)),
+ ((<>) (ptext SLIT("; ")) (ftext s)))
+ ,)))))
+
+pprInstr (DELTA d)
+ = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
+
+pprInstr (NEWBLOCK _)
+ = panic "PprMach.pprInstr: NEWBLOCK"
+
+pprInstr (LDATA _ _)
+ = panic "PprMach.pprInstr: LDATA"
+
+-- -----------------------------------------------------------------------------
+-- pprInstr for an Alpha
+
+#if alpha_TARGET_ARCH
+
+pprInstr (LD size reg addr)
+ = hcat [
+ ptext SLIT("\tld"),
+ pprSize size,
+ char '\t',
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (LDA reg addr)
+ = hcat [
+ ptext SLIT("\tlda\t"),
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (LDAH reg addr)
+ = hcat [
+ ptext SLIT("\tldah\t"),
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (LDGP reg addr)
+ = hcat [
+ ptext SLIT("\tldgp\t"),
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (LDI size reg imm)
+ = hcat [
+ ptext SLIT("\tldi"),
+ pprSize size,
+ char '\t',
+ pprReg reg,
+ comma,
+ pprImm imm
+ ]
+
+pprInstr (ST size reg addr)
+ = hcat [
+ ptext SLIT("\tst"),
+ pprSize size,
+ char '\t',
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (CLR reg)
+ = hcat [
+ ptext SLIT("\tclr\t"),
+ pprReg reg
+ ]
+
+pprInstr (ABS size ri reg)
+ = hcat [
+ ptext SLIT("\tabs"),
+ pprSize size,
+ char '\t',
+ pprRI ri,
+ comma,
+ pprReg reg
+ ]
+
+pprInstr (NEG size ov ri reg)
+ = hcat [
+ ptext SLIT("\tneg"),
+ pprSize size,
+ if ov then ptext SLIT("v\t") else char '\t',
+ pprRI ri,
+ comma,
+ pprReg reg
+ ]
+
+pprInstr (ADD size ov reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\tadd"),
+ pprSize size,
+ if ov then ptext SLIT("v\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (SADD size scale reg1 ri reg2)
+ = hcat [
+ ptext (case scale of {{-UNUSED:L -> SLIT("\ts4");-} Q -> SLIT("\ts8")}),
+ ptext SLIT("add"),
+ pprSize size,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (SUB size ov reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\tsub"),
+ pprSize size,
+ if ov then ptext SLIT("v\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (SSUB size scale reg1 ri reg2)
+ = hcat [
+ ptext (case scale of {{-UNUSED:L -> SLIT("\ts4");-} Q -> SLIT("\ts8")}),
+ ptext SLIT("sub"),
+ pprSize size,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (MUL size ov reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\tmul"),
+ pprSize size,
+ if ov then ptext SLIT("v\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (DIV size uns reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\tdiv"),
+ pprSize size,
+ if uns then ptext SLIT("u\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (REM size uns reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\trem"),
+ pprSize size,
+ if uns then ptext SLIT("u\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (NOT ri reg)
+ = hcat [
+ ptext SLIT("\tnot"),
+ char '\t',
+ pprRI ri,
+ comma,
+ pprReg reg
+ ]
+
+pprInstr (AND reg1 ri reg2) = pprRegRIReg SLIT("and") reg1 ri reg2
+pprInstr (ANDNOT reg1 ri reg2) = pprRegRIReg SLIT("andnot") reg1 ri reg2
+pprInstr (OR reg1 ri reg2) = pprRegRIReg SLIT("or") reg1 ri reg2
+pprInstr (ORNOT reg1 ri reg2) = pprRegRIReg SLIT("ornot") reg1 ri reg2
+pprInstr (XOR reg1 ri reg2) = pprRegRIReg SLIT("xor") reg1 ri reg2
+pprInstr (XORNOT reg1 ri reg2) = pprRegRIReg SLIT("xornot") reg1 ri reg2
+
+pprInstr (SLL reg1 ri reg2) = pprRegRIReg SLIT("sll") reg1 ri reg2
+pprInstr (SRL reg1 ri reg2) = pprRegRIReg SLIT("srl") reg1 ri reg2
+pprInstr (SRA reg1 ri reg2) = pprRegRIReg SLIT("sra") reg1 ri reg2
+
+pprInstr (ZAP reg1 ri reg2) = pprRegRIReg SLIT("zap") reg1 ri reg2
+pprInstr (ZAPNOT reg1 ri reg2) = pprRegRIReg SLIT("zapnot") reg1 ri reg2
+
+pprInstr (NOP) = ptext SLIT("\tnop")
+
+pprInstr (CMP cond reg1 ri reg2)
+ = hcat [
+ ptext SLIT("\tcmp"),
+ pprCond cond,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (FCLR reg)
+ = hcat [
+ ptext SLIT("\tfclr\t"),
+ pprReg reg
+ ]
+
+pprInstr (FABS reg1 reg2)
+ = hcat [
+ ptext SLIT("\tfabs\t"),
+ pprReg reg1,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (FNEG size reg1 reg2)
+ = hcat [
+ ptext SLIT("\tneg"),
+ pprSize size,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (FADD size reg1 reg2 reg3) = pprSizeRegRegReg SLIT("add") size reg1 reg2 reg3
+pprInstr (FDIV size reg1 reg2 reg3) = pprSizeRegRegReg SLIT("div") size reg1 reg2 reg3
+pprInstr (FMUL size reg1 reg2 reg3) = pprSizeRegRegReg SLIT("mul") size reg1 reg2 reg3
+pprInstr (FSUB size reg1 reg2 reg3) = pprSizeRegRegReg SLIT("sub") size reg1 reg2 reg3
+
+pprInstr (CVTxy size1 size2 reg1 reg2)
+ = hcat [
+ ptext SLIT("\tcvt"),
+ pprSize size1,
+ case size2 of {Q -> ptext SLIT("qc"); _ -> pprSize size2},
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (FCMP size cond reg1 reg2 reg3)
+ = hcat [
+ ptext SLIT("\tcmp"),
+ pprSize size,
+ pprCond cond,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprReg reg2,
+ comma,
+ pprReg reg3
+ ]
+
+pprInstr (FMOV reg1 reg2)
+ = hcat [
+ ptext SLIT("\tfmov\t"),
+ pprReg reg1,
+ comma,
+ pprReg reg2
+ ]
+
+pprInstr (BI ALWAYS reg lab) = pprInstr (BR lab)
+
+pprInstr (BI NEVER reg lab) = empty
+
+pprInstr (BI cond reg lab)
+ = hcat [
+ ptext SLIT("\tb"),
+ pprCond cond,
+ char '\t',
+ pprReg reg,
+ comma,
+ pprImm lab
+ ]
+
+pprInstr (BF cond reg lab)
+ = hcat [
+ ptext SLIT("\tfb"),
+ pprCond cond,
+ char '\t',
+ pprReg reg,
+ comma,
+ pprImm lab
+ ]
+
+pprInstr (BR lab)
+ = (<>) (ptext SLIT("\tbr\t")) (pprImm lab)
+
+pprInstr (JMP reg addr hint)
+ = hcat [
+ ptext SLIT("\tjmp\t"),
+ pprReg reg,
+ comma,
+ pprAddr addr,
+ comma,
+ int hint
+ ]
+
+pprInstr (BSR imm n)
+ = (<>) (ptext SLIT("\tbsr\t")) (pprImm imm)
+
+pprInstr (JSR reg addr n)
+ = hcat [
+ ptext SLIT("\tjsr\t"),
+ pprReg reg,
+ comma,
+ pprAddr addr
+ ]
+
+pprInstr (FUNBEGIN clab)
+ = hcat [
+ if (externallyVisibleCLabel clab) then
+ hcat [ptext SLIT("\t.globl\t"), pp_lab, char '\n']
+ else
+ empty,
+ ptext SLIT("\t.ent "),
+ pp_lab,
+ char '\n',
+ pp_lab,
+ pp_ldgp,
+ pp_lab,
+ pp_frame
+ ]
+ where
+ pp_lab = pprCLabel_asm clab
+
+ -- NEVER use commas within those string literals, cpp will ruin your day
+ pp_ldgp = hcat [ ptext SLIT(":\n\tldgp $29"), char ',', ptext SLIT("0($27)\n") ]
+ pp_frame = hcat [ ptext SLIT("..ng:\n\t.frame $30"), char ',',
+ ptext SLIT("4240"), char ',',
+ ptext SLIT("$26"), char ',',
+ ptext SLIT("0\n\t.prologue 1") ]
+
+pprInstr (FUNEND clab)
+ = (<>) (ptext SLIT("\t.align 4\n\t.end ")) (pprCLabel_asm clab)
+
+-- Continue with Alpha-only printing bits and bobs:
+
+pprRI :: RI -> Doc
+
+pprRI (RIReg r) = pprReg r
+pprRI (RIImm r) = pprImm r
+
+pprRegRIReg :: LitString -> Reg -> RI -> Reg -> Doc
+pprRegRIReg name reg1 ri reg2
+ = hcat [
+ char '\t',
+ ptext name,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
+pprSizeRegRegReg name size reg1 reg2 reg3
+ = hcat [
+ char '\t',
+ ptext name,
+ pprSize size,
+ char '\t',
+ pprReg reg1,
+ comma,
+ pprReg reg2,
+ comma,
+ pprReg reg3
+ ]
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr for an x86
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack
+ | src == dst
+ =
+#if 0 /* #ifdef DEBUG */
+ (<>) (ptext SLIT("# warning: ")) (pprSizeOpOp SLIT("mov") size s d)
+#else
+ empty
+#endif
+
+pprInstr (MOV size src dst)
+ = pprSizeOpOp SLIT("mov") size src dst
+
+pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
+ -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
+ -- movl. But we represent it as a MOVZxL instruction, because
+ -- the reg alloc would tend to throw away a plain reg-to-reg
+ -- move, and we still want it to do that.
+
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
+ -- zero-extension only needs to extend to 32 bits: on x86_64,
+ -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
+ -- instruction is shorter.
+
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
+
+-- here we do some patching, since the physical registers are only set late
+-- in the code generation.
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
+ | reg1 == reg3
+ = pprSizeOpOp SLIT("add") size (OpReg reg2) dst
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
+ | reg2 == reg3
+ = pprSizeOpOp SLIT("add") size (OpReg reg1) dst
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
+ | reg1 == reg3
+ = pprInstr (ADD size (OpImm displ) dst)
+pprInstr (LEA size src dst) = pprSizeOpOp SLIT("lea") size src dst
+
+pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
+ = pprSizeOp SLIT("dec") size dst
+pprInstr (ADD size (OpImm (ImmInt 1)) dst)
+ = pprSizeOp SLIT("inc") size dst
+pprInstr (ADD size src dst)
+ = pprSizeOpOp SLIT("add") size src dst
+pprInstr (ADC size src dst)
+ = pprSizeOpOp SLIT("adc") size src dst
+pprInstr (SUB size src dst) = pprSizeOpOp SLIT("sub") size src dst
+pprInstr (IMUL size op1 op2) = pprSizeOpOp SLIT("imul") size op1 op2
+
+{- A hack. The Intel documentation says that "The two and three
+ operand forms [of IMUL] may also be used with unsigned operands
+ because the lower half of the product is the same regardless if
+ (sic) the operands are signed or unsigned. The CF and OF flags,
+ however, cannot be used to determine if the upper half of the
+ result is non-zero." So there.
+-}
+pprInstr (AND size src dst) = pprSizeOpOp SLIT("and") size src dst
+pprInstr (OR size src dst) = pprSizeOpOp SLIT("or") size src dst
+
+pprInstr (XOR F32 src dst) = pprOpOp SLIT("xorps") F32 src dst
+pprInstr (XOR F64 src dst) = pprOpOp SLIT("xorpd") F64 src dst
+pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor") size src dst
+
+pprInstr (NOT size op) = pprSizeOp SLIT("not") size op
+pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op
+
+pprInstr (SHL size src dst) = pprShift SLIT("shl") size src dst
+pprInstr (SAR size src dst) = pprShift SLIT("sar") size src dst
+pprInstr (SHR size src dst) = pprShift SLIT("shr") size src dst
+
+pprInstr (BT size imm src) = pprSizeImmOp SLIT("bt") size imm src
+
+pprInstr (CMP size src dst)
+ | isFloatingRep size = pprSizeOpOp SLIT("ucomi") size src dst -- SSE2
+ | otherwise = pprSizeOpOp SLIT("cmp") size src dst
+
+pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test") size src dst
+pprInstr (PUSH size op) = pprSizeOp SLIT("push") size op
+pprInstr (POP size op) = pprSizeOp SLIT("pop") size op
+
+-- both unused (SDM):
+-- pprInstr PUSHA = ptext SLIT("\tpushal")
+-- pprInstr POPA = ptext SLIT("\tpopal")
+
+pprInstr NOP = ptext SLIT("\tnop")
+pprInstr (CLTD I32) = ptext SLIT("\tcltd")
+pprInstr (CLTD I64) = ptext SLIT("\tcqto")
+
+pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand I8 op)
+
+pprInstr (JXX cond (BlockId id))
+ = pprCondInstr SLIT("j") cond (pprCLabel_asm lab)
+ where lab = mkAsmTempLabel id
+
+pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
+pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand wordRep op)
+pprInstr (JMP_TBL op ids) = pprInstr (JMP op)
+pprInstr (CALL (Left imm) _) = (<>) (ptext SLIT("\tcall ")) (pprImm imm)
+pprInstr (CALL (Right reg) _) = (<>) (ptext SLIT("\tcall *")) (pprReg wordRep reg)
+
+pprInstr (IDIV sz op) = pprSizeOp SLIT("idiv") sz op
+pprInstr (DIV sz op) = pprSizeOp SLIT("div") sz op
+pprInstr (IMUL2 sz op) = pprSizeOp SLIT("imul") sz op
+
+#if x86_64_TARGET_ARCH
+pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("mul") size op1 op2
+
+pprInstr (FDIV size op1 op2) = pprSizeOpOp SLIT("div") size op1 op2
+
+pprInstr (CVTSS2SD from to) = pprRegReg SLIT("cvtss2sd") from to
+pprInstr (CVTSD2SS from to) = pprRegReg SLIT("cvtsd2ss") from to
+pprInstr (CVTSS2SI from to) = pprOpReg SLIT("cvtss2si") from to
+pprInstr (CVTSD2SI from to) = pprOpReg SLIT("cvtsd2si") from to
+pprInstr (CVTSI2SS from to) = pprOpReg SLIT("cvtsi2ss") from to
+pprInstr (CVTSI2SD from to) = pprOpReg SLIT("cvtsi2sd") from to
+#endif
+
+ -- FETCHGOT for PIC on ELF platforms
+pprInstr (FETCHGOT reg)
+ = vcat [ ptext SLIT("\tcall 1f"),
+ hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ],
+ hcat [ ptext SLIT("\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
+ pprReg I32 reg ]
+ ]
+
+ -- FETCHPC for PIC on Darwin/x86
+ -- get the instruction pointer into a register
+ -- (Terminology note: the IP is called Program Counter on PPC,
+ -- and it's a good thing to use the same name on both platforms)
+pprInstr (FETCHPC reg)
+ = vcat [ ptext SLIT("\tcall 1f"),
+ hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ]
+ ]
+
+
+
+#endif
+
+-- -----------------------------------------------------------------------------
+-- i386 floating-point
+
+#if i386_TARGET_ARCH
+-- Simulating a flat register set on the x86 FP stack is tricky.
+-- You have to free %st(7) before pushing anything on the FP reg stack
+-- so as to preclude the possibility of an FP stack overflow exception.
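+-- Illustrative sketch (assuming, per greg below, that the fake FP registers
+-- are numbered from 8): a GMOV from the first fake register to the second
+-- prints roughly as
+--     ffree %st(7) ; fld %st(0) ; fstp %st(2)
+-- i.e. push the source onto the FP stack, then pop it into the destination.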
+pprInstr g@(GMOV src dst)
+ | src == dst
+ = empty
+ | otherwise
+ = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
+
+-- GLD sz addr dst ==> FFREE %st(7) ; FLDsz addr ; FSTP (dst+1)
+pprInstr g@(GLD sz addr dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fld", pprSize sz, gsp,
+ pprAddr addr, gsemi, gpop dst 1])
+
+-- GST sz src addr ==> FFREE %st(7) ; FLD dst ; FSTPsz addr
+pprInstr g@(GST sz src addr)
+ = pprG g (hcat [gtab, gpush src 0, gsemi,
+ text "fstp", pprSize sz, gsp, pprAddr addr])
+
+pprInstr g@(GLDZ dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fldz ; ", gpop dst 1])
+pprInstr g@(GLD1 dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fld1 ; ", gpop dst 1])
+
+pprInstr g@(GFTOI src dst)
+ = pprInstr (GDTOI src dst)
+pprInstr g@(GDTOI src dst)
+ = pprG g (hcat [gtab, text "subl $4, %esp ; ",
+ gpush src 0, gsemi, text "fistpl 0(%esp) ; popl ",
+ pprReg I32 dst])
+
+pprInstr g@(GITOF src dst)
+ = pprInstr (GITOD src dst)
+pprInstr g@(GITOD src dst)
+ = pprG g (hcat [gtab, text "pushl ", pprReg I32 src,
+ text " ; ffree %st(7); fildl (%esp) ; ",
+ gpop dst 1, text " ; addl $4,%esp"])
+
+{- Gruesome swamp follows. If you're unfortunate enough to have ventured
+ this far into the jungle AND you give a Rat's Ass (tm) what's going
+ on, here's the deal. Generate code to do a floating point comparison
+ of src1 and src2, of kind cond, and set the Zero flag if true.
+
+ The complications are to do with handling NaNs correctly. We want the
+ property that if either argument is NaN, then the result of the
+ comparison is False ... except if we're comparing for inequality,
+ in which case the answer is True.
+
+ Here's how the general (non-inequality) case works. As an
+ example, consider generating an equality test:
+
+ pushl %eax -- we need to mess with this
+ <get src1 to top of FPU stack>
+ fcomp <src2 location in FPU stack> and pop pushed src1
+ -- Result of comparison is in FPU Status Register bits
+ -- C3 C2 and C0
+ fstsw %ax -- Move FPU Status Reg to %ax
+ sahf -- move C3 C2 C0 from %ax to integer flag reg
+ -- now the serious magic begins
+ setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
+ sete %al -- %al = if arg1 == arg2 then 1 else 0
+ andb %ah,%al -- %al &= %ah
+ -- so %al == 1 iff (comparable && same); else it holds 0
+ decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
+ else %al == 0xFF, ZeroFlag=0
+ -- the zero flag is now set as we desire.
+ popl %eax
+
+ The special case of inequality differs thusly:
+
+ setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
+ setne %al -- %al = if arg1 /= arg2 then 1 else 0
+ orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
+ decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
+ else (%al == 0xFF, ZF=0)
+-}
+pprInstr g@(GCMP cond src1 src2)
+ | case cond of { NE -> True; other -> False }
+ = pprG g (vcat [
+ hcat [gtab, text "pushl %eax ; ",gpush src1 0],
+ hcat [gtab, text "fcomp ", greg src2 1,
+ text "; fstsw %ax ; sahf ; setpe %ah"],
+ hcat [gtab, text "setne %al ; ",
+ text "orb %ah,%al ; decb %al ; popl %eax"]
+ ])
+ | otherwise
+ = pprG g (vcat [
+ hcat [gtab, text "pushl %eax ; ",gpush src1 0],
+ hcat [gtab, text "fcomp ", greg src2 1,
+ text "; fstsw %ax ; sahf ; setpo %ah"],
+ hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
+ text "andb %ah,%al ; decb %al ; popl %eax"]
+ ])
+ where
+ {- On the 486, the flags set by FP compare are the unsigned ones!
+ (This looks like a HACK to me. WDP 96/03)
+ -}
+ fix_FP_cond :: Cond -> Cond
+ fix_FP_cond GE = GEU
+ fix_FP_cond GTT = GU
+ fix_FP_cond LTT = LU
+ fix_FP_cond LE = LEU
+ fix_FP_cond EQQ = EQQ
+ fix_FP_cond NE = NE
+ -- there should be no others
+
+
+pprInstr g@(GABS sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
+pprInstr g@(GNEG sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
+
+pprInstr g@(GSQRT sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GSIN sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fsin"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GCOS sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fcos"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GTAN sz src dst)
+ = pprG g (hcat [gtab, text "ffree %st(6) ; ",
+ gpush src 0, text " ; fptan ; ",
+ text " fstp %st(0)"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+
+-- In the translations for GADD, GMUL, GSUB and GDIV,
+-- the first two cases are mere optimisations. The otherwise clause
+-- generates correct code under all circumstances.
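+-- Illustrative sketch of the general case (same fake-register numbering
+-- assumption as for GMOV above): GADD with src1, src2 and dst all distinct
+-- prints roughly as
+--     ffree %st(7) ; fld %st(0) ; fadd %st(2),%st(0) ; fstp %st(3)
+-- i.e. push src1, add src2 into the stack top, then pop the sum into dst.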
+
+pprInstr g@(GADD sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GADD-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; faddp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GADD-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; faddp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fadd ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GMUL sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GMUL-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fmulp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GMUL-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fmulp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fmul ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GSUB sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GSUB-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fsubrp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GSUB-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fsubp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fsub ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GDIV sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GDIV-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fdivrp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GDIV-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fdivp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fdiv ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr GFREE
+ = vcat [ ptext SLIT("\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
+ ptext SLIT("\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
+ ]
+
+--------------------------
+
+-- coerce %st(0) to the specified size
+gcoerceto F64 = empty
+gcoerceto F32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
+
+gpush reg offset
+ = hcat [text "ffree %st(7) ; fld ", greg reg offset]
+gpop reg offset
+ = hcat [text "fstp ", greg reg offset]
+
+greg reg offset = text "%st(" <> int (gregno reg - 8+offset) <> char ')'
+gsemi = text " ; "
+gtab = char '\t'
+gsp = char ' '
+
+gregno (RealReg i) = i
+gregno other = --pprPanic "gregno" (ppr other)
+ 999 -- bogus; only needed for debug printing
+
+pprG :: Instr -> Doc -> Doc
+pprG fake actual
+ = (char '#' <> pprGInstr fake) $$ actual
+
+pprGInstr (GMOV src dst) = pprSizeRegReg SLIT("gmov") F64 src dst
+pprGInstr (GLD sz src dst) = pprSizeAddrReg SLIT("gld") sz src dst
+pprGInstr (GST sz src dst) = pprSizeRegAddr SLIT("gst") sz src dst
+
+pprGInstr (GLDZ dst) = pprSizeReg SLIT("gldz") F64 dst
+pprGInstr (GLD1 dst) = pprSizeReg SLIT("gld1") F64 dst
+
+pprGInstr (GFTOI src dst) = pprSizeSizeRegReg SLIT("gftoi") F32 I32 src dst
+pprGInstr (GDTOI src dst) = pprSizeSizeRegReg SLIT("gdtoi") F64 I32 src dst
+
+pprGInstr (GITOF src dst) = pprSizeSizeRegReg SLIT("gitof") I32 F32 src dst
+pprGInstr (GITOD src dst) = pprSizeSizeRegReg SLIT("gitod") I32 F64 src dst
+
+pprGInstr (GCMP co src dst) = pprCondRegReg SLIT("gcmp_") F64 co src dst
+pprGInstr (GABS sz src dst) = pprSizeRegReg SLIT("gabs") sz src dst
+pprGInstr (GNEG sz src dst) = pprSizeRegReg SLIT("gneg") sz src dst
+pprGInstr (GSQRT sz src dst) = pprSizeRegReg SLIT("gsqrt") sz src dst
+pprGInstr (GSIN sz src dst) = pprSizeRegReg SLIT("gsin") sz src dst
+pprGInstr (GCOS sz src dst) = pprSizeRegReg SLIT("gcos") sz src dst
+pprGInstr (GTAN sz src dst) = pprSizeRegReg SLIT("gtan") sz src dst
+
+pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 dst
+pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst
+pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg SLIT("gmul") sz src1 src2 dst
+pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg SLIT("gdiv") sz src1 src2 dst
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- Continue with I386-only printing bits and bobs:
+
+pprDollImm :: Imm -> Doc
+
+pprDollImm i = ptext SLIT("$") <> pprImm i
+
+pprOperand :: MachRep -> Operand -> Doc
+pprOperand s (OpReg r) = pprReg s r
+pprOperand s (OpImm i) = pprDollImm i
+pprOperand s (OpAddr ea) = pprAddr ea
+
+pprMnemonic_ :: LitString -> Doc
+pprMnemonic_ name =
+ char '\t' <> ptext name <> space
+
+pprMnemonic :: LitString -> MachRep -> Doc
+pprMnemonic name size =
+ char '\t' <> ptext name <> pprSize size <> space
+
+pprSizeImmOp :: LitString -> MachRep -> Imm -> Operand -> Doc
+pprSizeImmOp name size imm op1
+ = hcat [
+ pprMnemonic name size,
+ char '$',
+ pprImm imm,
+ comma,
+ pprOperand size op1
+ ]
+
+pprSizeOp :: LitString -> MachRep -> Operand -> Doc
+pprSizeOp name size op1
+ = hcat [
+ pprMnemonic name size,
+ pprOperand size op1
+ ]
+
+pprSizeOpOp :: LitString -> MachRep -> Operand -> Operand -> Doc
+pprSizeOpOp name size op1 op2
+ = hcat [
+ pprMnemonic name size,
+ pprOperand size op1,
+ comma,
+ pprOperand size op2
+ ]
+
+pprOpOp :: LitString -> MachRep -> Operand -> Operand -> Doc
+pprOpOp name size op1 op2
+ = hcat [
+ pprMnemonic_ name,
+ pprOperand size op1,
+ comma,
+ pprOperand size op2
+ ]
+
+pprSizeReg :: LitString -> MachRep -> Reg -> Doc
+pprSizeReg name size reg1
+ = hcat [
+ pprMnemonic name size,
+ pprReg size reg1
+ ]
+
+pprSizeRegReg :: LitString -> MachRep -> Reg -> Reg -> Doc
+pprSizeRegReg name size reg1 reg2
+ = hcat [
+ pprMnemonic name size,
+ pprReg size reg1,
+ comma,
+ pprReg size reg2
+ ]
+
+pprRegReg :: LitString -> Reg -> Reg -> Doc
+pprRegReg name reg1 reg2
+ = hcat [
+ pprMnemonic_ name,
+ pprReg wordRep reg1,
+ comma,
+ pprReg wordRep reg2
+ ]
+
+pprOpReg :: LitString -> Operand -> Reg -> Doc
+pprOpReg name op1 reg2
+ = hcat [
+ pprMnemonic_ name,
+ pprOperand wordRep op1,
+ comma,
+ pprReg wordRep reg2
+ ]
+
+pprCondRegReg :: LitString -> MachRep -> Cond -> Reg -> Reg -> Doc
+pprCondRegReg name size cond reg1 reg2
+ = hcat [
+ char '\t',
+ ptext name,
+ pprCond cond,
+ space,
+ pprReg size reg1,
+ comma,
+ pprReg size reg2
+ ]
+
+pprSizeSizeRegReg :: LitString -> MachRep -> MachRep -> Reg -> Reg -> Doc
+pprSizeSizeRegReg name size1 size2 reg1 reg2
+ = hcat [
+ char '\t',
+ ptext name,
+ pprSize size1,
+ pprSize size2,
+ space,
+ pprReg size1 reg1,
+ comma,
+ pprReg size2 reg2
+ ]
+
+pprSizeRegRegReg :: LitString -> MachRep -> Reg -> Reg -> Reg -> Doc
+pprSizeRegRegReg name size reg1 reg2 reg3
+ = hcat [
+ pprMnemonic name size,
+ pprReg size reg1,
+ comma,
+ pprReg size reg2,
+ comma,
+ pprReg size reg3
+ ]
+
+pprSizeAddrReg :: LitString -> MachRep -> AddrMode -> Reg -> Doc
+pprSizeAddrReg name size op dst
+ = hcat [
+ pprMnemonic name size,
+ pprAddr op,
+ comma,
+ pprReg size dst
+ ]
+
+pprSizeRegAddr :: LitString -> MachRep -> Reg -> AddrMode -> Doc
+pprSizeRegAddr name size src op
+ = hcat [
+ pprMnemonic name size,
+ pprReg size src,
+ comma,
+ pprAddr op
+ ]
+
+pprShift :: LitString -> MachRep -> Operand -> Operand -> Doc
+pprShift name size src dest
+ = hcat [
+ pprMnemonic name size,
+ pprOperand I8 src, -- src is 8-bit sized
+ comma,
+ pprOperand size dest
+ ]
+
+pprSizeOpOpCoerce :: LitString -> MachRep -> MachRep -> Operand -> Operand -> Doc
+pprSizeOpOpCoerce name size1 size2 op1 op2
+ = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
+ pprOperand size1 op1,
+ comma,
+ pprOperand size2 op2
+ ]
+
+pprCondInstr :: LitString -> Cond -> Doc -> Doc
+pprCondInstr name cond arg
+ = hcat [ char '\t', ptext name, pprCond cond, space, arg]
+
+#endif /* i386_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr for a SPARC
+
+#if sparc_TARGET_ARCH
+
+-- a clumsy hack for now, to handle possible double alignment problems
+
+-- even clumsier, to allow for RegReg regs that show up when doing indexed
+-- reads (bytearrays).
+--
+
+-- Translate to the following:
+-- add g1,g2,g1
+-- ld [g1],%fn
+-- ld [g1+4],%f(n+1)
+-- sub g1,g2,g1 -- to restore g1
+
+pprInstr (LD F64 (AddrRegReg g1 g2) reg)
+ = vcat [
+ hcat [ptext SLIT("\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1],
+ hcat [pp_ld_lbracket, pprReg g1, pp_rbracket_comma, pprReg reg],
+ hcat [pp_ld_lbracket, pprReg g1, ptext SLIT("+4]"), comma, pprReg (fPair reg)],
+ hcat [ptext SLIT("\tsub\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1]
+ ]
+
+-- Translate to
+-- ld [addr],%fn
+-- ld [addr+4],%f(n+1)
+pprInstr (LD F64 addr reg) | isJust off_addr
+ = vcat [
+ hcat [pp_ld_lbracket, pprAddr addr, pp_rbracket_comma, pprReg reg],
+ hcat [pp_ld_lbracket, pprAddr addr2, pp_rbracket_comma,pprReg (fPair reg)]
+ ]
+ where
+ off_addr = addrOffset addr 4
+ addr2 = case off_addr of Just x -> x
+
+
+pprInstr (LD size addr reg)
+ = hcat [
+ ptext SLIT("\tld"),
+ pprSize size,
+ char '\t',
+ lbrack,
+ pprAddr addr,
+ pp_rbracket_comma,
+ pprReg reg
+ ]
+
+-- The same clumsy hack as above
+
+-- Translate to the following:
+-- add g1,g2,g1
+-- st %fn,[g1]
+-- st %f(n+1),[g1+4]
+-- sub g1,g2,g1 -- to restore g1
+pprInstr (ST F64 reg (AddrRegReg g1 g2))
+ = vcat [
+ hcat [ptext SLIT("\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1],
+ hcat [ptext SLIT("\tst\t"), pprReg reg, pp_comma_lbracket,
+ pprReg g1, rbrack],
+ hcat [ptext SLIT("\tst\t"), pprReg (fPair reg), pp_comma_lbracket,
+ pprReg g1, ptext SLIT("+4]")],
+ hcat [ptext SLIT("\tsub\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1]
+ ]
+
+-- Translate to
+-- st %fn,[addr]
+-- st %f(n+1),[addr+4]
+pprInstr (ST F64 reg addr) | isJust off_addr
+ = vcat [
+ hcat [ptext SLIT("\tst\t"), pprReg reg, pp_comma_lbracket,
+ pprAddr addr, rbrack],
+ hcat [ptext SLIT("\tst\t"), pprReg (fPair reg), pp_comma_lbracket,
+ pprAddr addr2, rbrack]
+ ]
+ where
+ off_addr = addrOffset addr 4
+ addr2 = case off_addr of Just x -> x
+
+-- no distinction is made between signed and unsigned bytes on stores for the
+-- Sparc opcodes (at least I cannot see any, and gas is nagging me --SOF),
+-- so we use a special-purpose pprStSize for ST.
+
+pprInstr (ST size reg addr)
+ = hcat [
+ ptext SLIT("\tst"),
+ pprStSize size,
+ char '\t',
+ pprReg reg,
+ pp_comma_lbracket,
+ pprAddr addr,
+ rbrack
+ ]
+
+pprInstr (ADD x cc reg1 ri reg2)
+ | not x && not cc && riZero ri
+ = hcat [ ptext SLIT("\tmov\t"), pprReg reg1, comma, pprReg reg2 ]
+ | otherwise
+ = pprRegRIReg (if x then SLIT("addx") else SLIT("add")) cc reg1 ri reg2
+
+pprInstr (SUB x cc reg1 ri reg2)
+ | not x && cc && reg2 == g0
+ = hcat [ ptext SLIT("\tcmp\t"), pprReg reg1, comma, pprRI ri ]
+ | not x && not cc && riZero ri
+ = hcat [ ptext SLIT("\tmov\t"), pprReg reg1, comma, pprReg reg2 ]
+ | otherwise
+ = pprRegRIReg (if x then SLIT("subx") else SLIT("sub")) cc reg1 ri reg2
+
+pprInstr (AND b reg1 ri reg2) = pprRegRIReg SLIT("and") b reg1 ri reg2
+pprInstr (ANDN b reg1 ri reg2) = pprRegRIReg SLIT("andn") b reg1 ri reg2
+
+pprInstr (OR b reg1 ri reg2)
+ | not b && reg1 == g0
+ = let doit = hcat [ ptext SLIT("\tmov\t"), pprRI ri, comma, pprReg reg2 ]
+ in case ri of
+ RIReg rrr | rrr == reg2 -> empty
+ other -> doit
+ | otherwise
+ = pprRegRIReg SLIT("or") b reg1 ri reg2
+
+pprInstr (ORN b reg1 ri reg2) = pprRegRIReg SLIT("orn") b reg1 ri reg2
+
+pprInstr (XOR b reg1 ri reg2) = pprRegRIReg SLIT("xor") b reg1 ri reg2
+pprInstr (XNOR b reg1 ri reg2) = pprRegRIReg SLIT("xnor") b reg1 ri reg2
+
+pprInstr (SLL reg1 ri reg2) = pprRegRIReg SLIT("sll") False reg1 ri reg2
+pprInstr (SRL reg1 ri reg2) = pprRegRIReg SLIT("srl") False reg1 ri reg2
+pprInstr (SRA reg1 ri reg2) = pprRegRIReg SLIT("sra") False reg1 ri reg2
+
+pprInstr (RDY rd) = ptext SLIT("\trd\t%y,") <> pprReg rd
+pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg SLIT("smul") b reg1 ri reg2
+pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg SLIT("umul") b reg1 ri reg2
+
+pprInstr (SETHI imm reg)
+ = hcat [
+ ptext SLIT("\tsethi\t"),
+ pprImm imm,
+ comma,
+ pprReg reg
+ ]
+
+pprInstr NOP = ptext SLIT("\tnop")
+
+pprInstr (FABS F32 reg1 reg2) = pprSizeRegReg SLIT("fabs") F32 reg1 reg2
+pprInstr (FABS F64 reg1 reg2)
+ = (<>) (pprSizeRegReg SLIT("fabs") F32 reg1 reg2)
+ (if (reg1 == reg2) then empty
+ else (<>) (char '\n')
+ (pprSizeRegReg SLIT("fmov") F32 (fPair reg1) (fPair reg2)))
+
+pprInstr (FADD size reg1 reg2 reg3)
+ = pprSizeRegRegReg SLIT("fadd") size reg1 reg2 reg3
+pprInstr (FCMP e size reg1 reg2)
+ = pprSizeRegReg (if e then SLIT("fcmpe") else SLIT("fcmp")) size reg1 reg2
+pprInstr (FDIV size reg1 reg2 reg3)
+ = pprSizeRegRegReg SLIT("fdiv") size reg1 reg2 reg3
+
+pprInstr (FMOV F32 reg1 reg2) = pprSizeRegReg SLIT("fmov") F32 reg1 reg2
+pprInstr (FMOV F64 reg1 reg2)
+ = (<>) (pprSizeRegReg SLIT("fmov") F32 reg1 reg2)
+ (if (reg1 == reg2) then empty
+ else (<>) (char '\n')
+ (pprSizeRegReg SLIT("fmov") F32 (fPair reg1) (fPair reg2)))
+
+pprInstr (FMUL size reg1 reg2 reg3)
+ = pprSizeRegRegReg SLIT("fmul") size reg1 reg2 reg3
+
+pprInstr (FNEG F32 reg1 reg2) = pprSizeRegReg SLIT("fneg") F32 reg1 reg2
+pprInstr (FNEG F64 reg1 reg2)
+ = (<>) (pprSizeRegReg SLIT("fneg") F32 reg1 reg2)
+ (if (reg1 == reg2) then empty
+ else (<>) (char '\n')
+ (pprSizeRegReg SLIT("fmov") F32 (fPair reg1) (fPair reg2)))
+
+pprInstr (FSQRT size reg1 reg2) = pprSizeRegReg SLIT("fsqrt") size reg1 reg2
+pprInstr (FSUB size reg1 reg2 reg3) = pprSizeRegRegReg SLIT("fsub") size reg1 reg2 reg3
+pprInstr (FxTOy size1 size2 reg1 reg2)
+ = hcat [
+ ptext SLIT("\tf"),
+ ptext
+ (case size1 of
+ I32 -> SLIT("ito")
+ F32 -> SLIT("sto")
+ F64 -> SLIT("dto")),
+ ptext
+ (case size2 of
+ I32 -> SLIT("i\t")
+ F32 -> SLIT("s\t")
+ F64 -> SLIT("d\t")),
+ pprReg reg1, comma, pprReg reg2
+ ]
+
+
+pprInstr (BI cond b lab)
+ = hcat [
+ ptext SLIT("\tb"), pprCond cond,
+ if b then pp_comma_a else empty,
+ char '\t',
+ pprImm lab
+ ]
+
+pprInstr (BF cond b lab)
+ = hcat [
+ ptext SLIT("\tfb"), pprCond cond,
+ if b then pp_comma_a else empty,
+ char '\t',
+ pprImm lab
+ ]
+
+pprInstr (JMP addr) = (<>) (ptext SLIT("\tjmp\t")) (pprAddr addr)
+
+pprInstr (CALL (Left imm) n _)
+ = hcat [ ptext SLIT("\tcall\t"), pprImm imm, comma, int n ]
+pprInstr (CALL (Right reg) n _)
+ = hcat [ ptext SLIT("\tcall\t"), pprReg reg, comma, int n ]
+
+pprRI :: RI -> Doc
+pprRI (RIReg r) = pprReg r
+pprRI (RIImm r) = pprImm r
+
+pprSizeRegReg :: LitString -> MachRep -> Reg -> Reg -> Doc
+pprSizeRegReg name size reg1 reg2
+ = hcat [
+ char '\t',
+ ptext name,
+ (case size of
+ F32 -> ptext SLIT("s\t")
+ F64 -> ptext SLIT("d\t")),
+ pprReg reg1,
+ comma,
+ pprReg reg2
+ ]
+
+pprSizeRegRegReg :: LitString -> MachRep -> Reg -> Reg -> Reg -> Doc
+pprSizeRegRegReg name size reg1 reg2 reg3
+ = hcat [
+ char '\t',
+ ptext name,
+ (case size of
+ F32 -> ptext SLIT("s\t")
+ F64 -> ptext SLIT("d\t")),
+ pprReg reg1,
+ comma,
+ pprReg reg2,
+ comma,
+ pprReg reg3
+ ]
+
+pprRegRIReg :: LitString -> Bool -> Reg -> RI -> Reg -> Doc
+pprRegRIReg name b reg1 ri reg2
+ = hcat [
+ char '\t',
+ ptext name,
+ if b then ptext SLIT("cc\t") else char '\t',
+ pprReg reg1,
+ comma,
+ pprRI ri,
+ comma,
+ pprReg reg2
+ ]
+
+pprRIReg :: LitString -> Bool -> RI -> Reg -> Doc
+pprRIReg name b ri reg1
+ = hcat [
+ char '\t',
+ ptext name,
+ if b then ptext SLIT("cc\t") else char '\t',
+ pprRI ri,
+ comma,
+ pprReg reg1
+ ]
+
+pp_ld_lbracket = ptext SLIT("\tld\t[")
+pp_rbracket_comma = text "],"
+pp_comma_lbracket = text ",["
+pp_comma_a = text ",a"
+
+#endif /* sparc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr for PowerPC
+
+#if powerpc_TARGET_ARCH
+pprInstr (LD sz reg addr) = hcat [
+ char '\t',
+ ptext SLIT("l"),
+ ptext (case sz of
+ I8 -> SLIT("bz")
+ I16 -> SLIT("hz")
+ I32 -> SLIT("wz")
+ F32 -> SLIT("fs")
+ F64 -> SLIT("fd")),
+ case addr of AddrRegImm _ _ -> empty
+ AddrRegReg _ _ -> char 'x',
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprAddr addr
+ ]
+pprInstr (LA sz reg addr) = hcat [
+ char '\t',
+ ptext SLIT("l"),
+ ptext (case sz of
+ I8 -> SLIT("ba")
+ I16 -> SLIT("ha")
+ I32 -> SLIT("wa")
+ F32 -> SLIT("fs")
+ F64 -> SLIT("fd")),
+ case addr of AddrRegImm _ _ -> empty
+ AddrRegReg _ _ -> char 'x',
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprAddr addr
+ ]
+pprInstr (ST sz reg addr) = hcat [
+ char '\t',
+ ptext SLIT("st"),
+ pprSize sz,
+ case addr of AddrRegImm _ _ -> empty
+ AddrRegReg _ _ -> char 'x',
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprAddr addr
+ ]
+pprInstr (STU sz reg addr) = hcat [
+ char '\t',
+ ptext SLIT("st"),
+ pprSize sz,
+ ptext SLIT("u\t"),
+ case addr of AddrRegImm _ _ -> empty
+ AddrRegReg _ _ -> char 'x',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprAddr addr
+ ]
+pprInstr (LIS reg imm) = hcat [
+ char '\t',
+ ptext SLIT("lis"),
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprImm imm
+ ]
+pprInstr (LI reg imm) = hcat [
+ char '\t',
+ ptext SLIT("li"),
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprImm imm
+ ]
+pprInstr (MR reg1 reg2)
+ | reg1 == reg2 = empty
+ | otherwise = hcat [
+ char '\t',
+ case regClass reg1 of
+ RcInteger -> ptext SLIT("mr")
+ _ -> ptext SLIT("fmr"),
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2
+ ]
+pprInstr (CMP sz reg ri) = hcat [
+ char '\t',
+ op,
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprRI ri
+ ]
+ where
+ op = hcat [
+ ptext SLIT("cmp"),
+ pprSize sz,
+ case ri of
+ RIReg _ -> empty
+ RIImm _ -> char 'i'
+ ]
+pprInstr (CMPL sz reg ri) = hcat [
+ char '\t',
+ op,
+ char '\t',
+ pprReg reg,
+ ptext SLIT(", "),
+ pprRI ri
+ ]
+ where
+ op = hcat [
+ ptext SLIT("cmpl"),
+ pprSize sz,
+ case ri of
+ RIReg _ -> empty
+ RIImm _ -> char 'i'
+ ]
+pprInstr (BCC cond (BlockId id)) = hcat [
+ char '\t',
+ ptext SLIT("b"),
+ pprCond cond,
+ char '\t',
+ pprCLabel_asm lbl
+ ]
+ where lbl = mkAsmTempLabel id
+
+pprInstr (JMP lbl) = hcat [ -- an alias for b that takes a CLabel
+ char '\t',
+ ptext SLIT("b"),
+ char '\t',
+ pprCLabel_asm lbl
+ ]
+
+pprInstr (MTCTR reg) = hcat [
+ char '\t',
+ ptext SLIT("mtctr"),
+ char '\t',
+ pprReg reg
+ ]
+pprInstr (BCTR _) = hcat [
+ char '\t',
+ ptext SLIT("bctr")
+ ]
+pprInstr (BL lbl _) = hcat [
+ ptext SLIT("\tbl\t"),
+ pprCLabel_asm lbl
+ ]
+pprInstr (BCTRL _) = hcat [
+ char '\t',
+ ptext SLIT("bctrl")
+ ]
+pprInstr (ADD reg1 reg2 ri) = pprLogic SLIT("add") reg1 reg2 ri
+pprInstr (ADDIS reg1 reg2 imm) = hcat [
+ char '\t',
+ ptext SLIT("addis"),
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ pprImm imm
+ ]
+
+pprInstr (ADDC reg1 reg2 reg3) = pprLogic SLIT("addc") reg1 reg2 (RIReg reg3)
+pprInstr (ADDE reg1 reg2 reg3) = pprLogic SLIT("adde") reg1 reg2 (RIReg reg3)
+pprInstr (SUBF reg1 reg2 reg3) = pprLogic SLIT("subf") reg1 reg2 (RIReg reg3)
+pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic SLIT("mullw") reg1 reg2 ri
+pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic SLIT("mull") reg1 reg2 ri
+pprInstr (DIVW reg1 reg2 reg3) = pprLogic SLIT("divw") reg1 reg2 (RIReg reg3)
+pprInstr (DIVWU reg1 reg2 reg3) = pprLogic SLIT("divwu") reg1 reg2 (RIReg reg3)
+
+pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [
+ hcat [ ptext SLIT("\tmullwo\t"), pprReg reg1, ptext SLIT(", "),
+ pprReg reg2, ptext SLIT(", "),
+ pprReg reg3 ],
+ hcat [ ptext SLIT("\tmfxer\t"), pprReg reg1 ],
+ hcat [ ptext SLIT("\trlwinm\t"), pprReg reg1, ptext SLIT(", "),
+ pprReg reg1, ptext SLIT(", "),
+ ptext SLIT("2, 31, 31") ]
+ ]
+
+ -- for some reason, "andi" doesn't exist.
+ -- we'll use "andi." instead.
+pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
+ char '\t',
+ ptext SLIT("andi."),
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ pprImm imm
+ ]
+pprInstr (AND reg1 reg2 ri) = pprLogic SLIT("and") reg1 reg2 ri
+
+pprInstr (OR reg1 reg2 ri) = pprLogic SLIT("or") reg1 reg2 ri
+pprInstr (XOR reg1 reg2 ri) = pprLogic SLIT("xor") reg1 reg2 ri
+
+pprInstr (XORIS reg1 reg2 imm) = hcat [
+ char '\t',
+ ptext SLIT("xoris"),
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ pprImm imm
+ ]
+
+pprInstr (EXTS sz reg1 reg2) = hcat [
+ char '\t',
+ ptext SLIT("exts"),
+ pprSize sz,
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2
+ ]
+
+pprInstr (NEG reg1 reg2) = pprUnary SLIT("neg") reg1 reg2
+pprInstr (NOT reg1 reg2) = pprUnary SLIT("not") reg1 reg2
+
+pprInstr (SLW reg1 reg2 ri) = pprLogic SLIT("slw") reg1 reg2 (limitShiftRI ri)
+pprInstr (SRW reg1 reg2 ri) = pprLogic SLIT("srw") reg1 reg2 (limitShiftRI ri)
+pprInstr (SRAW reg1 reg2 ri) = pprLogic SLIT("sraw") reg1 reg2 (limitShiftRI ri)
+pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
+ ptext SLIT("\trlwinm\t"),
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ int sh,
+ ptext SLIT(", "),
+ int mb,
+ ptext SLIT(", "),
+ int me
+ ]
+
+pprInstr (FADD sz reg1 reg2 reg3) = pprBinaryF SLIT("fadd") sz reg1 reg2 reg3
+pprInstr (FSUB sz reg1 reg2 reg3) = pprBinaryF SLIT("fsub") sz reg1 reg2 reg3
+pprInstr (FMUL sz reg1 reg2 reg3) = pprBinaryF SLIT("fmul") sz reg1 reg2 reg3
+pprInstr (FDIV sz reg1 reg2 reg3) = pprBinaryF SLIT("fdiv") sz reg1 reg2 reg3
+pprInstr (FNEG reg1 reg2) = pprUnary SLIT("fneg") reg1 reg2
+
+pprInstr (FCMP reg1 reg2) = hcat [
+ char '\t',
+ ptext SLIT("fcmpu\tcr0, "),
+ -- Note: we're using fcmpu, not fcmpo
+        -- The difference is that with fcmpo, comparing with NaN is an invalid operation.
+ -- We don't handle invalid fp ops, so we don't care
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2
+ ]
+
+pprInstr (FCTIWZ reg1 reg2) = pprUnary SLIT("fctiwz") reg1 reg2
+pprInstr (FRSP reg1 reg2) = pprUnary SLIT("frsp") reg1 reg2
+
+pprInstr (CRNOR dst src1 src2) = hcat [
+ ptext SLIT("\tcrnor\t"),
+ int dst,
+ ptext SLIT(", "),
+ int src1,
+ ptext SLIT(", "),
+ int src2
+ ]
+
+pprInstr (MFCR reg) = hcat [
+ char '\t',
+ ptext SLIT("mfcr"),
+ char '\t',
+ pprReg reg
+ ]
+
+pprInstr (MFLR reg) = hcat [
+ char '\t',
+ ptext SLIT("mflr"),
+ char '\t',
+ pprReg reg
+ ]
+
+pprInstr (FETCHPC reg) = vcat [
+ ptext SLIT("\tbcl\t20,31,1f"),
+ hcat [ ptext SLIT("1:\tmflr\t"), pprReg reg ]
+ ]
+
+pprInstr _ = panic "pprInstr (ppc)"
+
+pprLogic op reg1 reg2 ri = hcat [
+ char '\t',
+ ptext op,
+ case ri of
+ RIReg _ -> empty
+ RIImm _ -> char 'i',
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ pprRI ri
+ ]
+
+pprUnary op reg1 reg2 = hcat [
+ char '\t',
+ ptext op,
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2
+ ]
+
+pprBinaryF op sz reg1 reg2 reg3 = hcat [
+ char '\t',
+ ptext op,
+ pprFSize sz,
+ char '\t',
+ pprReg reg1,
+ ptext SLIT(", "),
+ pprReg reg2,
+ ptext SLIT(", "),
+ pprReg reg3
+ ]
+
+pprRI :: RI -> Doc
+pprRI (RIReg r) = pprReg r
+pprRI (RIImm r) = pprImm r
+
+pprFSize F64 = empty
+pprFSize F32 = char 's'
+
+ -- limit immediate argument for shift instruction to range 0..32
+ -- (yes, the maximum is really 32, not 31)
+limitShiftRI :: RI -> RI
+limitShiftRI (RIImm (ImmInt i)) | i > 32 || i < 0 = RIImm (ImmInt 32)
+limitShiftRI x = x
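+-- A quick worked example of the clamping:
+--     limitShiftRI (RIImm (ImmInt 40)) gives RIImm (ImmInt 32),
+-- while an in-range shift such as RIImm (ImmInt 5) passes through unchanged.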
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Converting floating-point literals to integrals for printing
+
+#if __GLASGOW_HASKELL__ >= 504
+newFloatArray :: (Int,Int) -> ST s (STUArray s Int Float)
+newFloatArray = newArray_
+
+newDoubleArray :: (Int,Int) -> ST s (STUArray s Int Double)
+newDoubleArray = newArray_
+
+castFloatToCharArray :: STUArray s Int Float -> ST s (STUArray s Int Word8)
+castFloatToCharArray = castSTUArray
+
+castDoubleToCharArray :: STUArray s Int Double -> ST s (STUArray s Int Word8)
+castDoubleToCharArray = castSTUArray
+
+writeFloatArray :: STUArray s Int Float -> Int -> Float -> ST s ()
+writeFloatArray = writeArray
+
+writeDoubleArray :: STUArray s Int Double -> Int -> Double -> ST s ()
+writeDoubleArray = writeArray
+
+readCharArray :: STUArray s Int Word8 -> Int -> ST s Char
+readCharArray arr i = do
+ w <- readArray arr i
+ return $! (chr (fromIntegral w))
+
+#else
+
+castFloatToCharArray :: MutableByteArray s t -> ST s (MutableByteArray s t)
+castFloatToCharArray = return
+
+castDoubleToCharArray :: MutableByteArray s t -> ST s (MutableByteArray s t)
+castDoubleToCharArray = return
+
+#endif
+
+-- floatToBytes and doubleToBytes convert to the host's byte
+-- order. Providing that we're not cross-compiling for a
+-- target with the opposite endianness, this should work ok
+-- on all targets.
+
+-- ToDo: this stuff is very similar to the shenanigans in PprAbs,
+-- could they be merged?
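+-- For instance, on a little-endian host (illustrative; the exact list depends
+-- on the host byte order), the IEEE-754 encoding of 1.0 is 0x3FF0000000000000,
+-- so
+--     doubleToBytes 1.0 == [0,0,0,0,0,0,0xF0,0x3F]
+-- i.e. [0,0,0,0,0,0,240,63].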
+
+floatToBytes :: Float -> [Int]
+floatToBytes f
+ = runST (do
+ arr <- newFloatArray ((0::Int),3)
+ writeFloatArray arr 0 f
+ arr <- castFloatToCharArray arr
+ i0 <- readCharArray arr 0
+ i1 <- readCharArray arr 1
+ i2 <- readCharArray arr 2
+ i3 <- readCharArray arr 3
+ return (map ord [i0,i1,i2,i3])
+ )
+
+doubleToBytes :: Double -> [Int]
+doubleToBytes d
+ = runST (do
+ arr <- newDoubleArray ((0::Int),7)
+ writeDoubleArray arr 0 d
+ arr <- castDoubleToCharArray arr
+ i0 <- readCharArray arr 0
+ i1 <- readCharArray arr 1
+ i2 <- readCharArray arr 2
+ i3 <- readCharArray arr 3
+ i4 <- readCharArray arr 4
+ i5 <- readCharArray arr 5
+ i6 <- readCharArray arr 6
+ i7 <- readCharArray arr 7
+ return (map ord [i0,i1,i2,i3,i4,i5,i6,i7])
+ )
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
new file mode 100644
index 0000000000..98c4e2dfe0
--- /dev/null
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -0,0 +1,850 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-specific parts of the register allocator
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+#include "nativeGen/NCG.h"
+
+module RegAllocInfo (
+ RegUsage(..),
+ noUsage,
+ regUsage,
+ patchRegs,
+ jumpDests,
+ patchJump,
+ isRegRegMove,
+
+ maxSpillSlots,
+ mkSpillInstr,
+ mkLoadInstr,
+ mkRegRegMoveInstr,
+ mkBranchInstr
+ ) where
+
+#include "HsVersions.h"
+
+import Cmm ( BlockId )
+import MachOp ( MachRep(..), wordRep )
+import MachInstrs
+import MachRegs
+import Outputable
+import Constants ( rESERVED_C_STACK_BYTES )
+import FastTypes
+
+-- -----------------------------------------------------------------------------
+-- RegUsage type
+
+-- @regUsage@ returns the sets of src and destination registers used
+-- by a particular instruction. Machine registers that are
+-- pre-allocated to stgRegs are filtered out, because they are
+-- uninteresting from a register allocation standpoint. (We wouldn't
+-- want them to end up on the free list!) As far as we are concerned,
+-- the fixed registers simply don't exist (for allocation purposes,
+-- anyway).
+
+-- regUsage doesn't need to do any trickery for jumps and such. Just
+-- state precisely the regs read and written by that insn. The
+-- consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+
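+-- A small example (x86; see usageRM below): ADD sz src (OpReg dst) reads the
+-- registers of src together with dst and writes dst, so it becomes roughly
+-- RU (use_R src ++ [dst]) [dst], after filtering out the uninteresting
+-- (pre-allocated) registers.
+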
+data RegUsage = RU [Reg] [Reg]
+
+noUsage :: RegUsage
+noUsage = RU [] []
+
+regUsage :: Instr -> RegUsage
+
+interesting (VirtualRegI _) = True
+interesting (VirtualRegHi _) = True
+interesting (VirtualRegF _) = True
+interesting (VirtualRegD _) = True
+interesting (RealReg i) = isFastTrue (freeReg i)
+
+
+#if alpha_TARGET_ARCH
+regUsage instr = case instr of
+ LD B reg addr -> usage (regAddr addr, [reg, t9])
+ LD Bu reg addr -> usage (regAddr addr, [reg, t9])
+-- LD W reg addr -> usage (regAddr addr, [reg, t9]) : UNUSED
+-- LD Wu reg addr -> usage (regAddr addr, [reg, t9]) : UNUSED
+ LD sz reg addr -> usage (regAddr addr, [reg])
+ LDA reg addr -> usage (regAddr addr, [reg])
+ LDAH reg addr -> usage (regAddr addr, [reg])
+ LDGP reg addr -> usage (regAddr addr, [reg])
+ LDI sz reg imm -> usage ([], [reg])
+ ST B reg addr -> usage (reg : regAddr addr, [t9, t10])
+-- ST W reg addr -> usage (reg : regAddr addr, [t9, t10]) : UNUSED
+ ST sz reg addr -> usage (reg : regAddr addr, [])
+ CLR reg -> usage ([], [reg])
+ ABS sz ri reg -> usage (regRI ri, [reg])
+ NEG sz ov ri reg -> usage (regRI ri, [reg])
+ ADD sz ov r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SADD sz sc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SUB sz ov r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SSUB sz sc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ MUL sz ov r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ DIV sz un r1 ar r2 -> usage (r1 : regRI ar, [r2, t9, t10, t11, t12])
+ REM sz un r1 ar r2 -> usage (r1 : regRI ar, [r2, t9, t10, t11, t12])
+ NOT ri reg -> usage (regRI ri, [reg])
+ AND r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ANDNOT r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ OR r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ORNOT r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XOR r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XORNOT r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ZAP r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ZAPNOT r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ CMP co r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ FCLR reg -> usage ([], [reg])
+ FABS r1 r2 -> usage ([r1], [r2])
+ FNEG sz r1 r2 -> usage ([r1], [r2])
+ FADD sz r1 r2 r3 -> usage ([r1, r2], [r3])
+ FDIV sz r1 r2 r3 -> usage ([r1, r2], [r3])
+ FMUL sz r1 r2 r3 -> usage ([r1, r2], [r3])
+ FSUB sz r1 r2 r3 -> usage ([r1, r2], [r3])
+ CVTxy sz1 sz2 r1 r2 -> usage ([r1], [r2])
+ FCMP sz co r1 r2 r3 -> usage ([r1, r2], [r3])
+ FMOV r1 r2 -> usage ([r1], [r2])
+
+
+ -- We assume that all local jumps will be BI/BF/BR. JMP must be out-of-line.
+ BI cond reg lbl -> usage ([reg], [])
+ BF cond reg lbl -> usage ([reg], [])
+ JMP reg addr hint -> RU (mkRegSet (filter interesting (regAddr addr))) freeRegSet
+
+ BSR _ n -> RU (argRegSet n) callClobberedRegSet
+ JSR reg addr n -> RU (argRegSet n) callClobberedRegSet
+
+ _ -> noUsage
+
+ where
+ usage (src, dst) = RU (mkRegSet (filter interesting src))
+ (mkRegSet (filter interesting dst))
+
+ interesting (FixedReg _) = False
+ interesting _ = True
+
+ regAddr (AddrReg r1) = [r1]
+ regAddr (AddrRegImm r1 _) = [r1]
+ regAddr (AddrImm _) = []
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
+#endif /* alpha_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+regUsage instr = case instr of
+ MOV sz src dst -> usageRW src dst
+ MOVZxL sz src dst -> usageRW src dst
+ MOVSxL sz src dst -> usageRW src dst
+ LEA sz src dst -> usageRW src dst
+ ADD sz src dst -> usageRM src dst
+ ADC sz src dst -> usageRM src dst
+ SUB sz src dst -> usageRM src dst
+ IMUL sz src dst -> usageRM src dst
+ IMUL2 sz src -> mkRU (eax:use_R src) [eax,edx]
+ MUL sz src dst -> usageRM src dst
+ DIV sz op -> mkRU (eax:edx:use_R op) [eax,edx]
+ IDIV sz op -> mkRU (eax:edx:use_R op) [eax,edx]
+ AND sz src dst -> usageRM src dst
+ OR sz src dst -> usageRM src dst
+ XOR sz src dst -> usageRM src dst
+ NOT sz op -> usageM op
+ NEGI sz op -> usageM op
+ SHL sz imm dst -> usageRM imm dst
+ SAR sz imm dst -> usageRM imm dst
+ SHR sz imm dst -> usageRM imm dst
+ BT sz imm src -> mkRUR (use_R src)
+
+ PUSH sz op -> mkRUR (use_R op)
+ POP sz op -> mkRU [] (def_W op)
+ TEST sz src dst -> mkRUR (use_R src ++ use_R dst)
+ CMP sz src dst -> mkRUR (use_R src ++ use_R dst)
+ SETCC cond op -> mkRU [] (def_W op)
+ JXX cond lbl -> mkRU [] []
+ JMP op -> mkRUR (use_R op)
+ JMP_TBL op ids -> mkRUR (use_R op)
+ CALL (Left imm) params -> mkRU params callClobberedRegs
+ CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
+ CLTD sz -> mkRU [eax] [edx]
+ NOP -> mkRU [] []
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> mkRU [src] [dst]
+ GLD sz src dst -> mkRU (use_EA src) [dst]
+ GST sz src dst -> mkRUR (src : use_EA dst)
+
+ GLDZ dst -> mkRU [] [dst]
+ GLD1 dst -> mkRU [] [dst]
+
+ GFTOI src dst -> mkRU [src] [dst]
+ GDTOI src dst -> mkRU [src] [dst]
+
+ GITOF src dst -> mkRU [src] [dst]
+ GITOD src dst -> mkRU [src] [dst]
+
+ GADD sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GSUB sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GMUL sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GDIV sz s1 s2 dst -> mkRU [s1,s2] [dst]
+
+ GCMP sz src1 src2 -> mkRUR [src1,src2]
+ GABS sz src dst -> mkRU [src] [dst]
+ GNEG sz src dst -> mkRU [src] [dst]
+ GSQRT sz src dst -> mkRU [src] [dst]
+ GSIN sz src dst -> mkRU [src] [dst]
+ GCOS sz src dst -> mkRU [src] [dst]
+ GTAN sz src dst -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> mkRU [src] [dst]
+ CVTSD2SS src dst -> mkRU [src] [dst]
+ CVTSS2SI src dst -> mkRU (use_R src) [dst]
+ CVTSD2SI src dst -> mkRU (use_R src) [dst]
+ CVTSI2SS src dst -> mkRU (use_R src) [dst]
+ CVTSI2SD src dst -> mkRU (use_R src) [dst]
+ FDIV sz src dst -> usageRM src dst
+#endif
+
+ FETCHGOT reg -> mkRU [] [reg]
+ FETCHPC reg -> mkRU [] [reg]
+
+ COMMENT _ -> noUsage
+ DELTA _ -> noUsage
+
+ _other -> panic "regUsage: unrecognised instr"
+
+ where
+#if x86_64_TARGET_ARCH
+ -- call parameters: include %eax, because it is used
+ -- to pass the number of SSE reg arguments to varargs fns.
+ params = eax : allArgRegs ++ allFPArgRegs
+#endif
+
+ -- 2 operand form; first operand Read; second Written
+ usageRW :: Operand -> Operand -> RegUsage
+ usageRW op (OpReg reg) = mkRU (use_R op) [reg]
+ usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+
+ -- 2 operand form; first operand Read; second Modified
+ usageRM :: Operand -> Operand -> RegUsage
+ usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
+ usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+
+ -- 1 operand form; operand Modified
+ usageM :: Operand -> RegUsage
+ usageM (OpReg reg) = mkRU [reg] [reg]
+ usageM (OpAddr ea) = mkRUR (use_EA ea)
+
+ -- Registers defd when an operand is written.
+ def_W (OpReg reg) = [reg]
+ def_W (OpAddr ea) = []
+
+ -- Registers used when an operand is read.
+ use_R (OpReg reg) = [reg]
+ use_R (OpImm imm) = []
+ use_R (OpAddr ea) = use_EA ea
+
+ -- Registers used to compute an effective address.
+ use_EA (ImmAddr _ _) = []
+ use_EA (AddrBaseIndex base index _) =
+ use_base base $! use_index index
+ where use_base (EABaseReg r) x = r : x
+ use_base _ x = x
+ use_index EAIndexNone = []
+ use_index (EAIndex i _) = [i]
+
+ mkRUR src = src' `seq` RU src' []
+ where src' = filter interesting src
+
+ mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+ where src' = filter interesting src
+ dst' = filter interesting dst
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if sparc_TARGET_ARCH
+
+regUsage instr = case instr of
+ LD sz addr reg -> usage (regAddr addr, [reg])
+ ST sz reg addr -> usage (reg : regAddr addr, [])
+ ADD x cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SUB x cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ RDY rd -> usage ([], [rd])
+ AND b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ANDN b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ OR b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ORN b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XOR b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XNOR b r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SETHI imm reg -> usage ([], [reg])
+ FABS s r1 r2 -> usage ([r1], [r2])
+ FADD s r1 r2 r3 -> usage ([r1, r2], [r3])
+ FCMP e s r1 r2 -> usage ([r1, r2], [])
+ FDIV s r1 r2 r3 -> usage ([r1, r2], [r3])
+ FMOV s r1 r2 -> usage ([r1], [r2])
+ FMUL s r1 r2 r3 -> usage ([r1, r2], [r3])
+ FNEG s r1 r2 -> usage ([r1], [r2])
+ FSQRT s r1 r2 -> usage ([r1], [r2])
+ FSUB s r1 r2 r3 -> usage ([r1, r2], [r3])
+ FxTOy s1 s2 r1 r2 -> usage ([r1], [r2])
+
+ -- We assume that all local jumps will be BI/BF. JMP must be out-of-line.
+ JMP addr -> usage (regAddr addr, [])
+
+ CALL (Left imm) n True -> noUsage
+ CALL (Left imm) n False -> usage (argRegs n, callClobberedRegs)
+ CALL (Right reg) n True -> usage ([reg], [])
+ CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
+
+ _ -> noUsage
+ where
+ usage (src, dst) = RU (filter interesting src)
+ (filter interesting dst)
+
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
+#endif /* sparc_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if powerpc_TARGET_ARCH
+
+regUsage instr = case instr of
+ LD sz reg addr -> usage (regAddr addr, [reg])
+ LA sz reg addr -> usage (regAddr addr, [reg])
+ ST sz reg addr -> usage (reg : regAddr addr, [])
+ STU sz reg addr -> usage (reg : regAddr addr, [])
+ LIS reg imm -> usage ([], [reg])
+ LI reg imm -> usage ([], [reg])
+ MR reg1 reg2 -> usage ([reg2], [reg1])
+ CMP sz reg ri -> usage (reg : regRI ri,[])
+ CMPL sz reg ri -> usage (reg : regRI ri,[])
+ BCC cond lbl -> noUsage
+ MTCTR reg -> usage ([reg],[])
+ BCTR targets -> noUsage
+ BL imm params -> usage (params, callClobberedRegs)
+ BCTRL params -> usage (params, callClobberedRegs)
+ ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDIS reg1 reg2 imm -> usage ([reg2], [reg1])
+ SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW_MayOflo reg1 reg2 reg3
+ -> usage ([reg2,reg3], [reg1])
+ AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XORIS reg1 reg2 imm -> usage ([reg2], [reg1])
+ EXTS siz reg1 reg2 -> usage ([reg2], [reg1])
+ NEG reg1 reg2 -> usage ([reg2], [reg1])
+ NOT reg1 reg2 -> usage ([reg2], [reg1])
+ SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ RLWINM reg1 reg2 sh mb me
+ -> usage ([reg2], [reg1])
+ FADD sz r1 r2 r3 -> usage ([r2,r3], [r1])
+ FSUB sz r1 r2 r3 -> usage ([r2,r3], [r1])
+ FMUL sz r1 r2 r3 -> usage ([r2,r3], [r1])
+ FDIV sz r1 r2 r3 -> usage ([r2,r3], [r1])
+ FNEG r1 r2 -> usage ([r2], [r1])
+ FCMP r1 r2 -> usage ([r1,r2], [])
+ FCTIWZ r1 r2 -> usage ([r2], [r1])
+ FRSP r1 r2 -> usage ([r2], [r1])
+ MFCR reg -> usage ([], [reg])
+ MFLR reg -> usage ([], [reg])
+ FETCHPC reg -> usage ([], [reg])
+ _ -> noUsage
+ where
+ usage (src, dst) = RU (filter interesting src)
+ (filter interesting dst)
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Determine the possible destinations from the current instruction.
+
+-- (we always assume that the next instruction is also a valid destination;
+-- if this isn't the case then the jump should be at the end of the basic
+-- block).
+
+jumpDests :: Instr -> [BlockId] -> [BlockId]
+jumpDests insn acc
+ = case insn of
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ JXX _ id -> id : acc
+ JMP_TBL _ ids -> ids ++ acc
+#elif powerpc_TARGET_ARCH
+ BCC _ id -> id : acc
+ BCTR targets -> targets ++ acc
+#endif
+ _other -> acc
+
+patchJump :: Instr -> BlockId -> BlockId -> Instr
+
+patchJump insn old new
+ = case insn of
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ JXX cc id | id == old -> JXX cc new
+ JMP_TBL op ids -> error "Cannot patch JMP_TBL"
+#elif powerpc_TARGET_ARCH
+ BCC cc id | id == old -> BCC cc new
+ BCTR targets -> error "Cannot patch BCTR"
+#endif
+ _other -> insn
+
+-- -----------------------------------------------------------------------------
+-- 'patchRegs' function
+
+-- 'patchRegs' takes an instruction and applies the given mapping to
+-- all the register references.
+
+patchRegs :: Instr -> (Reg -> Reg) -> Instr
+
+#if alpha_TARGET_ARCH
+
+patchRegs instr env = case instr of
+ LD sz reg addr -> LD sz (env reg) (fixAddr addr)
+ LDA reg addr -> LDA (env reg) (fixAddr addr)
+ LDAH reg addr -> LDAH (env reg) (fixAddr addr)
+ LDGP reg addr -> LDGP (env reg) (fixAddr addr)
+ LDI sz reg imm -> LDI sz (env reg) imm
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+ CLR reg -> CLR (env reg)
+ ABS sz ar reg -> ABS sz (fixRI ar) (env reg)
+ NEG sz ov ar reg -> NEG sz ov (fixRI ar) (env reg)
+ ADD sz ov r1 ar r2 -> ADD sz ov (env r1) (fixRI ar) (env r2)
+ SADD sz sc r1 ar r2 -> SADD sz sc (env r1) (fixRI ar) (env r2)
+ SUB sz ov r1 ar r2 -> SUB sz ov (env r1) (fixRI ar) (env r2)
+ SSUB sz sc r1 ar r2 -> SSUB sz sc (env r1) (fixRI ar) (env r2)
+ MUL sz ov r1 ar r2 -> MUL sz ov (env r1) (fixRI ar) (env r2)
+ DIV sz un r1 ar r2 -> DIV sz un (env r1) (fixRI ar) (env r2)
+ REM sz un r1 ar r2 -> REM sz un (env r1) (fixRI ar) (env r2)
+ NOT ar reg -> NOT (fixRI ar) (env reg)
+ AND r1 ar r2 -> AND (env r1) (fixRI ar) (env r2)
+ ANDNOT r1 ar r2 -> ANDNOT (env r1) (fixRI ar) (env r2)
+ OR r1 ar r2 -> OR (env r1) (fixRI ar) (env r2)
+ ORNOT r1 ar r2 -> ORNOT (env r1) (fixRI ar) (env r2)
+ XOR r1 ar r2 -> XOR (env r1) (fixRI ar) (env r2)
+ XORNOT r1 ar r2 -> XORNOT (env r1) (fixRI ar) (env r2)
+ SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
+ SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
+ SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
+ ZAP r1 ar r2 -> ZAP (env r1) (fixRI ar) (env r2)
+ ZAPNOT r1 ar r2 -> ZAPNOT (env r1) (fixRI ar) (env r2)
+ CMP co r1 ar r2 -> CMP co (env r1) (fixRI ar) (env r2)
+ FCLR reg -> FCLR (env reg)
+ FABS r1 r2 -> FABS (env r1) (env r2)
+ FNEG s r1 r2 -> FNEG s (env r1) (env r2)
+ FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
+ FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
+ FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
+ FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
+ CVTxy s1 s2 r1 r2 -> CVTxy s1 s2 (env r1) (env r2)
+ FCMP s co r1 r2 r3 -> FCMP s co (env r1) (env r2) (env r3)
+ FMOV r1 r2 -> FMOV (env r1) (env r2)
+ BI cond reg lbl -> BI cond (env reg) lbl
+ BF cond reg lbl -> BF cond (env reg) lbl
+ JMP reg addr hint -> JMP (env reg) (fixAddr addr) hint
+ JSR reg addr i -> JSR (env reg) (fixAddr addr) i
+ _ -> instr
+ where
+ fixAddr (AddrReg r1) = AddrReg (env r1)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+ fixAddr other = other
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+#endif /* alpha_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+patchRegs instr env = case instr of
+ MOV sz src dst -> patch2 (MOV sz) src dst
+ MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
+ MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
+ LEA sz src dst -> patch2 (LEA sz) src dst
+ ADD sz src dst -> patch2 (ADD sz) src dst
+ ADC sz src dst -> patch2 (ADC sz) src dst
+ SUB sz src dst -> patch2 (SUB sz) src dst
+ IMUL sz src dst -> patch2 (IMUL sz) src dst
+ IMUL2 sz src -> patch1 (IMUL2 sz) src
+ MUL sz src dst -> patch2 (MUL sz) src dst
+ IDIV sz op -> patch1 (IDIV sz) op
+ DIV sz op -> patch1 (DIV sz) op
+ AND sz src dst -> patch2 (AND sz) src dst
+ OR sz src dst -> patch2 (OR sz) src dst
+ XOR sz src dst -> patch2 (XOR sz) src dst
+ NOT sz op -> patch1 (NOT sz) op
+ NEGI sz op -> patch1 (NEGI sz) op
+ SHL sz imm dst -> patch1 (SHL sz imm) dst
+ SAR sz imm dst -> patch1 (SAR sz imm) dst
+ SHR sz imm dst -> patch1 (SHR sz imm) dst
+ BT sz imm src -> patch1 (BT sz imm) src
+ TEST sz src dst -> patch2 (TEST sz) src dst
+ CMP sz src dst -> patch2 (CMP sz) src dst
+ PUSH sz op -> patch1 (PUSH sz) op
+ POP sz op -> patch1 (POP sz) op
+ SETCC cond op -> patch1 (SETCC cond) op
+ JMP op -> patch1 JMP op
+ JMP_TBL op ids -> patch1 JMP_TBL op $ ids
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> GMOV (env src) (env dst)
+ GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
+ GST sz src dst -> GST sz (env src) (lookupAddr dst)
+
+ GLDZ dst -> GLDZ (env dst)
+ GLD1 dst -> GLD1 (env dst)
+
+ GFTOI src dst -> GFTOI (env src) (env dst)
+ GDTOI src dst -> GDTOI (env src) (env dst)
+
+ GITOF src dst -> GITOF (env src) (env dst)
+ GITOD src dst -> GITOD (env src) (env dst)
+
+ GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
+ GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
+ GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
+ GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
+
+ GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
+ GABS sz src dst -> GABS sz (env src) (env dst)
+ GNEG sz src dst -> GNEG sz (env src) (env dst)
+ GSQRT sz src dst -> GSQRT sz (env src) (env dst)
+ GSIN sz src dst -> GSIN sz (env src) (env dst)
+ GCOS sz src dst -> GCOS sz (env src) (env dst)
+ GTAN sz src dst -> GTAN sz (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
+ CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
+ CVTSS2SI src dst -> CVTSS2SI (patchOp src) (env dst)
+ CVTSD2SI src dst -> CVTSD2SI (patchOp src) (env dst)
+ CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
+ CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
+ FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
+#endif
+
+ CALL (Left imm) _ -> instr
+ CALL (Right reg) p -> CALL (Right (env reg)) p
+
+ FETCHGOT reg -> FETCHGOT (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+
+ NOP -> instr
+ COMMENT _ -> instr
+ DELTA _ -> instr
+ JXX _ _ -> instr
+ CLTD _ -> instr
+
+ _other -> panic "patchRegs: unrecognised instr"
+
+ where
+ patch1 insn op = insn $! patchOp op
+ patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
+
+ patchOp (OpReg reg) = OpReg $! env reg
+ patchOp (OpImm imm) = OpImm imm
+ patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
+
+ lookupAddr (ImmAddr imm off) = ImmAddr imm off
+ lookupAddr (AddrBaseIndex base index disp)
+ = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
+ where
+ lookupBase EABaseNone = EABaseNone
+ lookupBase EABaseRip = EABaseRip
+ lookupBase (EABaseReg r) = EABaseReg (env r)
+
+ lookupIndex EAIndexNone = EAIndexNone
+ lookupIndex (EAIndex r i) = EAIndex (env r) i
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if sparc_TARGET_ARCH
+
+patchRegs instr env = case instr of
+ LD sz addr reg -> LD sz (fixAddr addr) (env reg)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+ ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
+ SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
+ UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
+ SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
+ RDY rd -> RDY (env rd)
+ AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
+ ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
+ OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
+ ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
+ XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
+ XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
+ SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
+ SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
+ SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
+ SETHI imm reg -> SETHI imm (env reg)
+ FABS s r1 r2 -> FABS s (env r1) (env r2)
+ FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
+ FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
+ FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
+ FMOV s r1 r2 -> FMOV s (env r1) (env r2)
+ FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
+ FNEG s r1 r2 -> FNEG s (env r1) (env r2)
+ FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
+ FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
+ FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
+ JMP addr -> JMP (fixAddr addr)
+ CALL (Left i) n t -> CALL (Left i) n t
+ CALL (Right r) n t -> CALL (Right (env r)) n t
+ _ -> instr
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+#endif /* sparc_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if powerpc_TARGET_ARCH
+
+patchRegs instr env = case instr of
+ LD sz reg addr -> LD sz (env reg) (fixAddr addr)
+ LA sz reg addr -> LA sz (env reg) (fixAddr addr)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+ STU sz reg addr -> STU sz (env reg) (fixAddr addr)
+ LIS reg imm -> LIS (env reg) imm
+ LI reg imm -> LI (env reg) imm
+ MR reg1 reg2 -> MR (env reg1) (env reg2)
+ CMP sz reg ri -> CMP sz (env reg) (fixRI ri)
+ CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri)
+ BCC cond lbl -> BCC cond lbl
+ MTCTR reg -> MTCTR (env reg)
+ BCTR targets -> BCTR targets
+ BL imm argRegs -> BL imm argRegs -- argument regs
+ BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
+ ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
+ ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
+ ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
+ ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
+ SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
+ MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
+ DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
+ DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
+ MULLW_MayOflo reg1 reg2 reg3
+ -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
+ AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
+ OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
+ XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
+ XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
+ EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
+ NEG reg1 reg2 -> NEG (env reg1) (env reg2)
+ NOT reg1 reg2 -> NOT (env reg1) (env reg2)
+ SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
+ SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
+ SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
+ RLWINM reg1 reg2 sh mb me
+ -> RLWINM (env reg1) (env reg2) sh mb me
+ FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
+ FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3)
+ FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3)
+ FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3)
+ FNEG r1 r2 -> FNEG (env r1) (env r2)
+ FCMP r1 r2 -> FCMP (env r1) (env r2)
+ FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
+ FRSP r1 r2 -> FRSP (env r1) (env r2)
+ MFCR reg -> MFCR (env reg)
+ MFLR reg -> MFLR (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+ _ -> instr
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- Detecting reg->reg moves
+
+-- The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- by assigning the src and dest temporaries to the same real register.
+
+isRegRegMove :: Instr -> Maybe (Reg,Reg)
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+-- TMP:
+isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
+#elif powerpc_TARGET_ARCH
+isRegRegMove (MR dst src) = Just (src,dst)
+#else
+#warning ToDo: isRegRegMove
+#endif
+isRegRegMove _ = Nothing
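+
+-- For example, on x86 an instruction of the shape
+-- MOV _ (OpReg t1) (OpReg t2) is recognised here; if the allocator can give
+-- t1 and t2 the same location, the move becomes redundant and is dropped
+-- (see raInsn in RegisterAlloc).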
+
+-- -----------------------------------------------------------------------------
+-- Generating spill instructions
+
+mkSpillInstr
+ :: Reg -- register to spill (should be a real)
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+mkSpillInstr reg delta slot
+ = ASSERT(isRealReg reg)
+ let
+ off = spillSlotToOffset slot
+ in
+#ifdef alpha_TARGET_ARCH
+ {-Alpha: spill below the stack pointer (?)-}
+ ST sz dyn (spRel (- (off `div` 8)))
+#endif
+#ifdef i386_TARGET_ARCH
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of
+ RcInteger -> MOV I32 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> GST F80 reg (spRel off_w) {- RcFloat/RcDouble -}
+#endif
+#ifdef x86_64_TARGET_ARCH
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
+ RcDouble -> MOV F64 (OpReg reg) (OpAddr (spRel off_w))
+ -- ToDo: will it work to always spill as a double?
+ -- does that cause a stall if the data was a float?
+#endif
+#ifdef sparc_TARGET_ARCH
+ {-SPARC: spill below frame pointer leaving 2 words/spill-}
+ let{off_w = 1 + (off `div` 4);
+ sz = case regClass reg of {
+ RcInteger -> I32;
+ RcFloat -> F32;
+ RcDouble -> F64}}
+ in ST sz reg (fpRel (- off_w))
+#endif
+#ifdef powerpc_TARGET_ARCH
+ let sz = case regClass reg of
+ RcInteger -> I32
+ RcDouble -> F64
+ in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+#endif
+
+
+mkLoadInstr
+ :: Reg -- register to load (should be a real)
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+mkLoadInstr reg delta slot
+ = ASSERT(isRealReg reg)
+ let
+ off = spillSlotToOffset slot
+ in
+#if alpha_TARGET_ARCH
+ LD sz dyn (spRel (- (off `div` 8)))
+#endif
+#if i386_TARGET_ARCH
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of {
+ RcInteger -> MOV I32 (OpAddr (spRel off_w)) (OpReg reg);
+ _ -> GLD F80 (spRel off_w) reg} {- RcFloat/RcDouble -}
+#endif
+#if x86_64_TARGET_ARCH
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
+ _ -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg)
+#endif
+#if sparc_TARGET_ARCH
+ let{off_w = 1 + (off `div` 4);
+ sz = case regClass reg of {
+ RcInteger -> I32;
+ RcFloat -> F32;
+ RcDouble -> F64}}
+ in LD sz (fpRel (- off_w)) reg
+#endif
+#if powerpc_TARGET_ARCH
+ let sz = case regClass reg of
+ RcInteger -> I32
+ RcDouble -> F64
+ in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+#endif
+
+mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+mkRegRegMoveInstr src dst
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ = case regClass src of
+ RcInteger -> MOV wordRep (OpReg src) (OpReg dst)
+#if i386_TARGET_ARCH
+ RcDouble -> GMOV src dst
+#else
+ RcDouble -> MOV F64 (OpReg src) (OpReg dst)
+#endif
+#elif powerpc_TARGET_ARCH
+ = MR dst src
+#endif
+
+mkBranchInstr
+ :: BlockId
+ -> [Instr]
+#if alpha_TARGET_ARCH
+mkBranchInstr id = [BR id]
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+mkBranchInstr id = [JXX ALWAYS id]
+#endif
+
+#if sparc_TARGET_ARCH
+mkBranchInstr (BlockId id) = [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP]
+#endif
+
+#if powerpc_TARGET_ARCH
+mkBranchInstr id = [BCC ALWAYS id]
+#endif
+
+
+spillSlotSize :: Int
+spillSlotSize = IF_ARCH_i386(12, 8)
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- convert a spill slot number to a *byte* offset, with no sign:
+-- decide on a per arch basis whether you are spilling above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + spillSlotSize * slot
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ (text "invalid spill location: " <> int slot)
diff --git a/compiler/nativeGen/RegisterAlloc.hs b/compiler/nativeGen/RegisterAlloc.hs
new file mode 100644
index 0000000000..7d2ab1b6d6
--- /dev/null
+++ b/compiler/nativeGen/RegisterAlloc.hs
@@ -0,0 +1,1004 @@
+-----------------------------------------------------------------------------
+--
+-- The register allocator
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+{-
+The algorithm is roughly:
+
+ 1) Compute strongly connected components of the basic block list.
+
+ 2) Compute liveness (mapping from pseudo register to
+ point(s) of death?).
+
+ 3) Walk instructions in each basic block. We keep track of
+ (a) Free real registers (a bitmap?)
+ (b) Current assignment of temporaries to machine registers and/or
+ spill slots (call this the "assignment").
+ (c) Partial mapping from basic block ids to a virt-to-loc mapping.
+ When we first encounter a branch to a basic block,
+ we fill in its entry in this table with the current mapping.
+
+ For each instruction:
+ (a) For each real register clobbered by this instruction:
+ If a temporary resides in it,
+ If the temporary is live after this instruction,
+ Move the temporary to another (non-clobbered & free) reg,
+ or spill it to memory. Mark the temporary as residing
+ in both memory and a register if it was spilled (it might
+ need to be read by this instruction).
+ (ToDo: this is wrong for jump instructions?)
+
+ (b) For each temporary *read* by the instruction:
+ If the temporary does not have a real register allocation:
+ - Allocate a real register from the free list. If
+ the list is empty:
+ - Find a temporary to spill. Pick one that is
+ not used in this instruction (ToDo: not
+ used for a while...)
+ - generate a spill instruction
+ - If the temporary was previously spilled,
+ generate an instruction to read the temp from its spill loc.
+ (optimisation: if we can see that a real register is going to
+ be used soon, then don't use it for allocation).
+
+ (c) Update the current assignment
+
+ (d) If the instruction is a branch:
+ if the destination block already has a register assignment,
+ Generate a new block with fixup code and redirect the
+ jump to the new block.
+ else,
+ Update the block id->assignment mapping with the current
+ assignment.
+
+ (e) Delete all register assignments for temps which are read
+ (only) and die here. Update the free register list.
+
+ (f) Mark all registers clobbered by this instruction as not free,
+ and mark temporaries which have been spilled due to clobbering
+ as in memory (step (a) marks them as in both mem & reg).
+
+ (g) For each temporary *written* by this instruction:
+ Allocate a real register as for (b), spilling something
+ else if necessary.
+ - except when updating the assignment, drop any memory
+ locations that the temporary was previously in, since
+ they will be no longer valid after this instruction.
+
+ (h) Delete all register assignments for temps which are
+ written and die here (there should rarely be any). Update
+ the free register list.
+
+ (i) Rewrite the instruction with the new mapping.
+
+ (j) For each spilled reg known to be now dead, re-add its stack slot
+ to the free list.
+
+-}
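+
+{- Steps (a)-(j) above correspond roughly to the pieces of genRaInsn below:
+   (a) saveClobberedTemps, (b)/(c) and (g) allocateRegsAndSpill,
+   (d) joinToTargets, (e) and (h) releaseRegs, (f) clobberRegs,
+   (i) patchRegs, (j) currently a ToDo. -}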
+
+module RegisterAlloc (
+ regAlloc
+ ) where
+
+#include "HsVersions.h"
+
+import PprMach
+import MachRegs
+import MachInstrs
+import RegAllocInfo
+import Cmm
+
+import Digraph
+import Unique ( Uniquable(getUnique), Unique )
+import UniqSet
+import UniqFM
+import UniqSupply
+import Outputable
+
+#ifndef DEBUG
+import Maybe ( fromJust )
+#endif
+import Maybe ( fromMaybe )
+import List ( nub, partition, mapAccumL, groupBy )
+import Monad ( when )
+import DATA_WORD
+import DATA_BITS
+
+-- -----------------------------------------------------------------------------
+-- Some useful types
+
+type RegSet = UniqSet Reg
+
+type RegMap a = UniqFM a
+emptyRegMap = emptyUFM
+
+type BlockMap a = UniqFM a
+emptyBlockMap = emptyUFM
+
+-- A basic block where the instructions are annotated with the registers
+-- which are no longer live in the *next* instruction in this sequence.
+-- (NB. if the instruction is a jump, these registers might still be live
+-- at the jump target(s) - you have to check the liveness at the destination
+-- block to find out).
+type AnnBasicBlock
+ = GenBasicBlock (Instr,
+ [Reg], -- registers read (only) which die
+ [Reg]) -- registers written which die
+
+-- -----------------------------------------------------------------------------
+-- The free register set
+
+-- This needs to be *efficient*
+
+{- Here's an inefficient 'executable specification' of the FreeRegs data type:
+type FreeRegs = [RegNo]
+
+noFreeRegs = 0
+releaseReg n f = if n `elem` f then f else (n : f)
+initFreeRegs = allocatableRegs
+getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
+allocateReg f r = filter (/= r) f
+-}
+
+#if defined(powerpc_TARGET_ARCH)
+
+-- The PowerPC has 32 integer and 32 floating point registers.
+-- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
+-- better.
+-- Note that when getFreeRegs scans for free registers, it starts at register
+-- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
+-- registers are callee-saves, while the lower regs are caller-saves, so it
+-- makes sense to start at the high end.
+-- Apart from that, the code does nothing PowerPC-specific, so feel free to
+-- add your favourite platform to the #if (if you have 64 registers but only
+-- 32-bit words).
+
+data FreeRegs = FreeRegs !Word32 !Word32
+ deriving( Show ) -- The Show is used in an ASSERT
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0
+
+releaseReg :: RegNo -> FreeRegs -> FreeRegs
+releaseReg r (FreeRegs g f)
+ | r > 31 = FreeRegs g (f .|. (1 `shiftL` (fromIntegral r - 32)))
+ | otherwise = FreeRegs (g .|. (1 `shiftL` fromIntegral r)) f
+
+initFreeRegs :: FreeRegs
+initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
+
+getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazily
+getFreeRegs cls (FreeRegs g f)
+ | RcDouble <- cls = go f (0x80000000) 63
+ | RcInteger <- cls = go g (0x80000000) 31
+ where
+ go x 0 i = []
+ go x m i | x .&. m /= 0 = i : (go x (m `shiftR` 1) $! i-1)
+ | otherwise = go x (m `shiftR` 1) $! i-1
+
+allocateReg :: RegNo -> FreeRegs -> FreeRegs
+allocateReg r (FreeRegs g f)
+ | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (fromIntegral r - 32)))
+ | otherwise = FreeRegs (g .&. complement (1 `shiftL` fromIntegral r)) f
+
+#else
+
+-- If we have fewer than 32 registers, or if we have efficient 64-bit words,
+-- we will just use a single bitfield.
+
+#if defined(alpha_TARGET_ARCH)
+type FreeRegs = Word64
+#else
+type FreeRegs = Word32
+#endif
+
+noFreeRegs :: FreeRegs
+noFreeRegs = 0
+
+releaseReg :: RegNo -> FreeRegs -> FreeRegs
+releaseReg n f = f .|. (1 `shiftL` n)
+
+initFreeRegs :: FreeRegs
+initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
+
+getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazily
+getFreeRegs cls f = go f 0
+ where go 0 m = []
+ go n m
+ | n .&. 1 /= 0 && regClass (RealReg m) == cls
+ = m : (go (n `shiftR` 1) $! (m+1))
+ | otherwise
+ = go (n `shiftR` 1) $! (m+1)
+ -- ToDo: there's no point looking through all the integer registers
+ -- in order to find a floating-point one.
+
+allocateReg :: RegNo -> FreeRegs -> FreeRegs
+allocateReg r f = f .&. complement (1 `shiftL` fromIntegral r)
+
+#endif
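+
+-- A self-contained illustration of the single-bitfield encoding, using
+-- hypothetical register numbers 0, 1 and 5 (it only shows the bit
+-- manipulation; the real code above also consults regClass):
+
+demoFreeRegs :: Word32
+demoFreeRegs = foldr (\r f -> f .|. (1 `shiftL` r)) 0 [0, 1, 5]
+  -- 0b100011: registers 0, 1 and 5 are free
+
+demoAfterAlloc :: Word32
+demoAfterAlloc = demoFreeRegs .&. complement (1 `shiftL` 1)
+  -- 0b100001: register 1 has been handed out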
+
+-- -----------------------------------------------------------------------------
+-- The free list of stack slots
+
+-- This doesn't need to be so efficient. It also doesn't really need to be
+-- maintained as a set, so we just use an ordinary list (lazy, because it
+-- contains all the possible stack slots and there are lots :-).
+-- We do one more thing here: We make sure that we always use the same stack
+-- slot to spill the same temporary. That way, the stack slot assignments
+-- will always match up and we never need to worry about memory-to-memory
+-- moves when generating fixup code.
+
+type StackSlot = Int
+data FreeStack = FreeStack [StackSlot] (UniqFM StackSlot)
+
+completelyFreeStack :: FreeStack
+completelyFreeStack = FreeStack [0..maxSpillSlots] emptyUFM
+
+getFreeStackSlot :: FreeStack -> (FreeStack,Int)
+getFreeStackSlot (FreeStack (slot:stack) reserved)
+ = (FreeStack stack reserved,slot)
+
+freeStackSlot :: FreeStack -> Int -> FreeStack
+freeStackSlot (FreeStack stack reserved) slot
+ -- NOTE: This is probably terribly, unthinkably slow.
+ -- But on the other hand, it never gets called, because the allocator
+ -- currently does not free stack slots. So who cares if it's slow?
+ | slot `elem` eltsUFM reserved = FreeStack stack reserved
+ | otherwise = FreeStack (slot:stack) reserved
+
+
+getFreeStackSlotFor :: FreeStack -> Unique -> (FreeStack,Int)
+getFreeStackSlotFor fs@(FreeStack _ reserved) reg =
+ case lookupUFM reserved reg of
+ Just slot -> (fs,slot)
+ Nothing -> let (FreeStack stack' _, slot) = getFreeStackSlot fs
+ in (FreeStack stack' (addToUFM reserved reg slot), slot)
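+
+-- Usage sketch, where reg1 and reg2 stand for two distinct Uniques:
+--
+--   let (fs1, s1) = getFreeStackSlotFor completelyFreeStack reg1
+--       (fs2, s2) = getFreeStackSlotFor fs1                 reg1
+--       (fs3, s3) = getFreeStackSlotFor fs2                 reg2
+--
+-- gives s1 == s2 (same temporary, same slot) and a fresh slot for s3.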
+
+-- -----------------------------------------------------------------------------
+-- Top level of the register allocator
+
+regAlloc :: NatCmmTop -> UniqSM NatCmmTop
+regAlloc (CmmData sec d) = returnUs $ CmmData sec d
+regAlloc (CmmProc info lbl params [])
+ = returnUs $ CmmProc info lbl params [] -- no blocks to run the regalloc on
+regAlloc (CmmProc info lbl params blocks@(first:rest))
+ = let
+ first_id = blockId first
+ sccs = sccBlocks blocks
+ (ann_sccs, block_live) = computeLiveness sccs
+ in linearRegAlloc block_live ann_sccs `thenUs` \final_blocks ->
+ let ((first':_),rest') = partition ((== first_id) . blockId) final_blocks
+ in returnUs $ -- pprTrace "Liveness" (ppr block_live) $
+ CmmProc info lbl params (first':rest')
+
+sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock]
+sccBlocks blocks = stronglyConnComp graph
+ where
+ getOutEdges :: [Instr] -> [BlockId]
+ getOutEdges instrs = foldr jumpDests [] instrs
+
+ graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
+ | block@(BasicBlock id instrs) <- blocks ]
+
+
+-- -----------------------------------------------------------------------------
+-- Computing liveness
+
+computeLiveness
+ :: [SCC NatBasicBlock]
+ -> ([SCC AnnBasicBlock], -- instructions annotated with list of registers
+ -- which are "dead after this instruction".
+ BlockMap RegSet) -- blocks annotated with set of live registers
+ -- on entry to the block.
+
+ -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer
+ -- control to earlier ones only. The SCCs returned are in the *opposite*
+ -- order, which is exactly what we want for the next pass.
+
+computeLiveness sccs
+ = livenessSCCs emptyBlockMap [] sccs
+ where
+ livenessSCCs
+ :: BlockMap RegSet
+ -> [SCC AnnBasicBlock] -- accum
+ -> [SCC NatBasicBlock]
+ -> ([SCC AnnBasicBlock], BlockMap RegSet)
+
+ livenessSCCs blockmap done [] = (done, blockmap)
+ livenessSCCs blockmap done
+ (AcyclicSCC (BasicBlock block_id instrs) : sccs) =
+ {- pprTrace "live instrs" (ppr (getUnique block_id) $$
+ vcat (map (\(instr,regs) -> docToSDoc (pprInstr instr) $$ ppr regs) instrs')) $
+ -}
+ livenessSCCs blockmap'
+ (AcyclicSCC (BasicBlock block_id instrs'):done) sccs
+ where (live,instrs') = liveness emptyUniqSet blockmap []
+ (reverse instrs)
+ blockmap' = addToUFM blockmap block_id live
+
+ livenessSCCs blockmap done
+ (CyclicSCC blocks : sccs) =
+ livenessSCCs blockmap' (CyclicSCC blocks':done) sccs
+ where (blockmap', blocks')
+ = iterateUntilUnchanged linearLiveness equalBlockMaps
+ blockmap blocks
+
+ iterateUntilUnchanged
+ :: (a -> b -> (a,c)) -> (a -> a -> Bool)
+ -> a -> b
+ -> (a,c)
+
+ iterateUntilUnchanged f eq a b
+ = head $
+ concatMap tail $
+ groupBy (\(a1, _) (a2, _) -> eq a1 a2) $
+ iterate (\(a, _) -> f a b) $
+ (a, error "RegisterAlloc.livenessSCCs")
+
+
+ linearLiveness :: BlockMap RegSet -> [NatBasicBlock]
+ -> (BlockMap RegSet, [AnnBasicBlock])
+ linearLiveness = mapAccumL processBlock
+
+ processBlock blockmap input@(BasicBlock block_id instrs)
+ = (blockmap', BasicBlock block_id instrs')
+ where (live,instrs') = liveness emptyUniqSet blockmap []
+ (reverse instrs)
+ blockmap' = addToUFM blockmap block_id live
+
+ -- probably the least efficient way to compare two
+ -- BlockMaps for equality.
+ equalBlockMaps a b
+ = a' == b'
+ where a' = map f $ ufmToList a
+ b' = map f $ ufmToList b
+ f (key,elt) = (key, uniqSetToList elt)
+
+ liveness :: RegSet -- live regs
+ -> BlockMap RegSet -- live regs on entry to other BBs
+ -> [(Instr,[Reg],[Reg])] -- instructions (accum)
+ -> [Instr] -- instructions
+ -> (RegSet, [(Instr,[Reg],[Reg])])
+
+ liveness liveregs blockmap done [] = (liveregs, done)
+ liveness liveregs blockmap done (instr:instrs)
+ = liveness liveregs2 blockmap ((instr,r_dying,w_dying):done) instrs
+ where
+ RU read written = regUsage instr
+
+ -- registers that were written here are dead going backwards.
+ -- registers that were read here are live going backwards.
+ liveregs1 = (liveregs `delListFromUniqSet` written)
+ `addListToUniqSet` read
+
+ -- union in the live regs from all the jump destinations of this
+ -- instruction.
+ targets = jumpDests instr [] -- where we go from here
+ liveregs2 = unionManyUniqSets
+ (liveregs1 : map targetLiveRegs targets)
+
+ targetLiveRegs target = case lookupUFM blockmap target of
+ Just ra -> ra
+ Nothing -> emptyBlockMap
+
+ -- registers that are not live beyond this point are recorded
+ -- as dying here.
+ r_dying = [ reg | reg <- read, reg `notElem` written,
+ not (elementOfUniqSet reg liveregs) ]
+
+ w_dying = [ reg | reg <- written,
+ not (elementOfUniqSet reg liveregs) ]
+
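+-- A worked example of the backwards pass above, for a hypothetical
+-- three-instruction block with no jumps (t1, t2 virtual registers):
+--
+--      i1: t1 := ...              -- nothing dies here (t1 live below)
+--      i2: t2 := t1 + t1          -- t1 read, not live below: r_dying = [t1]
+--      i3: use t2                 -- t2 read, not live below: r_dying = [t2]
+--
+-- The instructions are processed in the order i3, i2, i1, starting from an
+-- empty live set; the live set left over at the end is what gets recorded
+-- for the block's entry in the BlockMap.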
+
+-- -----------------------------------------------------------------------------
+-- Linear sweep to allocate registers
+
+data Loc = InReg {-# UNPACK #-} !RegNo
+ | InMem {-# UNPACK #-} !Int -- stack slot
+ | InBoth {-# UNPACK #-} !RegNo
+ {-# UNPACK #-} !Int -- stack slot
+ deriving (Eq, Show, Ord)
+
+{-
+A temporary can be marked as living in both a register and memory
+(InBoth), for example if it was recently loaded from a spill location.
+This makes it cheap to spill (no save instruction required), but we
+have to be careful to turn this into InReg if the value in the
+register is changed.
+
+This is also useful when a temporary is about to be clobbered. We
+save it in a spill location, but mark it as InBoth because the current
+instruction might still want to read it.
+-}
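+
+-- A minimal sketch of the InBoth-to-InReg rule described above (the real
+-- logic lives in allocateRegsAndSpill and clobberRegs below):
+
+locAfterWrite :: Loc -> Loc
+locAfterWrite (InBoth r _) = InReg r  -- the memory copy is now stale
+locAfterWrite other        = other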
+
+#ifdef DEBUG
+instance Outputable Loc where
+ ppr l = text (show l)
+#endif
+
+linearRegAlloc
+ :: BlockMap RegSet -- live regs on entry to each basic block
+ -> [SCC AnnBasicBlock] -- instructions annotated with "deaths"
+ -> UniqSM [NatBasicBlock]
+linearRegAlloc block_live sccs = linearRA_SCCs emptyBlockMap sccs
+ where
+ linearRA_SCCs
+ :: BlockAssignment
+ -> [SCC AnnBasicBlock]
+ -> UniqSM [NatBasicBlock]
+ linearRA_SCCs block_assig [] = returnUs []
+ linearRA_SCCs block_assig
+ (AcyclicSCC (BasicBlock id instrs) : sccs)
+ = getUs `thenUs` \us ->
+ let
+ (block_assig',(instrs',fixups)) =
+ case lookupUFM block_assig id of
+ -- no prior info about this block: assume everything is
+ -- free and the assignment is empty.
+ Nothing ->
+ runR block_assig initFreeRegs
+ emptyRegMap completelyFreeStack us $
+ linearRA [] [] instrs
+ Just (freeregs,stack,assig) ->
+ runR block_assig freeregs assig stack us $
+ linearRA [] [] instrs
+ in
+ linearRA_SCCs block_assig' sccs `thenUs` \moreBlocks ->
+ returnUs $ BasicBlock id instrs' : fixups ++ moreBlocks
+
+ linearRA_SCCs block_assig
+ (CyclicSCC blocks : sccs)
+ = getUs `thenUs` \us ->
+ let
+ ((block_assig', us'), blocks') = mapAccumL processBlock
+ (block_assig, us)
+ ({-reverse-} blocks)
+ in
+ linearRA_SCCs block_assig' sccs `thenUs` \moreBlocks ->
+ returnUs $ concat blocks' ++ moreBlocks
+ where
+ processBlock (block_assig, us0) (BasicBlock id instrs)
+ = ((block_assig', us'), BasicBlock id instrs' : fixups)
+ where
+ (us, us') = splitUniqSupply us0
+ (block_assig',(instrs',fixups)) =
+ case lookupUFM block_assig id of
+ -- no prior info about this block: assume everything is
+ -- free and the assignment is empty.
+ Nothing ->
+ runR block_assig initFreeRegs
+ emptyRegMap completelyFreeStack us $
+ linearRA [] [] instrs
+ Just (freeregs,stack,assig) ->
+ runR block_assig freeregs assig stack us $
+ linearRA [] [] instrs
+
+ linearRA :: [Instr] -> [NatBasicBlock] -> [(Instr,[Reg],[Reg])]
+ -> RegM ([Instr], [NatBasicBlock])
+ linearRA instr_acc fixups [] =
+ return (reverse instr_acc, fixups)
+ linearRA instr_acc fixups (instr:instrs) = do
+ (instr_acc', new_fixups) <- raInsn block_live instr_acc instr
+ linearRA instr_acc' (new_fixups++fixups) instrs
+
+-- -----------------------------------------------------------------------------
+-- Register allocation for a single instruction
+
+type BlockAssignment = BlockMap (FreeRegs, FreeStack, RegMap Loc)
+
+raInsn :: BlockMap RegSet -- Live temporaries at each basic block
+ -> [Instr] -- new instructions (accum.)
+ -> (Instr,[Reg],[Reg]) -- the instruction (with "deaths")
+ -> RegM (
+ [Instr], -- new instructions
+ [NatBasicBlock] -- extra fixup blocks
+ )
+
+raInsn block_live new_instrs (instr@(DELTA n), _, _) = do
+ setDeltaR n
+ return (new_instrs, [])
+
+raInsn block_live new_instrs (instr, r_dying, w_dying) = do
+ assig <- getAssigR
+
+ -- If we have a reg->reg move between virtual registers, where the
+ -- src register is not live after this instruction, and the dst
+ -- register does not already have an assignment, then we can
+ -- eliminate the instruction.
+ case isRegRegMove instr of
+ Just (src,dst)
+ | src `elem` r_dying,
+ isVirtualReg dst,
+ Just loc <- lookupUFM assig src,
+ not (dst `elemUFM` assig) -> do
+ setAssigR (addToUFM (delFromUFM assig src) dst loc)
+ return (new_instrs, [])
+
+ other -> genRaInsn block_live new_instrs instr r_dying w_dying
+
+
+genRaInsn block_live new_instrs instr r_dying w_dying =
+ case regUsage instr of { RU read written ->
+ case partition isRealReg written of { (real_written1,virt_written) ->
+ do
+ let
+ real_written = [ r | RealReg r <- real_written1 ]
+
+ -- we don't need to do anything with real registers that are
+ -- only read by this instr. (the list is typically ~2 elements,
+ -- so using nub isn't a problem).
+ virt_read = nub (filter isVirtualReg read)
+ -- in
+
+ -- (a) save any temporaries which will be clobbered by this instruction
+ clobber_saves <- saveClobberedTemps real_written r_dying
+
+ {-
+ freeregs <- getFreeRegsR
+ assig <- getAssigR
+ pprTrace "raInsn" (docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ ppr virt_read <+> ppr virt_written $$ text (show freeregs) $$ ppr assig) $ do
+ -}
+
+ -- (b), (c) allocate real regs for all regs read by this instruction.
+ (r_spills, r_allocd) <-
+ allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read
+
+ -- (d) Update block map for new destinations
+ -- NB. do this before removing dead regs from the assignment, because
+ -- these dead regs might in fact be live in the jump targets (they're
+ -- only dead in the code that follows in the current basic block).
+ (fixup_blocks, adjusted_instr)
+ <- joinToTargets block_live [] instr (jumpDests instr [])
+
+ -- (e) Delete all register assignments for temps which are read
+ -- (only) and die here. Update the free register list.
+ releaseRegs r_dying
+
+ -- (f) Mark regs which are clobbered as unallocatable
+ clobberRegs real_written
+
+ -- (g) Allocate registers for temporaries *written* (only)
+ (w_spills, w_allocd) <-
+ allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written
+
+ -- (h) Release registers for temps which are written here and not
+ -- used again.
+ releaseRegs w_dying
+
+ let
+ -- (i) Patch the instruction
+ patch_map = listToUFM [ (t,RealReg r) |
+ (t,r) <- zip virt_read r_allocd
+ ++ zip virt_written w_allocd ]
+
+ patched_instr = patchRegs adjusted_instr patchLookup
+ patchLookup x = case lookupUFM patch_map x of
+ Nothing -> x
+ Just y -> y
+ -- in
+
+ -- pprTrace "patched" (docToSDoc (pprInstr patched_instr)) $ do
+
+ -- (j) free up stack slots for dead spilled regs
+ -- TODO (can't be bothered right now)
+
+ return (patched_instr : w_spills ++ reverse r_spills
+ ++ clobber_saves ++ new_instrs,
+ fixup_blocks)
+ }}
+
+-- -----------------------------------------------------------------------------
+-- releaseRegs
+
+releaseRegs regs = do
+ assig <- getAssigR
+ free <- getFreeRegsR
+ loop assig free regs
+ where
+ loop assig free _ | free `seq` False = undefined
+ loop assig free [] = do setAssigR assig; setFreeRegsR free; return ()
+ loop assig free (RealReg r : rs) = loop assig (releaseReg r free) rs
+ loop assig free (r:rs) =
+ case lookupUFM assig r of
+ Just (InBoth real _) -> loop (delFromUFM assig r) (releaseReg real free) rs
+ Just (InReg real) -> loop (delFromUFM assig r) (releaseReg real free) rs
+ _other -> loop (delFromUFM assig r) free rs
+
+-- -----------------------------------------------------------------------------
+-- Clobber real registers
+
+{-
+For each temp in a register that is going to be clobbered:
+ - if the temp dies after this instruction, do nothing
+ - otherwise, put it somewhere safe (another reg if possible,
+ otherwise spill and record InBoth in the assignment).
+
+for allocateRegs on the temps *read*,
+ - clobbered regs are allocatable.
+
+for allocateRegs on the temps *written*,
+ - clobbered regs are not allocatable.
+-}
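+
+-- For example, on x86 a foreign call clobbers the C caller-saved registers;
+-- a temporary currently sitting in one of them that is still live after the
+-- call is saved to a spill slot here and left marked InBoth, so the call
+-- instruction itself can still read the register copy.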
+
+saveClobberedTemps
+ :: [RegNo] -- real registers clobbered by this instruction
+ -> [Reg] -- registers which are no longer live after this insn
+ -> RegM [Instr] -- return: instructions to spill any temps that will
+ -- be clobbered.
+
+saveClobberedTemps [] _ = return [] -- common case
+saveClobberedTemps clobbered dying = do
+ assig <- getAssigR
+ let
+ to_spill = [ (temp,reg) | (temp, InReg reg) <- ufmToList assig,
+ reg `elem` clobbered,
+ temp `notElem` map getUnique dying ]
+ -- in
+ (instrs,assig') <- clobber assig [] to_spill
+ setAssigR assig'
+ return instrs
+ where
+ clobber assig instrs [] = return (instrs,assig)
+ clobber assig instrs ((temp,reg):rest)
+ = do
+ --ToDo: copy it to another register if possible
+ (spill,slot) <- spillR (RealReg reg) temp
+ clobber (addToUFM assig temp (InBoth reg slot)) (spill:instrs) rest
+
+clobberRegs :: [RegNo] -> RegM ()
+clobberRegs [] = return () -- common case
+clobberRegs clobbered = do
+ freeregs <- getFreeRegsR
+ setFreeRegsR $! foldr allocateReg freeregs clobbered
+ assig <- getAssigR
+ setAssigR $! clobber assig (ufmToList assig)
+ where
+ -- if the temp was InReg and clobbered, then we will have
+ -- saved it in saveClobberedTemps above. So the only case
+ -- we have to worry about here is InBoth. Note that this
+ -- also catches temps which were loaded up during allocation
+ -- of read registers, not just those saved in saveClobberedTemps.
+ clobber assig [] = assig
+ clobber assig ((temp, InBoth reg slot) : rest)
+ | reg `elem` clobbered
+ = clobber (addToUFM assig temp (InMem slot)) rest
+ clobber assig (entry:rest)
+ = clobber assig rest
+
+-- -----------------------------------------------------------------------------
+-- allocateRegsAndSpill
+
+-- This function does several things:
+-- For each temporary referred to by this instruction,
+-- we allocate a real register (spilling another temporary if necessary).
+-- We load the temporary up from memory if necessary.
+-- We also update the register assignment in the process, and
+-- the list of free registers and free stack slots.
+
+allocateRegsAndSpill
+ :: Bool -- True <=> reading (load up spilled regs)
+ -> [Reg] -- don't push these out
+ -> [Instr] -- spill insns
+ -> [RegNo] -- real registers allocated (accum.)
+ -> [Reg] -- temps to allocate
+ -> RegM ([Instr], [RegNo])
+
+allocateRegsAndSpill reading keep spills alloc []
+ = return (spills,reverse alloc)
+
+allocateRegsAndSpill reading keep spills alloc (r:rs) = do
+ assig <- getAssigR
+ case lookupUFM assig r of
+ -- case (1a): already in a register
+ Just (InReg my_reg) ->
+ allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+
+ -- case (1b): already in a register (and memory)
+ -- NB1. if we're writing this register, update its assignment to be
+ -- InReg, because the memory value is no longer valid.
+ -- NB2. This is why we must process written registers here, even if they
+ -- are also read by the same instruction.
+ Just (InBoth my_reg mem) -> do
+ when (not reading) (setAssigR (addToUFM assig r (InReg my_reg)))
+ allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+
+ -- Not already in a register, so we need to find a free one...
+ loc -> do
+ freeregs <- getFreeRegsR
+
+ case getFreeRegs (regClass r) freeregs of
+
+ -- case (2): we have a free register
+ my_reg:_ -> do
+ spills' <- do_load reading loc my_reg spills
+ let new_loc
+ | Just (InMem slot) <- loc, reading = InBoth my_reg slot
+ | otherwise = InReg my_reg
+ setAssigR (addToUFM assig r $! new_loc)
+ setFreeRegsR (allocateReg my_reg freeregs)
+ allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
+
+ -- case (3): we need to push something out to free up a register
+ [] -> do
+ let
+ keep' = map getUnique keep
+ candidates1 = [ (temp,reg,mem)
+ | (temp, InBoth reg mem) <- ufmToList assig,
+ temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ candidates2 = [ (temp,reg)
+ | (temp, InReg reg) <- ufmToList assig,
+ temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ -- in
+ ASSERT2(not (null candidates1 && null candidates2),
+ text (show freeregs) <+> ppr r <+> ppr assig) do
+
+ case candidates1 of
+
+ -- we have a temporary that is in both register and mem,
+ -- just free up its register for use.
+ --
+ (temp,my_reg,slot):_ -> do
+ spills' <- do_load reading loc my_reg spills
+ let
+ assig1 = addToUFM assig temp (InMem slot)
+ assig2 = addToUFM assig1 r (InReg my_reg)
+ -- in
+ setAssigR assig2
+ allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
+
+ -- otherwise, we need to spill a temporary that currently
+ -- resides in a register.
+ [] -> do
+ let
+ (temp_to_push_out, my_reg) = myHead "regalloc" candidates2
+ -- TODO: plenty of room for optimisation in choosing which temp
+ -- to spill. We just pick the first one that isn't used in
+ -- the current instruction for now.
+ -- in
+ (spill_insn,slot) <- spillR (RealReg my_reg) temp_to_push_out
+ let
+ assig1 = addToUFM assig temp_to_push_out (InMem slot)
+ assig2 = addToUFM assig1 r (InReg my_reg)
+ -- in
+ setAssigR assig2
+ spills' <- do_load reading loc my_reg spills
+ allocateRegsAndSpill reading keep (spill_insn:spills')
+ (my_reg:alloc) rs
+ where
+ -- load up a spilled temporary if we need to
+ do_load True (Just (InMem slot)) reg spills = do
+ insn <- loadR (RealReg reg) slot
+ return (insn : spills)
+ do_load _ _ _ spills =
+ return spills
+
+myHead s [] = panic s
+myHead s (x:xs) = x
+
+-- -----------------------------------------------------------------------------
+-- Joining a jump instruction to its targets
+
+-- The first time we encounter a jump to a particular basic block, we
+-- record the assignment of temporaries. The next time we encounter a
+-- jump to the same block, we compare our current assignment to the
+-- stored one. They might be different if spilling has occurred in one
+-- branch; so some fixup code will be required to match up the
+-- assignments.
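+
+-- For example (hypothetical numbers): if the stored assignment for the
+-- destination block has a temporary in register 5, but along the current
+-- path it was spilled to stack slot 2, the fixup block contains the load
+-- produced by
+--
+--     mkLoadInstr (RealReg 5) delta 2
+--
+-- and the jump is redirected to that fixup block (see makeMove below).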
+
+joinToTargets
+ :: BlockMap RegSet
+ -> [NatBasicBlock]
+ -> Instr
+ -> [BlockId]
+ -> RegM ([NatBasicBlock], Instr)
+
+joinToTargets block_live new_blocks instr []
+ = return (new_blocks, instr)
+joinToTargets block_live new_blocks instr (dest:dests) = do
+ block_assig <- getBlockAssigR
+ assig <- getAssigR
+ let
+ -- adjust the assignment to remove any registers which are not
+ -- live on entry to the destination block.
+ adjusted_assig = filterUFM_Directly still_live assig
+ still_live uniq _ = uniq `elemUniqSet_Directly` live_set
+
+ -- and free up those registers which are now free.
+ to_free =
+ [ r | (reg, loc) <- ufmToList assig,
+ not (elemUniqSet_Directly reg live_set),
+ r <- regsOfLoc loc ]
+
+ regsOfLoc (InReg r) = [r]
+ regsOfLoc (InBoth r _) = [r]
+ regsOfLoc (InMem _) = []
+ -- in
+ case lookupUFM block_assig dest of
+ -- Nothing <=> this is the first time we jumped to this
+ -- block.
+ Nothing -> do
+ freeregs <- getFreeRegsR
+ let freeregs' = foldr releaseReg freeregs to_free
+ stack <- getStackR
+ setBlockAssigR (addToUFM block_assig dest
+ (freeregs',stack,adjusted_assig))
+ joinToTargets block_live new_blocks instr dests
+
+ Just (freeregs,stack,dest_assig)
+ | ufmToList dest_assig == ufmToList adjusted_assig
+ -> -- ok, the assignments match
+ joinToTargets block_live new_blocks instr dests
+ | otherwise
+ -> -- need fixup code
+ do
+ delta <- getDeltaR
+ -- Construct a graph of register/spill movements and
+ -- untangle it component by component.
+ --
+ -- We cut some corners by
+ -- a) not handling cyclic components
+ -- b) not handling memory-to-memory moves.
+ --
+ -- Cyclic components seem to occur only very rarely,
+ -- and we don't need memory-to-memory moves because we
+ -- make sure that every temporary always gets its own
+ -- stack slot.
+
+ let graph = [ (loc0, loc0,
+ [lookupWithDefaultUFM_Directly
+ dest_assig
+ (panic "RegisterAlloc.joinToTargets")
+ vreg]
+ )
+ | (vreg, loc0) <- ufmToList adjusted_assig ]
+ sccs = stronglyConnCompR graph
+
+ handleComponent (CyclicSCC [one]) = []
+ handleComponent (AcyclicSCC (src,_,[dst]))
+ = makeMove src dst
+ handleComponent (CyclicSCC things)
+ = panic $ "Register Allocator: handleComponent: cyclic"
+ ++ " (workaround: use -fviaC)"
+
+ makeMove (InReg src) (InReg dst)
+ = [mkRegRegMoveInstr (RealReg src) (RealReg dst)]
+ makeMove (InMem src) (InReg dst)
+ = [mkLoadInstr (RealReg dst) delta src]
+ makeMove (InReg src) (InMem dst)
+ = [mkSpillInstr (RealReg src) delta dst]
+
+ makeMove (InBoth src _) (InReg dst)
+ | src == dst = []
+ makeMove (InBoth _ src) (InMem dst)
+ | src == dst = []
+ makeMove (InBoth src _) dst
+ = makeMove (InReg src) dst
+ makeMove (InReg src) (InBoth dstR dstM)
+ | src == dstR
+ = makeMove (InReg src) (InMem dstM)
+ | otherwise
+ = makeMove (InReg src) (InReg dstR)
+ ++ makeMove (InReg src) (InMem dstM)
+
+ makeMove src dst
+ = panic $ "makeMove (" ++ show src ++ ") ("
+ ++ show dst ++ ")"
+ ++ " (workaround: use -fviaC)"
+
+ block_id <- getUniqueR
+ let block = BasicBlock (BlockId block_id) $
+ concatMap handleComponent sccs ++ mkBranchInstr dest
+ let instr' = patchJump instr dest (BlockId block_id)
+ joinToTargets block_live (block : new_blocks) instr' dests
+ where
+ live_set = lookItUp "joinToTargets" block_live dest
+
+-- -----------------------------------------------------------------------------
+-- The register allocator's monad.
+
+-- Here we keep all the state that the register allocator keeps track
+-- of as it walks the instructions in a basic block.
+
+data RA_State
+ = RA_State {
+ ra_blockassig :: BlockAssignment,
+ -- The current mapping from basic blocks to
+ -- the register assignments at the beginning of that block.
+ ra_freeregs :: {-#UNPACK#-}!FreeRegs, -- free machine registers
+ ra_assig :: RegMap Loc, -- assignment of temps to locations
+ ra_delta :: Int, -- current stack delta
+ ra_stack :: FreeStack, -- free stack slots for spilling
+ ra_us :: UniqSupply -- unique supply for generating names
+ -- for fixup blocks.
+ }
+
+newtype RegM a = RegM { unReg :: RA_State -> (# RA_State, a #) }
+
+instance Monad RegM where
+ m >>= k = RegM $ \s -> case unReg m s of { (# s, a #) -> unReg (k a) s }
+ return a = RegM $ \s -> (# s, a #)
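+
+-- RegM is a plain state monad over RA_State (using an unboxed pair to avoid
+-- allocation), so small passes can be written with the accessors below, e.g.:
+--
+--   bumpDeltaR :: Int -> RegM ()
+--   bumpDeltaR n = do { d <- getDeltaR; setDeltaR (d + n) }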
+
+runR :: BlockAssignment -> FreeRegs -> RegMap Loc -> FreeStack -> UniqSupply
+ -> RegM a -> (BlockAssignment, a)
+runR block_assig freeregs assig stack us thing =
+ case unReg thing (RA_State{ ra_blockassig=block_assig, ra_freeregs=freeregs,
+ ra_assig=assig, ra_delta=0{-???-}, ra_stack=stack,
+ ra_us = us }) of
+ (# RA_State{ ra_blockassig=block_assig }, returned_thing #)
+ -> (block_assig, returned_thing)
+
+spillR :: Reg -> Unique -> RegM (Instr, Int)
+spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
+ let (stack',slot) = getFreeStackSlotFor stack temp
+ instr = mkSpillInstr reg delta slot
+ in
+ (# s{ra_stack=stack'}, (instr,slot) #)
+
+loadR :: Reg -> Int -> RegM Instr
+loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
+ (# s, mkLoadInstr reg delta slot #)
+
+freeSlotR :: Int -> RegM ()
+freeSlotR slot = RegM $ \ s@RA_State{ra_stack=stack} ->
+ (# s{ra_stack=freeStackSlot stack slot}, () #)
+
+getFreeRegsR :: RegM FreeRegs
+getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} ->
+ (# s, freeregs #)
+
+setFreeRegsR :: FreeRegs -> RegM ()
+setFreeRegsR regs = RegM $ \ s ->
+ (# s{ra_freeregs = regs}, () #)
+
+getAssigR :: RegM (RegMap Loc)
+getAssigR = RegM $ \ s@RA_State{ra_assig = assig} ->
+ (# s, assig #)
+
+setAssigR :: RegMap Loc -> RegM ()
+setAssigR assig = RegM $ \ s ->
+ (# s{ra_assig=assig}, () #)
+
+getStackR :: RegM FreeStack
+getStackR = RegM $ \ s@RA_State{ra_stack = stack} ->
+ (# s, stack #)
+
+setStackR :: FreeStack -> RegM ()
+setStackR stack = RegM $ \ s ->
+ (# s{ra_stack=stack}, () #)
+
+getBlockAssigR :: RegM BlockAssignment
+getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} ->
+ (# s, assig #)
+
+setBlockAssigR :: BlockAssignment -> RegM ()
+setBlockAssigR assig = RegM $ \ s ->
+ (# s{ra_blockassig = assig}, () #)
+
+setDeltaR :: Int -> RegM ()
+setDeltaR n = RegM $ \ s ->
+ (# s{ra_delta = n}, () #)
+
+getDeltaR :: RegM Int
+getDeltaR = RegM $ \s -> (# s, ra_delta s #)
+
+getUniqueR :: RegM Unique
+getUniqueR = RegM $ \s ->
+ case splitUniqSupply (ra_us s) of
+ (us1, us2) -> (# s{ra_us = us2}, uniqFromSupply us1 #)
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+#ifdef DEBUG
+my_fromJust s p Nothing = pprPanic ("fromJust: " ++ s) p
+my_fromJust s p (Just x) = x
+#else
+my_fromJust _ _ = fromJust
+#endif
+
+lookItUp :: Uniquable b => String -> UniqFM a -> b -> a
+lookItUp str fm x = my_fromJust str (ppr (getUnique x)) (lookupUFM fm x)