Diffstat (limited to 'compiler/nativeGen/MachCodeGen.hs')
-rw-r--r--  compiler/nativeGen/MachCodeGen.hs  4654
1 file changed, 4654 insertions, 0 deletions
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
new file mode 100644
index 0000000000..90ce6b5bf8
--- /dev/null
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -0,0 +1,4654 @@
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import MachInstrs
+import MachRegs
+import NCGMonad
+import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import RegAllocInfo ( mkBranchInstr )
+
+-- Our intermediate code:
+import PprCmm ( pprExpr )
+import Cmm
+import MachOp
+import CLabel
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import ForeignCall ( CCallConv(..) )
+import OrdList
+import Pretty
+import Outputable
+import FastString
+import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
+
+#ifdef DEBUG
+import Outputable ( assertPanic )
+import TRACE ( trace )
+#endif
+
+import Control.Monad ( mapAndUnzipM )
+import Maybe ( fromJust )
+import DATA_BITS
+import DATA_WORD
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+type InstrBlock = OrdList Instr
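+
+-- As a minimal, purely illustrative sketch of the idiom used throughout this
+-- module (the combinators nilOL, unitOL, snocOL, appOL and toOL come from
+-- OrdList), a selector typically builds its result like
+--
+--      code dst = setup_code `snocOL` some_instr `appOL` more_code
+--
+-- so appending stays cheap, and the instructions are only flattened into a
+-- list when the block is finally traversed left-to-right.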
+
+cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen (CmmProc info lab params blocks) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (concat nat_blocks)
+ tops = proc : concat statics
+ case picBaseMb of
+ Just picBase -> initializePicBase picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+        -- instruction stream into basic blocks again.  We also extract
+        -- LDATAs here.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatingRep kind -> assignReg_FltCode kind reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode kind reg src
+ where kind = cmmRegRep reg
+
+ CmmStore addr src
+ | isFloatingRep kind -> assignMem_FltCode kind addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode kind addr src
+ where kind = cmmExprRep src
+
+ CmmCall target result_regs args vols
+ -> genCCall target result_regs args vols
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
+ where rep = cmmRegRep reg
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms. Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality. Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result. Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64 -- a.k.a "Register64"
+ = ChildCode64
+ InstrBlock -- code
+ Reg -- the lower 32-bit temporary which contains the
+ -- result; use getHiVRegFromLo to find the other
+ -- VRegUnique. Rules of this simplified insn
+ -- selection game are therefore that the returned
+ -- Reg may be modified
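+
+-- For example (a sketch only, mirroring the assignMem_I64Code cases below),
+-- a consumer of a ChildCode64 recovers both halves like this:
+--
+--      ChildCode64 vcode rlo <- iselExpr64 valueTree
+--      let rhi = getHiVRegFromLo rlo  -- paired vreg holding the top 32 bits
+--      -- ... emit code that uses both rlo and rhi ...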
+
+#if WORD_SIZE_IN_BITS==32
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Little-endian store
+ mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
+ mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
+ mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(i386): invalid lvalue"
+
+------------
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ code = toOL [
+ MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode addr addr_code <- getAmode addrTree
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
+ mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+ -- in
+ return (
+ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ r1hi = getHiVRegFromLo r1lo
+ code = code1 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
+ ADD I32 (OpReg r2lo) (OpReg rlo),
+ MOV I32 (OpReg r1hi) (OpReg rhi),
+ ADC I32 (OpReg r2hi) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(i386)" (ppr expr)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ (src, code) <- getSomeReg addrTree
+ let
+ rhi = getHiVRegFromLo rlo
+ -- Big-endian store
+ mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
+ mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
+ return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst pk
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
+ rlo <- getNewRegNat I32
+ let rhi = getHiVRegFromLo rlo
+ mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
+ mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
+ return (
+ ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
+ r_dst_lo <- getNewRegNat I32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_lo = mkVReg uq I32
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (
+ ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+ )
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+#endif /* sparc_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if powerpc_TARGET_ARCH
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+ Amode hi_addr addr_code <- getAmode addrTree
+ case addrOffset hi_addr 4 of
+ Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+ Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
+ return (AddrRegImm hi_ptr (ImmInt 0),
+ AddrRegImm hi_ptr (ImmInt 4),
+ code)
+
+assignMem_I64Code addrTree valueTree = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Big-endian store
+ mov_hi = ST I32 rhi hi_addr
+ mov_lo = ST I32 rlo lo_addr
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst I32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MR r_dst_lo r_src_lo
+ mov_hi = MR r_dst_hi r_src_hi
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+-- Don't delete this -- it's very handy for debugging.
+--iselExpr64 expr
+-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "iselExpr64(???)"
+
+iselExpr64 (CmmLoad addrTree I64) = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ (rlo, rhi) <- getNewRegPairNat I32
+ let mov_hi = LD I32 rhi hi_addr
+ mov_lo = LD I32 rlo lo_addr
+ return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+ = return (ChildCode64 nilOL (mkVReg vu I32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ half0 = fromIntegral (fromIntegral i :: Word16)
+ half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+ half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+ half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+
+ code = toOL [
+ LIS rlo (ImmInt half1),
+ OR rlo rlo (RIImm $ ImmInt half0),
+ LIS rhi (ImmInt half3),
+                OR rhi rhi (RIImm $ ImmInt half2)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat I32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ ADDC rlo r1lo r2lo,
+ ADDE rhi r1hi r2hi ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Register' type
+
+-- 'Register's passed up the tree.  If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+
+data Register
+ = Fixed MachRep Reg InstrBlock
+ | Any MachRep (Reg -> InstrBlock)
+
+swizzleRegisterRep :: Register -> MachRep -> Register
+swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
+swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
+
+
+-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+-- -----------------------------------------------------------------------------
+-- Grab the Reg for a CmmReg
+
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u pk
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- -----------------------------------------------------------------------------
+-- Generate code to get a subtree into a Register
+
+-- Don't delete this -- it's very handy for debugging.
+--getRegister expr
+-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
+-- = panic "getRegister(???)"
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordRep
+ return (Fixed wordRep reg nilOL)
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+#if alpha_TARGET_ARCH
+
+getRegister (StDouble d)
+ = getBlockIdNat `thenNat` \ lbl ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let code dst = mkSeqInstrs [
+ LDATA RoDataSegment lbl [
+ DATA TF [ImmLab (rational d)]
+ ],
+ LDA tmp (AddrImm (ImmCLbl lbl)),
+ LD TF dst (AddrReg tmp)]
+ in
+ return (Any F64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+ = case primop of
+ IntNegOp -> trivialUCode (NEG Q False) x
+
+ NotOp -> trivialUCode NOT x
+
+ FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
+ DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
+
+ OrdOp -> coerceIntCode IntRep x
+ ChrOp -> chrCode x
+
+ Float2IntOp -> coerceFP2Int x
+ Int2FloatOp -> coerceInt2FP pr x
+ Double2IntOp -> coerceFP2Int x
+ Int2DoubleOp -> coerceInt2FP pr x
+
+ Double2FloatOp -> coerceFltCode x
+ Float2DoubleOp -> coerceFltCode x
+
+ other_op -> getRegister (StCall fn CCallConv F64 [x])
+ where
+ fn = case other_op of
+ FloatExpOp -> FSLIT("exp")
+ FloatLogOp -> FSLIT("log")
+ FloatSqrtOp -> FSLIT("sqrt")
+ FloatSinOp -> FSLIT("sin")
+ FloatCosOp -> FSLIT("cos")
+ FloatTanOp -> FSLIT("tan")
+ FloatAsinOp -> FSLIT("asin")
+ FloatAcosOp -> FSLIT("acos")
+ FloatAtanOp -> FSLIT("atan")
+ FloatSinhOp -> FSLIT("sinh")
+ FloatCoshOp -> FSLIT("cosh")
+ FloatTanhOp -> FSLIT("tanh")
+ DoubleExpOp -> FSLIT("exp")
+ DoubleLogOp -> FSLIT("log")
+ DoubleSqrtOp -> FSLIT("sqrt")
+ DoubleSinOp -> FSLIT("sin")
+ DoubleCosOp -> FSLIT("cos")
+ DoubleTanOp -> FSLIT("tan")
+ DoubleAsinOp -> FSLIT("asin")
+ DoubleAcosOp -> FSLIT("acos")
+ DoubleAtanOp -> FSLIT("atan")
+ DoubleSinhOp -> FSLIT("sinh")
+ DoubleCoshOp -> FSLIT("cosh")
+ DoubleTanhOp -> FSLIT("tanh")
+ where
+ pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+ = case primop of
+ CharGtOp -> trivialCode (CMP LTT) y x
+ CharGeOp -> trivialCode (CMP LE) y x
+ CharEqOp -> trivialCode (CMP EQQ) x y
+ CharNeOp -> int_NE_code x y
+ CharLtOp -> trivialCode (CMP LTT) x y
+ CharLeOp -> trivialCode (CMP LE) x y
+
+ IntGtOp -> trivialCode (CMP LTT) y x
+ IntGeOp -> trivialCode (CMP LE) y x
+ IntEqOp -> trivialCode (CMP EQQ) x y
+ IntNeOp -> int_NE_code x y
+ IntLtOp -> trivialCode (CMP LTT) x y
+ IntLeOp -> trivialCode (CMP LE) x y
+
+ WordGtOp -> trivialCode (CMP ULT) y x
+      WordGeOp -> trivialCode (CMP ULE) y x
+ WordEqOp -> trivialCode (CMP EQQ) x y
+ WordNeOp -> int_NE_code x y
+ WordLtOp -> trivialCode (CMP ULT) x y
+ WordLeOp -> trivialCode (CMP ULE) x y
+
+ AddrGtOp -> trivialCode (CMP ULT) y x
+ AddrGeOp -> trivialCode (CMP ULE) y x
+ AddrEqOp -> trivialCode (CMP EQQ) x y
+ AddrNeOp -> int_NE_code x y
+ AddrLtOp -> trivialCode (CMP ULT) x y
+ AddrLeOp -> trivialCode (CMP ULE) x y
+
+ FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ IntAddOp -> trivialCode (ADD Q False) x y
+ IntSubOp -> trivialCode (SUB Q False) x y
+ IntMulOp -> trivialCode (MUL Q False) x y
+ IntQuotOp -> trivialCode (DIV Q False) x y
+ IntRemOp -> trivialCode (REM Q False) x y
+
+ WordAddOp -> trivialCode (ADD Q False) x y
+ WordSubOp -> trivialCode (SUB Q False) x y
+ WordMulOp -> trivialCode (MUL Q False) x y
+ WordQuotOp -> trivialCode (DIV Q True) x y
+ WordRemOp -> trivialCode (REM Q True) x y
+
+ FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
+ FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
+ FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
+ FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
+
+ DoubleAddOp -> trivialFCode F64 (FADD TF) x y
+ DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
+ DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
+ DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
+
+ AddrAddOp -> trivialCode (ADD Q False) x y
+ AddrSubOp -> trivialCode (SUB Q False) x y
+ AddrRemOp -> trivialCode (REM Q True) x y
+
+ AndOp -> trivialCode AND x y
+ OrOp -> trivialCode OR x y
+ XorOp -> trivialCode XOR x y
+ SllOp -> trivialCode SLL x y
+ SrlOp -> trivialCode SRL x y
+
+ ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+ ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+ ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+ FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
+ where
+ {- ------------------------------------------------------------
+ Some bizarre special code for getting condition codes into
+ registers. Integer non-equality is a test for equality
+ followed by an XOR with 1. (Integer comparisons always set
+ the result register to 0 or 1.) Floating point comparisons of
+ any kind leave the result in a floating point register, so we
+ need to wrangle an integer register out of things.
+ -}
+ int_NE_code :: StixTree -> StixTree -> NatM Register
+
+ int_NE_code x y
+ = trivialCode (CMP EQQ) x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+ in
+ return (Any IntRep code__2)
+
+ {- ------------------------------------------------------------
+ Comments for int_NE_code also apply to cmpF_code
+ -}
+ cmpF_code
+ :: (Reg -> Reg -> Reg -> Instr)
+ -> Cond
+ -> StixTree -> StixTree
+ -> NatM Register
+
+ cmpF_code instr cond x y
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ getBlockIdNat `thenNat` \ lbl ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ OR zeroh (RIImm (ImmInt 1)) dst,
+ BF cond result (ImmCLbl lbl),
+ OR zeroh (RIReg zeroh) dst,
+ NEWBLOCK lbl]
+ in
+ return (Any IntRep code__2)
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+ ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+ = getAmode mem `thenNat` \ amode ->
+ let
+ code = amodeCode amode
+ src = amodeAddr amode
+ size = primRepToSize pk
+ code__2 dst = code . mkSeqInstr (LD size dst src)
+ in
+ return (Any pk code__2)
+
+getRegister (StInt i)
+ | fits8Bits i
+ = let
+ code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+ in
+ return (Any IntRep code)
+ | otherwise
+ = let
+ code dst = mkSeqInstr (LDI Q dst src)
+ in
+ return (Any IntRep code)
+ where
+ src = ImmInt (fromInteger i)
+
+getRegister leaf
+ | isJust imm
+ = let
+ code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+ in
+ return (Any PtrRep code)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)]
+ `consOL` (addr_code `snocOL`
+ GLD F32 addr dst)
+ -- in
+ return (Any F32 code)
+
+
+getRegister (CmmLit (CmmFloat d F64))
+ | d == 0.0
+ = let code dst = unitOL (GLDZ dst)
+ in return (Any F64 code)
+
+ | d == 1.0
+ = let code dst = unitOL (GLD1 dst)
+ in return (Any F64 code)
+
+ | otherwise = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)]
+ `consOL` (addr_code `snocOL`
+ GLD F64 addr dst)
+ -- in
+ return (Any F64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+ let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f rep)],
+ MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ ]
+ -- in
+ return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I32 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I8) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I16) addr
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+ return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL I32) addr
+ return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ code dst = x_code dst `appOL` toOL [
+ -- This is how gcc does it, so it can't be that bad:
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x80000000 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32),
+ CmmStaticLit (CmmInt 0 I32)
+ ],
+ XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorps, so we need the 128-bit constant
+ -- ToDo: rip-relative
+ ]
+ --
+ return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ -- This is how gcc does it, so it can't be that bad:
+ code dst = x_code dst `appOL` toOL [
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x8000000000000000 I64),
+ CmmStaticLit (CmmInt 0 I64)
+ ],
+ -- gcc puts an unpck here. Wonder if we need it.
+ XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorpd, so we need the 128-bit constant
+ ]
+ --
+ return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+#if i386_TARGET_ARCH
+ MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
+
+ MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
+ MO_Not rep -> trivialUCode rep (NOT rep) x
+
+ -- Nop conversions
+ -- TODO: these are only nops if the arg is not a fixed register that
+ -- can't be byte-addressed.
+ MO_U_Conv I32 I8 -> conversionNop I32 x
+ MO_S_Conv I32 I8 -> conversionNop I32 x
+ MO_U_Conv I16 I8 -> conversionNop I16 x
+ MO_S_Conv I16 I8 -> conversionNop I16 x
+ MO_U_Conv I32 I16 -> conversionNop I32 x
+ MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
+ MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+ MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
+ MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
+ MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
+
+ MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
+ MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
+ MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+ MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
+ MO_S_Conv from to
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ other -> pprPanic "getRegister" (pprMachOp mop)
+ where
+ -- signed or unsigned extension.
+ integerExtend from to instr expr = do
+ (reg,e_code) <- if from == I8 then getByteReg expr
+ else getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ instr from (OpReg reg) (OpReg dst)
+ return (Any to code)
+
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+ = ASSERT2(cmmExprRep x /= I8, pprExpr e)
+ case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt rep -> condIntReg GU x y
+ MO_U_Ge rep -> condIntReg GEU x y
+ MO_U_Lt rep -> condIntReg LU x y
+ MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
+
+ MO_Add F64 -> trivialFCode F64 GADD x y
+ MO_Sub F64 -> trivialFCode F64 GSUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
+
+ MO_Add rep -> add_code rep x y
+ MO_Sub rep -> sub_code rep x y
+
+ MO_S_Quot rep -> div_code rep True True x y
+ MO_S_Rem rep -> div_code rep True False x y
+ MO_U_Quot rep -> div_code rep False True x y
+ MO_U_Rem rep -> div_code rep False False x y
+
+#if i386_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 GMUL x y
+ MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
+ MO_Mul rep -> let op = IMUL rep in
+ trivialCode rep op (Just op) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_And rep -> let op = AND rep in
+ trivialCode rep op (Just op) x y
+ MO_Or rep -> let op = OR rep in
+ trivialCode rep op (Just op) x y
+ MO_Xor rep -> let op = XOR rep in
+ trivialCode rep op (Just op) x y
+
+        {- Shift ops on x86 have constraints on their source operand: it
+           must be an immediate, CL, or 1
+            => trivialCode is not restrictive enough (sigh.)
+           -}
+ MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
+ MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
+ MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
+
+ other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getNonClobberedReg a
+ b_code <- getAnyReg b
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+
+ code = a_code `appOL` b_code eax `appOL`
+ toOL [
+ IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
+ SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB rep (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
+ ]
+ -- in
+ return (Fixed rep eax code)
+
+ --------------------
+ shift_code :: MachRep
+ -> (Operand -> Operand -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+ {- Case1: shift length as immediate -}
+ shift_code rep instr x y@(CmmLit lit) = do
+ x_code <- getAnyReg x
+ let
+ code dst
+ = x_code dst `snocOL`
+ instr (OpImm (litToImm lit)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+ {- Case2: shift length is complex (non-immediate) -}
+ shift_code rep instr x y{-amount-} = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ y_code <- getAnyReg y
+ let
+ code = x_code `appOL`
+ y_code ecx `snocOL`
+ instr (OpReg ecx) (OpReg x_reg)
+ -- in
+ return (Fixed rep x_reg code)
+
+ --------------------
+ add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ add_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger y) = add_int rep x y
+ add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
+
+ --------------------
+ sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ sub_code rep x (CmmLit (CmmInt y _))
+ | not (is64BitInteger (-y)) = add_int rep x (-y)
+ sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
+
+ -- our three-operand add instruction:
+ add_int rep x y = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ imm = ImmInt (fromInteger y)
+ code dst
+ = x_code `snocOL`
+ LEA rep
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+ (OpReg dst)
+ --
+ return (Any rep code)
+
+ ----------------------
+ div_code rep signed quotient x y = do
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+ x_code <- getAnyReg x
+ let
+ widen | signed = CLTD rep
+ | otherwise = XOR rep (OpReg edx) (OpReg edx)
+
+ instr | signed = IDIV
+ | otherwise = DIV
+
+ code = y_code `appOL`
+ x_code eax `appOL`
+ toOL [widen, instr rep y_op]
+
+ result | quotient = eax
+ | otherwise = edx
+
+ -- in
+ return (Fixed rep result code)
+
+
+getRegister (CmmLoad mem pk)
+ | isFloatingRep pk
+ = do
+ Amode src mem_code <- getAmode mem
+ let
+ code dst = mem_code `snocOL`
+ IF_ARCH_i386(GLD pk src dst,
+ MOV pk (OpAddr src) (OpReg dst))
+ --
+ return (Any pk code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ code <- intLoadCode (instr pk) mem
+ return (Any pk code)
+ where
+ instr I8 = MOVZxL pk
+ instr I16 = MOV I16
+ instr I32 = MOV I32
+ -- we always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV pk) mem
+ return (Any pk code)
+#endif
+
+getRegister (CmmLit (CmmInt 0 rep))
+ = let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
+ code dst
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
+ in
+ return (Any rep code)
+
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | I64 <- cmmLitRep lit, not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+ in
+ return (Any I64 code)
+ where
+ isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as is64BitLit, because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
+getRegister (CmmLit lit)
+ = let
+ rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
+ in
+ return (Any rep code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+ -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+ Amode src mem_code <- getAmode mem
+ return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+ r <- getRegister expr
+ anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code) = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ | isVirtualReg reg -> return (reg,code)
+ | otherwise -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ -- ToDo: could optimise slightly by checking for byte-addressable
+ -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ -- only free regs can be clobbered
+ | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ | otherwise ->
+ return (reg, code)
+
+reg2reg :: MachRep -> Reg -> Reg -> Instr
+reg2reg rep src dst
+#if i386_TARGET_ARCH
+ | isFloatingRep rep = GMOV src dst
+#endif
+ | otherwise = MOV rep (OpReg src) (OpReg dst)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f F32)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f F32)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F32 code)
+
+getRegister (CmmLit (CmmFloat d F64)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d F64)],
+ SETHI (HI (ImmCLbl lbl)) dst,
+ LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
+ return (Any F64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
+ MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
+
+ MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
+ MO_Not rep -> trivialUCode rep (XNOR False g0) x
+
+ MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
+
+ MO_U_Conv F64 F32-> coerceDbl2Flt x
+ MO_U_Conv F32 F64-> coerceFlt2Dbl x
+
+ MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
+ MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
+ MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
+ MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
+
+ -- Conversions which are a nop on sparc
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_S_Conv I32 to -> conversionNop to x
+
+ -- widenings
+ MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
+ MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
+ MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
+ MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
+
+ other_op -> panic "Unknown unary mach op"
+ where
+ -- XXX SLL/SRL?
+ integerExtend signed from to expr = do
+ (reg, e_code) <- getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ ((if signed then SRA else SRL)
+ reg (RIImm (ImmInt 0)) dst)
+ return (Any to code)
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt I32 -> condIntReg GTT x y
+ MO_U_Ge I32 -> condIntReg GE x y
+ MO_U_Lt I32 -> condIntReg LTT x y
+ MO_U_Le I32 -> condIntReg LE x y
+
+ MO_U_Gt I16 -> condIntReg GU x y
+ MO_U_Ge I16 -> condIntReg GEU x y
+ MO_U_Lt I16 -> condIntReg LU x y
+ MO_U_Le I16 -> condIntReg LEU x y
+
+ MO_Add I32 -> trivialCode I32 (ADD False False) x y
+ MO_Sub I32 -> trivialCode I32 (SUB False False) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+{-
+ -- ToDo: teach about V8+ SPARC div instructions
+ MO_S_Quot I32 -> idiv FSLIT(".div") x y
+ MO_S_Rem I32 -> idiv FSLIT(".rem") x y
+ MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
+ MO_U_Rem I32 -> idiv FSLIT(".urem") x y
+-}
+ MO_Add F32 -> trivialFCode F32 FADD x y
+ MO_Sub F32 -> trivialFCode F32 FSUB x y
+ MO_Mul F32 -> trivialFCode F32 FMUL x y
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+
+ MO_Add F64 -> trivialFCode F64 FADD x y
+ MO_Sub F64 -> trivialFCode F64 FSUB x y
+ MO_Mul F64 -> trivialFCode F64 FMUL x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+
+ MO_And rep -> trivialCode rep (AND False) x y
+ MO_Or rep -> trivialCode rep (OR False) x y
+ MO_Xor rep -> trivialCode rep (XOR False) x y
+
+ MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+ MO_Shl rep -> trivialCode rep SLL x y
+ MO_U_Shr rep -> trivialCode rep SRL x y
+ MO_S_Shr rep -> trivialCode rep SRA x y
+
+{-
+ MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [promote x, promote y])
+ where promote x = CmmMachOp MO_F32_to_Dbl [x]
+ MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
+ [x, y])
+-}
+ other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
+
+ --------------------
+ imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getSomeReg a
+ (b_reg, b_code) <- getSomeReg b
+ res_lo <- getNewRegNat I32
+ res_hi <- getNewRegNat I32
+ let
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+ code dst = a_code `appOL` b_code `appOL`
+ toOL [
+ SMUL False a_reg (RIReg b_reg) res_lo,
+ RDY res_hi,
+ SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+ SUB False False res_lo (RIReg res_hi) dst
+ ]
+ return (Any I32 code)
+
+getRegister (CmmLoad mem pk) = do
+ Amode src code <- getAmode mem
+ let
+ code__2 dst = code `snocOL` LD pk src dst
+ return (Any pk code__2)
+
+getRegister (CmmLit (CmmInt i _))
+ | fits13Bits i
+ = let
+ src = ImmInt (fromInteger i)
+ code dst = unitOL (OR False g0 (RIImm src) dst)
+ in
+ return (Any I32 code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ SETHI (HI imm) dst,
+ OR False dst (RIImm (LO imm)) dst]
+ in return (Any I32 code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | pk /= I64
+ = do
+ Amode addr addr_code <- getAmode mem
+ let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
+ addr_code `snocOL` LD pk dst addr
+ return (Any pk code)
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
+
+getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_Not rep -> trivialUCode rep NOT x
+
+ MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
+ MO_S_Conv F32 F64 -> conversionNop F64 x
+
+ MO_S_Conv from to
+ | from == to -> conversionNop to x
+ | isFloatingRep from -> coerceFP2Int from to x
+ | isFloatingRep to -> coerceInt2FP from to x
+
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_S_Conv I32 to -> conversionNop to x
+ MO_S_Conv I16 I8 -> conversionNop I8 x
+ MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
+ MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
+
+ MO_U_Conv from to
+ | from == to -> conversionNop to x
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_U_Conv I32 to -> conversionNop to x
+ MO_U_Conv I16 I8 -> conversionNop I8 x
+ MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
+ MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
+
+ MO_S_Neg F32 -> trivialUCode F32 FNEG x
+ MO_S_Neg F64 -> trivialUCode F64 FNEG x
+ MO_S_Neg rep -> trivialUCode rep NEG x
+
+ where
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq F32 -> condFltReg EQQ x y
+ MO_Ne F32 -> condFltReg NE x y
+
+ MO_S_Gt F32 -> condFltReg GTT x y
+ MO_S_Ge F32 -> condFltReg GE x y
+ MO_S_Lt F32 -> condFltReg LTT x y
+ MO_S_Le F32 -> condFltReg LE x y
+
+ MO_Eq F64 -> condFltReg EQQ x y
+ MO_Ne F64 -> condFltReg NE x y
+
+ MO_S_Gt F64 -> condFltReg GTT x y
+ MO_S_Ge F64 -> condFltReg GE x y
+ MO_S_Lt F64 -> condFltReg LTT x y
+ MO_S_Le F64 -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
+ MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
+ MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
+ MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
+
+ MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
+ MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
+ MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
+ MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+ MO_Add I32 ->
+ case y of
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
+ -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
+ CmmLit lit
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code dst = srcCode `appOL` toOL [
+ ADDIS dst src (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ return (Any I32 code)
+ _ -> trivialCode I32 True ADD x y
+
+ MO_Add rep -> trivialCode rep True ADD x y
+ MO_Sub rep ->
+        case y of  -- subfi ('subtract from' with immediate) doesn't exist
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+ -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+ _ -> trivialCodeNoImm rep SUBF y x
+
+ MO_Mul rep -> trivialCode rep True MULLW x y
+
+ MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
+
+ MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
+ MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+ MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_And rep -> trivialCode rep False AND x y
+ MO_Or rep -> trivialCode rep False OR x y
+ MO_Xor rep -> trivialCode rep False XOR x y
+
+ MO_Shl rep -> trivialCode rep False SLW x y
+ MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+ MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+
+getRegister (CmmLit (CmmInt i rep))
+ | Just imm <- makeImmediate rep True i
+ = let
+ code dst = unitOL (LI dst imm)
+ in
+ return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f frep)]
+ `consOL` (addr_code `snocOL` LD frep dst addr)
+ return (Any frep code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitRep lit
+ imm = litToImm lit
+ code dst = toOL [
+ LIS dst (HI imm),
+ OR dst dst (RIImm (LO imm))
+ ]
+ in return (Any rep code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+ -- extend?Rep: wrap integer expression of type rep
+ -- in a conversion to I32
+extendSExpr I32 x = x
+extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
+extendUExpr I32 x = x
+extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
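+
+-- In code, the same rule is just the intLoadCode pattern used in the x86
+-- section above: bind the Amode and consume its address part straight after
+-- its code part (a sketch, with someInstr standing in for whatever x86
+-- instruction uses the address):
+--
+--      Amode addr addr_code <- getAmode mem
+--      return (\dst -> addr_code `snocOL` someInstr (OpAddr addr) (OpReg dst))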
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (-(fromInteger i))
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (fromInteger i)
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+ | isJust imm
+ = return (Amode (AddrImm imm__2) id)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+getAmode other
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister other `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ in
+ return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- This is all just ridiculous, since it carefully undoes
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (-(fromInteger i))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
+ -- ASSERT(rep == I32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (fromInteger i)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+ b@(CmmLit _)])
+ = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+ [y, CmmLit (CmmInt shift _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ = do (x_reg, x_code) <- getNonClobberedReg x
+ -- x must be in a temp, because it has to stay live over y_code
+       -- we could compare x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code
+ base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
+ code)
+
+getAmode (CmmLit lit) | not (is64BitLit lit)
+ = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+ (reg,code) <- getSomeReg expr
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (-(fromInteger i))
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits i
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (fromInteger i)
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ let
+ code = codeX `appOL` codeY
+ return (Amode (AddrRegReg regX regY) code)
+
+-- XXX Is this the same as "leaf" in Stix?
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let
+ code = unitOL (SETHI (HI imm__2) tmp)
+ return (Amode (AddrRegImm tmp (LO imm__2)) code)
+ where
+ imm__2 = litToImm lit
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+#endif /* sparc_TARGET_ARCH */
+
+#ifdef powerpc_TARGET_ARCH
+getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate I32 True i
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
+ = do
+ tmp <- getNewRegNat I32
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat I32
+ let imm = litToImm lit
+ code = unitOL (LIS tmp (HA imm))
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add I32) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
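+-- For instance, a memory operand whose base register is a real (allocatable)
+-- register could be trashed while the other expression is computed, so in that
+-- case we LEA the address into a fresh virtual register first
+-- (see amodeCouldBeClobbered below).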
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat wordRep
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+ unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+ return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getOperand e = do
+ (reg, code) <- getSomeReg e
+ return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit) = not (is64BitLit lit)
+ || isSuitableFloatingPointLit lit
+isOperand _ = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
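+-- (A zero can be materialised with a single register-clearing instruction,
+-- e.g. an xor of a register with itself, which is cheaper than a load from a
+-- literal pool; hence the f /= 0.0 test below.)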
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getRegOrMem e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = is64BitInteger i
+ -- assume that labels are in the range 0-2^31-1: this assumes the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is64BitLit x = False
+#endif
+
+is64BitInteger :: Integer -> Bool
+is64BitInteger i = i > 0x7fffffff || i < -0x80000000
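+-- i.e. True iff the value does not fit in a 32-bit signed immediate;
+-- e.g. is64BitInteger 0x7fffffff == False, is64BitInteger 0x80000000 == True.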
+
+-- -----------------------------------------------------------------------------
+-- The 'CondCode' type: Condition codes passed up the tree.
+
+data CondCode = CondCode Bool Cond InstrBlock
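+-- The Bool records whether this is a floating-point comparison; consumers use
+-- it to pick the right branch instruction (e.g. BF vs BI on SPARC) and, on
+-- x86_64, to remember that unordered operands need special handling.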
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+getCondCode = panic "MachCode.getCondCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+ = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
+ case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+ other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
+
+getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+#elif powerpc_TARGET_ARCH
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bits first
+
+getCondCode (CmmMachOp mop [x, y])
+ = case mop of
+ MO_Eq F32 -> condFltCode EQQ x y
+ MO_Ne F32 -> condFltCode NE x y
+
+ MO_S_Gt F32 -> condFltCode GTT x y
+ MO_S_Ge F32 -> condFltCode GE x y
+ MO_S_Lt F32 -> condFltCode LTT x y
+ MO_S_Le F32 -> condFltCode LE x y
+
+ MO_Eq F64 -> condFltCode EQQ x y
+ MO_Ne F64 -> condFltCode NE x y
+
+ MO_S_Gt F64 -> condFltCode GTT x y
+ MO_S_Ge F64 -> condFltCode GE x y
+ MO_S_Lt F64 -> condFltCode LTT x y
+ MO_S_Le F64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+#endif
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if alpha_TARGET_ARCH
+condIntCode = panic "MachCode.condIntCode: not on Alphas"
+condFltCode = panic "MachCode.condFltCode: not on Alphas"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
+ Amode x_addr x_code <- getAmode x
+ let
+ imm = litToImm lit
+ code = x_code `snocOL`
+ CMP pk (OpImm imm) (OpAddr x_addr)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST pk (OpReg x_reg) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- in
+ return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
+ let
+ code = y_code `appOL`
+ x_code `snocOL`
+ CMP (cmmExprRep x) (OpReg y_reg) x_op
+ -- in
+ return (CondCode False cond code)
+#endif
+
+#if i386_TARGET_ARCH
+condFltCode cond x y
+ = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
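+-- Roughly, (x > y :: Double) becomes a ucomisd of y's operand against x's
+-- register; the result is then consumed with the unsigned condition codes
+-- (see condToUnsigned and NB(1) below).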
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ src2 = ImmInt (fromInteger y)
+ code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code__2 = code1 `appOL` code2 `snocOL`
+ SUB False True src1 (RIReg src2) g0
+ return (CondCode False cond code__2)
+
+-----------
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ FCMP True pk1 src1 src2
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ FCMP True F64 tmp src2
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ FCMP True F64 src1 tmp
+ return (CondCode True cond code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ code' = code `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL`
+ (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
+ return (CondCode False cond code')
+
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
+ code'' = case cond of -- twiddle CR to handle unordered case
+ GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+ LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+ _ -> code'
+ where
+ ltbit = 0 ; eqbit = 2 ; gtbit = 1
+ return (CondCode True cond code'')
+
+#endif /* powerpc_TARGET_ARCH */
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
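+-- For example, on x86 an assignment of a loaded value to a local register,
+-- CmmAssign r (CmmLoad addr rep), turns into a single MOV from the addressing
+-- mode for addr into r's register (see assignReg_IntCode below).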
+
+assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+assignIntCode pk (CmmLoad dst _) src
+ = getNewRegNat IntRep `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignIntCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- integer assignment to memory
+assignMem_IntCode pk addr src = do
+ Amode addr code_addr <- getAmode addr
+ (code_src, op_src) <- get_op_RI src
+ let
+ code = code_src `appOL`
+ code_addr `snocOL`
+ MOV pk op_src (OpAddr addr)
+ -- NOTE: op_src is stable, so it will still be valid
+ -- after code_addr. This may involve the introduction
+ -- of an extra MOV to a temporary register, but we hope
+ -- the register allocator will get rid of it.
+ --
+ return code
+ where
+ get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
+ = return (nilOL, OpImm (litToImm lit))
+ get_op_RI op
+ = do (reg,code) <- getNonClobberedReg op
+ return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+ load_code <- intLoadCode (MOV pk) src
+ return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+ code <- getAnyReg src
+ return (code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+assignReg_IntCode pk reg src = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+    Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ where
+ dst = getRegisterReg reg
+
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src
+ = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+ where
+ dst = getRegisterReg reg
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Floating-point assignments
+
+#if alpha_TARGET_ARCH
+
+assignFltCode pk (CmmLoad dst _) src
+ = getNewRegNat pk `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignFltCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (FMOV src__2 dst__2)
+ else code
+ in
+ return code__2
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ (src_reg, src_code) <- getNonClobberedReg src
+ Amode addr addr_code <- getAmode addr
+ let
+ code = src_code `appOL`
+ addr_code `snocOL`
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
+ return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+ src_code <- getAnyReg src
+ return (src_code (getRegisterReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ Amode dst__2 code1 <- getAmode addr
+ (src__2, code2) <- getSomeReg src
+ tmp1 <- getNewRegNat pk
+ let
+ pk__2 = cmmExprRep src
+ code__2 = code1 `appOL` code2 `appOL`
+ if pk == pk__2
+ then unitOL (ST pk src__2 dst__2)
+ else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
+ return code__2
+
+-- Floating point assignment to a register/temporary
+-- ToDo: Verify correctness
+assignReg_FltCode pk reg src = do
+ r <- getRegister src
+ v1 <- getNewRegNat pk
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
+ where
+ dst = getRegisterReg reg
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genJump (CmmLabel lbl)
+ | isAsmTemp lbl = returnInstr (BR target)
+ | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = getRegister tree `thenNat` \ register ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let
+ dst = registerName register pv
+ code = registerCode register pv
+ target = registerName register pv
+ in
+ if isFixed register then
+ returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+ else
+ return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+genJump (CmmLoad mem pk) = do
+ Amode target code <- getAmode mem
+ return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+ return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+ (reg,code) <- getSomeReg expr
+ return (code `snocOL` JMP (OpReg reg))
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (toOL [CALL (Left target) 0 True, NOP])
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = do
+ (target, code) <- getSomeReg tree
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+genJump (CmmLit (CmmLabel lbl))
+ = return (unitOL $ JMP lbl)
+
+genJump tree
+ = do
+ (target,code) <- getSomeReg tree
+ return (code `snocOL` MTCTR target `snocOL` BCTR [])
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
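+-- Roughly, on i386 a branch on an unsigned (<) comparison becomes a CMP
+-- followed by "JXX LU target"; the SPARC version puts a NOP in the branch
+-- delay slot, as promised above.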
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+genCondJump lbl (StPrim op [x, StInt 0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ returnSeq code [BI (cmpOp op) value target]
+ where
+ cmpOp CharGtOp = GTT
+ cmpOp CharGeOp = GE
+ cmpOp CharEqOp = EQQ
+ cmpOp CharNeOp = NE
+ cmpOp CharLtOp = LTT
+ cmpOp CharLeOp = LE
+ cmpOp IntGtOp = GTT
+ cmpOp IntGeOp = GE
+ cmpOp IntEqOp = EQQ
+ cmpOp IntNeOp = NE
+ cmpOp IntLtOp = LTT
+ cmpOp IntLeOp = LE
+ cmpOp WordGtOp = NE
+ cmpOp WordGeOp = ALWAYS
+ cmpOp WordEqOp = EQQ
+ cmpOp WordNeOp = NE
+ cmpOp WordLtOp = NEVER
+ cmpOp WordLeOp = EQQ
+ cmpOp AddrGtOp = NE
+ cmpOp AddrGeOp = ALWAYS
+ cmpOp AddrEqOp = EQQ
+ cmpOp AddrNeOp = NE
+ cmpOp AddrLtOp = NEVER
+ cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF (cmpOp op) value target))
+ where
+ cmpOp FloatGtOp = GTT
+ cmpOp FloatGeOp = GE
+ cmpOp FloatEqOp = EQQ
+ cmpOp FloatNeOp = NE
+ cmpOp FloatLtOp = LTT
+ cmpOp FloatLeOp = LE
+ cmpOp DoubleGtOp = GTT
+ cmpOp DoubleGeOp = GE
+ cmpOp DoubleEqOp = EQQ
+ cmpOp DoubleNeOp = NE
+ cmpOp DoubleLtOp = LTT
+ cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+ | fltCmpOp op
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF cond result target))
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+ fltCmpOp op = case op of
+ FloatGtOp -> True
+ FloatGeOp -> True
+ FloatEqOp -> True
+ FloatNeOp -> True
+ FloatLtOp -> True
+ FloatLeOp -> True
+ DoubleGtOp -> True
+ DoubleGeOp -> True
+ DoubleEqOp -> True
+ DoubleNeOp -> True
+ DoubleLtOp -> True
+ DoubleLeOp -> True
+ _ -> False
+ (instr, cond) = case op of
+ FloatGtOp -> (FCMP TF LE, EQQ)
+ FloatGeOp -> (FCMP TF LTT, EQQ)
+ FloatEqOp -> (FCMP TF EQQ, NE)
+ FloatNeOp -> (FCMP TF EQQ, EQQ)
+ FloatLtOp -> (FCMP TF LTT, NE)
+ FloatLeOp -> (FCMP TF LE, NE)
+ DoubleGtOp -> (FCMP TF LE, EQQ)
+ DoubleGeOp -> (FCMP TF LTT, EQQ)
+ DoubleEqOp -> (FCMP TF EQQ, NE)
+ DoubleNeOp -> (FCMP TF EQQ, EQQ)
+ DoubleLtOp -> (FCMP TF LTT, NE)
+ DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+ = trivialCode instr x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BI cond result target))
+ where
+ (instr, cond) = case op of
+ CharGtOp -> (CMP LE, EQQ)
+ CharGeOp -> (CMP LTT, EQQ)
+ CharEqOp -> (CMP EQQ, NE)
+ CharNeOp -> (CMP EQQ, EQQ)
+ CharLtOp -> (CMP LTT, NE)
+ CharLeOp -> (CMP LE, NE)
+ IntGtOp -> (CMP LE, EQQ)
+ IntGeOp -> (CMP LTT, EQQ)
+ IntEqOp -> (CMP EQQ, NE)
+ IntNeOp -> (CMP EQQ, EQQ)
+ IntLtOp -> (CMP LTT, NE)
+ IntLeOp -> (CMP LE, NE)
+ WordGtOp -> (CMP ULE, EQQ)
+ WordGeOp -> (CMP ULT, EQQ)
+ WordEqOp -> (CMP EQQ, NE)
+ WordNeOp -> (CMP EQQ, EQQ)
+ WordLtOp -> (CMP ULT, NE)
+ WordLeOp -> (CMP ULE, NE)
+ AddrGtOp -> (CMP ULE, EQQ)
+ AddrGeOp -> (CMP ULT, EQQ)
+ AddrEqOp -> (CMP EQQ, NE)
+ AddrNeOp -> (CMP EQQ, EQQ)
+ AddrLtOp -> (CMP ULT, NE)
+ AddrLeOp -> (CMP ULE, NE)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` JXX cond id)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
+ return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genCondJump (BlockId id) bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (
+ code `appOL`
+ toOL (
+ if is_float
+ then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+ )
+ )
+
+#endif /* sparc_TARGET_ARCH */
+
+
+#if powerpc_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (code `snocOL` BCC cond id)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
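+--
+-- Results come back in the platform's return registers (e.g. %eax/%edx or
+-- fake0 for x87 results on i386, %rax/%xmm0 on x86_64, r3/r4 or f1 on PowerPC)
+-- and are moved into the requested CmmRegs by the per-architecture
+-- assign_code/moveResult helpers below.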
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> [(CmmReg,MachHint)] -- where to put the result
+ -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
+ -> Maybe [GlobalReg] -- volatile regs to save
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+ccallResultRegs =
+
+genCCall fn cconv result_regs args
+ = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+ `thenNat` \ ((unused,_), argCode) ->
+ let
+ nRegs = length allArgRegs - length unused
+ code = asmSeqThen (map ($ []) argCode)
+ in
+ returnSeq code [
+ LDA pv (AddrImm (ImmLab (ptext fn))),
+ JSR ra (AddrReg pv) nRegs,
+ LDGP gp (AddrReg ra)]
+ where
+ ------------------------
+ {- Try to get a value into a specific register (or registers) for
+ a call. The first 6 arguments go into the appropriate
+ argument register (separate registers for integer and floating
+ point arguments, but used in lock-step), and the remaining
+ arguments are dumped to the stack, beginning at 0(sp). Our
+ first argument is a pair of the list of remaining argument
+ registers to be assigned for this call and the next stack
+ offset to use for overflowing arguments. This way,
+       @get_arg@ can be applied to all of a call's arguments using
+ @mapAccumLNat@.
+ -}
+ get_arg
+ :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
+ -> StixTree -- Current argument
+ -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+ -- We have to use up all of our argument registers first...
+
+ get_arg ((iDst,fDst):dsts, offset) arg
+ = getRegister arg `thenNat` \ register ->
+ let
+ reg = if isFloatingRep pk then fDst else iDst
+ code = registerCode register reg
+ src = registerName register reg
+ pk = registerRep register
+ in
+ return (
+ if isFloatingRep pk then
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (FMOV src fDst)
+ else code)
+ else
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (OR src (RIReg src) iDst)
+ else code))
+
+ -- Once we have run out of argument registers, we move to the
+ -- stack...
+
+ get_arg ([], offset) arg
+ = getRegister arg `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ pk = registerRep register
+ sz = primRepToSize pk
+ in
+ return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [(r,_)] args vols = do
+ case op of
+ MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
+ MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
+
+ MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
+ MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
+
+ MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
+ MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
+
+ MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
+ MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
+
+ other_op -> outOfLineFloatOp op r args vols
+ where
+ actuallyInlineFloatOp rep instr [(x,_)]
+ = do res <- trivialUFCode rep instr x
+ any <- anyReg res
+ return (any (getRegisterReg r))
+
+genCCall target dest_regs args vols = do
+ let
+ sizes = map (arg_size . cmmExprRep . fst) (reverse args)
+#if !darwin_TARGET_OS
+ tot_arg_size = sum sizes
+#else
+ raw_arg_size = sum sizes
+ tot_arg_size = roundTo 16 raw_arg_size
+ arg_pad_size = tot_arg_size - raw_arg_size
+ delta0 <- getDeltaNat
+ setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+ push_codes <- mapM push_arg (reverse args)
+ delta <- getDeltaNat
+
+ -- in
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) []), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
+ ASSERT(dyn_rep == I32)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
+
+ let push_code
+#if darwin_TARGET_OS
+ | arg_pad_size /= 0
+ = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+ DELTA (delta0 - arg_pad_size)]
+ `appOL` concatOL push_codes
+ | otherwise
+#endif
+ = concatOL push_codes
+ call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
+ [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + tot_arg_size)]
+ )
+ -- in
+ setDeltaNat (delta + tot_arg_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
+ MOV I32 (OpReg edx) (OpReg r_dest_hi)]
+ F32 -> unitOL (GMOV fake0 r_dest)
+ F64 -> unitOL (GMOV fake0 r_dest)
+ rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
+ where
+ r_dest_hi = getHiVRegFromLo r_dest
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (push_code `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size F64 = 8
+ arg_size F32 = 4
+ arg_size I64 = 8
+ arg_size _ = 4
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
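+        -- e.g. roundTo 16 20 == 32 and roundTo 16 32 == 32; used to keep the
+        -- Darwin C stack 16-byte aligned at calls.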
+
+
+ push_arg :: (CmmExpr,MachHint){-current argument-}
+ -> NatM InstrBlock -- code
+
+ push_arg (arg,_hint) -- we don't need the hints on x86
+ | arg_rep == I64 = do
+ ChildCode64 code r_lo <- iselExpr64 arg
+ delta <- getDeltaNat
+ setDeltaNat (delta - 8)
+ let
+ r_hi = getHiVRegFromLo r_lo
+ -- in
+ return ( code `appOL`
+ toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
+ PUSH I32 (OpReg r_lo), DELTA (delta - 8),
+ DELTA (delta-8)]
+ )
+
+ | otherwise = do
+ (code, reg, sz) <- get_op arg
+ delta <- getDeltaNat
+ let size = arg_size sz
+ setDeltaNat (delta-size)
+ if (case sz of F64 -> True; F32 -> True; _ -> False)
+ then return (code `appOL`
+ toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
+ DELTA (delta-size),
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0))]
+ )
+ else return (code `snocOL`
+ PUSH I32 (OpReg reg) `snocOL`
+ DELTA (delta-size)
+ )
+ where
+ arg_rep = cmmExprRep arg
+
+ ------------
+ get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
+ get_op op = do
+ (reg,code) <- getSomeReg op
+ return (code, reg, cmmExprRep op)
+
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
+ -> Maybe [GlobalReg] -> NatM InstrBlock
+outOfLineFloatOp mop res args vols
+ = do
+ targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+ let target = CmmForeignCall targetExpr CCallConv
+
+ if cmmRegRep res == F64
+ then
+ stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
+ else do
+ uq <- getUniqueNat
+ let
+ tmp = CmmLocal (LocalReg uq F64)
+ -- in
+ code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
+ code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
+ return (code1 `appOL` code2)
+ where
+ lbl = mkForeignLabel fn Nothing True
+
+ fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrtf")
+ MO_F32_Sin -> FSLIT("sinf")
+ MO_F32_Cos -> FSLIT("cosf")
+ MO_F32_Tan -> FSLIT("tanf")
+ MO_F32_Exp -> FSLIT("expf")
+ MO_F32_Log -> FSLIT("logf")
+
+ MO_F32_Asin -> FSLIT("asinf")
+ MO_F32_Acos -> FSLIT("acosf")
+ MO_F32_Atan -> FSLIT("atanf")
+
+ MO_F32_Sinh -> FSLIT("sinhf")
+ MO_F32_Cosh -> FSLIT("coshf")
+ MO_F32_Tanh -> FSLIT("tanhf")
+ MO_F32_Pwr -> FSLIT("powf")
+
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
+ MO_F64_Exp -> FSLIT("exp")
+ MO_F64_Log -> FSLIT("log")
+
+ MO_F64_Asin -> FSLIT("asin")
+ MO_F64_Acos -> FSLIT("acos")
+ MO_F64_Atan -> FSLIT("atan")
+
+ MO_F64_Sinh -> FSLIT("sinh")
+ MO_F64_Cosh -> FSLIT("cosh")
+ MO_F64_Tanh -> FSLIT("tanh")
+ MO_F64_Pwr -> FSLIT("pow")
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols =
+ outOfLineFloatOp op r args vols
+
+genCCall target dest_regs args vols = do
+
+ -- load up the register arguments
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
+
+ let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = int_regs_used ++ fp_regs_used
+ -- for annotating the call instruction with
+
+ sse_regs = length fp_regs_used
+
+ tot_arg_size = arg_size * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+ -- need to make sure we push a multiple of 16-bytes of args,
+ -- plus the return address, to get the correct alignment.
+ -- Urg, this is hard. We need to feed the delta back into
+ -- the arg pushing code.
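+    -- For example, three stack args occupy 24 bytes; 24 `rem` 16 /= 0, so we
+    -- SUB an extra 8 bytes from %rsp first, giving real_size = 32 and keeping
+    -- the required alignment at the CALL.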
+ (real_size, adjust_rsp) <-
+ if tot_arg_size `rem` 16 == 0
+ then return (tot_arg_size, nilOL)
+ else do -- we need to adjust...
+ delta <- getDeltaNat
+ setDeltaNat (delta-8)
+ return (tot_arg_size+8, toOL [
+ SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+ DELTA (delta-8)
+ ])
+
+ -- push the stack args, right to left
+ push_code <- push_args (reverse stack_args) nilOL
+ delta <- getDeltaNat
+
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmForeignCall expr conv
+ -> do (dyn_r, dyn_c) <- getSomeReg expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+ let
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
+ assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
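+        -- e.g. a call passing two arguments in SSE registers is preceded by
+        -- "movl $2, %eax" (assign_eax 2).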
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [(dest,_hint)] =
+ case rep of
+ F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+ rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+ where
+ rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [(CmmExpr,MachHint)]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((arg,hint) : rest) aregs fregs code
+ | isFloatingRep arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprRep arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((arg,hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((arg,hint):rest) code
+ | isFloatingRep arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [
+ MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
+ SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
+ ASSERT(arg_rep == I64) return ()
+ (arg_op, arg_code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` toOL [PUSH I64 arg_op,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+ where
+ arg_rep = cmmExprRep arg
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+{-
+ The SPARC calling convention is an absolute
+ nightmare. The first 6x32 bits of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+ 16 words above the stack pointer is a word for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+-}
+
+genCCall target dest_regs argsAndHints vols = do
+ let
+ args = map fst argsAndHints
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let
+ (argcodes, vregss) = unzip argcode_and_vregs
+ n_argRegs = length allArgRegs
+ n_argRegs_used = min (length vregs) n_argRegs
+ vregs = concat vregss
+ -- deal with static vs dynamic call targets
+ callinsns <- (case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ CmmForeignCall expr conv -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ CmmPrim mop -> do
+ (res, reduce) <- outOfLineFloatOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+ if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
+
+ )
+ let
+ argcode = concatOL argcodes
+ (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+ transfer_code
+ = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+ return (argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up)
+ where
+ -- move args from the integer vregs into which they have been
+ -- marshalled, into %o0 .. %o5, and the rest onto the stack.
+ move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+ move_final [] _ offset -- all args done
+ = []
+
+ move_final (v:vs) [] offset -- out of aregs; move to stack
+ = ST I32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+ move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
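+    -- For example, with eight word-sized arguments the first six vregs are
+    -- OR-moved into %o0..%o5 and the remaining two are ST'd to successive
+    -- stack slots starting at eXTRA_STK_ARGS_HERE.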
+
+ -- generate code to calculate an argument, and move it into one
+ -- or two integer vregs.
+ arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+ arg_to_int_vregs arg
+ | (cmmExprRep arg) == I64
+ = do
+ (ChildCode64 code r_lo) <- iselExpr64 arg
+ let
+ r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+ | otherwise
+ = do
+ (src, code) <- getSomeReg arg
+ tmp <- getNewRegNat (cmmExprRep arg)
+ let
+ pk = cmmExprRep arg
+ case pk of
+ F64 -> do
+ v1 <- getNewRegNat I32
+ v2 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ FMOV F64 src f0 `snocOL`
+ ST F32 f0 (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1 `snocOL`
+ ST F32 (fPair f0) (spRel 16) `snocOL`
+ LD I32 (spRel 16) v2
+ ,
+ [v1,v2]
+ )
+ F32 -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL`
+ ST F32 src (spRel 16) `snocOL`
+ LD I32 (spRel 16) v1
+ ,
+ [v1]
+ )
+ other -> do
+ v1 <- getNewRegNat I32
+ return (
+ code `snocOL` OR False g0 (RIReg src) v1
+ ,
+ [v1]
+ )
+outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (reduce, functionName) = case mop of
+ MO_F32_Exp -> (True, FSLIT("exp"))
+ MO_F32_Log -> (True, FSLIT("log"))
+ MO_F32_Sqrt -> (True, FSLIT("sqrt"))
+
+ MO_F32_Sin -> (True, FSLIT("sin"))
+ MO_F32_Cos -> (True, FSLIT("cos"))
+ MO_F32_Tan -> (True, FSLIT("tan"))
+
+ MO_F32_Asin -> (True, FSLIT("asin"))
+ MO_F32_Acos -> (True, FSLIT("acos"))
+ MO_F32_Atan -> (True, FSLIT("atan"))
+
+ MO_F32_Sinh -> (True, FSLIT("sinh"))
+ MO_F32_Cosh -> (True, FSLIT("cosh"))
+ MO_F32_Tanh -> (True, FSLIT("tanh"))
+
+ MO_F64_Exp -> (False, FSLIT("exp"))
+ MO_F64_Log -> (False, FSLIT("log"))
+ MO_F64_Sqrt -> (False, FSLIT("sqrt"))
+
+ MO_F64_Sin -> (False, FSLIT("sin"))
+ MO_F64_Cos -> (False, FSLIT("cos"))
+ MO_F64_Tan -> (False, FSLIT("tan"))
+
+ MO_F64_Asin -> (False, FSLIT("asin"))
+ MO_F64_Acos -> (False, FSLIT("acos"))
+ MO_F64_Atan -> (False, FSLIT("atan"))
+
+ MO_F64_Sinh -> (False, FSLIT("sinh"))
+ MO_F64_Cosh -> (False, FSLIT("cosh"))
+ MO_F64_Tanh -> (False, FSLIT("tanh"))
+
+ other -> pprPanic "outOfLineFloatOp(sparc) "
+ (pprCallishMachOp mop)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+ The PowerPC calling convention for Darwin/Mac OS X
+ is described in Apple's document
+ "Inside Mac OS X - Mach-O Runtime Architecture".
+
+ PowerPC Linux uses the System V Release 4 Calling Convention
+ for PowerPC. It is described in the
+ "System V Application Binary Interface PowerPC Processor Supplement".
+
+ Both conventions are similar:
+ Parameters may be passed in general-purpose registers starting at r3, in
+ floating point registers starting at f1, or on the stack.
+
+ But there are substantial differences:
+ * The number of registers used for parameter passing and the exact set of
+ nonvolatile registers differs (see MachRegs.lhs).
+ * On Darwin, stack space is always reserved for parameters, even if they are
+ passed in registers. The called routine may choose to save parameters from
+ registers to the corresponding space on the stack.
+ * On Darwin, a corresponding amount of GPRs is skipped when a floating point
+ parameter is passed in an FPR.
+ * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+ starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+ Darwin just treats an I64 like two separate I32s (high word first).
+ * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
+
+    According to both conventions, the parameter area should be part of the
+ caller's stack frame, allocated in the caller's prologue code (large enough
+ to hold the parameter lists for all called routines). The NCG already
+ uses the stack for register spilling, leaving 64 bytes free at the top.
+ If we need a larger parameter area than that, we just allocate a new stack
+ frame just before ccalling.
+-}
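+-- For example (see passArguments below): on Darwin an F64 argument consumes
+-- one FPR, skips two GPRs and reserves 8 bytes of parameter area, whereas on
+-- SysV/Linux it consumes one FPR only, skips no GPRs, and takes stack space
+-- only when no FPRs are left.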
+
+genCCall target dest_regs argsAndHints vols
+ = ASSERT (not $ any (`elem` [I8,I16]) argReps)
+ -- we rely on argument promotion in the codeGen
+ do
+ (finalStack,passArgumentsCode,usedRegs) <- passArguments
+ (zip args argReps)
+ allArgRegs allFPArgRegs
+ initialStackOffset
+ (toOL []) []
+
+ (labelOrExpr, reduceToF32) <- case target of
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+ CmmForeignCall expr conv -> return (Right expr, False)
+ CmmPrim mop -> outOfLineFloatOp mop
+
+ let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+ codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
+
+ case labelOrExpr of
+ Left lbl -> do
+ return ( codeBefore
+ `snocOL` BL lbl usedRegs
+ `appOL` codeAfter)
+ Right dyn -> do
+ (dynReg, dynCode) <- getSomeReg dyn
+ return ( dynCode
+ `snocOL` MTCTR dynReg
+ `appOL` codeBefore
+ `snocOL` BCTRL usedRegs
+ `appOL` codeAfter)
+ where
+#if darwin_TARGET_OS
+ initialStackOffset = 24
+ -- size of linkage area + size of arguments, in bytes
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+ map machRepByteWidth argReps
+#elif linux_TARGET_OS
+ initialStackOffset = 8
+ stackDelta finalStack = roundTo 16 finalStack
+#endif
+ args = map fst argsAndHints
+ argReps = map cmmExprRep args
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
+
+ move_sp_down finalStack
+ | delta > 64 =
+ toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
+ DELTA (-delta)]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+ move_sp_up finalStack
+ | delta > 64 =
+ toOL [ADD sp sp (RIImm (ImmInt delta)),
+ DELTA 0]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+
+
+ passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+ passArguments ((arg,I64):args) gprs fprs stackOffset
+ accumCode accumUsed =
+ do
+ ChildCode64 code vr_lo <- iselExpr64 arg
+ let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+ passArguments args
+ (drop 2 gprs)
+ fprs
+ (stackOffset+8)
+ (accumCode `appOL` code
+ `snocOL` storeWord vr_hi gprs stackOffset
+ `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+ ((take 2 gprs) ++ accumUsed)
+ where
+ storeWord vr (gpr:_) offset = MR gpr vr
+ storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+ regCode hireg loreg =
+ accumCode `appOL` code
+ `snocOL` MR hireg vr_hi
+ `snocOL` MR loreg vr_lo
+
+ case gprs of
+ hireg : loreg : regs | even (length gprs) ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _skipped : hireg : loreg : regs ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _ -> -- only one or no regs left
+ passArguments args [] fprs (stackOffset'+8)
+ stackCode accumUsed
+#endif
+
+ passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+ | reg : _ <- regs = do
+ register <- getRegister arg
+ let code = case register of
+ Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+ Any _ acode -> acode reg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we reserve stack slots for register parameters
+ (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ stackOffset
+#endif
+ (accumCode `appOL` code)
+ (reg : accumUsed)
+ | otherwise = do
+ (vr, code) <- getSomeReg arg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+ (stackOffset' + stackBytes)
+ (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
+ accumUsed
+ where
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | rep == F64 = roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
+ (nGprs, nFprs, stackBytes, regs) = case rep of
+ I32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+ -- we use the FPRs.
+ F32 -> (1, 1, 4, fprs)
+ F64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ F32 -> (0, 1, 4, fprs)
+ F64 -> (0, 1, 8, fprs)
+#endif
+
+ moveResult reduceToF32 =
+ case dest_regs of
+ [] -> nilOL
+ [(dest, _hint)]
+ | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
+ | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
+ | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
+ MR r_dest r4]
+ | otherwise -> unitOL (MR r_dest r3)
+ where rep = cmmRegRep dest
+ r_dest = getRegisterReg dest
+
+ outOfLineFloatOp mop =
+ do
+ mopExpr <- cmmMakeDynamicReference addImportNat True $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (functionName, reduce) = case mop of
+ MO_F32_Exp -> (FSLIT("exp"), True)
+ MO_F32_Log -> (FSLIT("log"), True)
+ MO_F32_Sqrt -> (FSLIT("sqrt"), True)
+
+ MO_F32_Sin -> (FSLIT("sin"), True)
+ MO_F32_Cos -> (FSLIT("cos"), True)
+ MO_F32_Tan -> (FSLIT("tan"), True)
+
+ MO_F32_Asin -> (FSLIT("asin"), True)
+ MO_F32_Acos -> (FSLIT("acos"), True)
+ MO_F32_Atan -> (FSLIT("atan"), True)
+
+ MO_F32_Sinh -> (FSLIT("sinh"), True)
+ MO_F32_Cosh -> (FSLIT("cosh"), True)
+ MO_F32_Tanh -> (FSLIT("tanh"), True)
+ MO_F32_Pwr -> (FSLIT("pow"), True)
+
+ MO_F64_Exp -> (FSLIT("exp"), False)
+ MO_F64_Log -> (FSLIT("log"), False)
+ MO_F64_Sqrt -> (FSLIT("sqrt"), False)
+
+ MO_F64_Sin -> (FSLIT("sin"), False)
+ MO_F64_Cos -> (FSLIT("cos"), False)
+ MO_F64_Tan -> (FSLIT("tan"), False)
+
+ MO_F64_Asin -> (FSLIT("asin"), False)
+ MO_F64_Acos -> (FSLIT("acos"), False)
+ MO_F64_Atan -> (FSLIT("atan"), False)
+
+ MO_F64_Sinh -> (FSLIT("sinh"), False)
+ MO_F64_Cosh -> (FSLIT("cosh"), False)
+ MO_F64_Tanh -> (FSLIT("tanh"), False)
+ MO_F64_Pwr -> (FSLIT("pow"), False)
+ other -> pprPanic "genCCall(ppc): unknown callish op"
+ (pprCallishMachOp other)
+
+#endif /* darwin_TARGET_OS || linux_TARGET_OS */
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
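+-- The scrutinee indexes a jump table: a table of code labels when not
+-- compiling position-independent code, or of label differences (relative to
+-- the table's own label) when opt_PIC is on.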
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ ADD wordRep op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ JMP_TBL op [ id | Just id <- ids ]
+ ]
+ -- in
+ return code
+#elif powerpc_TARGET_ARCH
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ LD I32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat I32
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ ADDIS tmp tmp (HA (ImmCLbl lbl)),
+ LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+#else
+genSwitch expr ids = panic "ToDo: genSwitch"
+#endif
+
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+-- -----------------------------------------------------------------------------
+-- Support bits
+-- -----------------------------------------------------------------------------
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
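+--
+-- For example, a comparison used as a value, as in  x = (y < z),  reaches
+-- the code generator as an ordinary assignment, so the 0-or-1 result of
+-- the condition has to be materialised in a register here.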
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+condIntReg = panic "MachCode.condIntReg (not on Alpha)"
+condFltReg = panic "MachCode.condFltReg (not on Alpha)"
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+condIntReg cond x y = do
+ CondCode _ cond cond_code <- condIntCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if i386_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
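+        --
+        -- Concretely: for unordered operands ZF, PF and CF are all 1.
+        -- GU tests CF=0 && ZF=0 and GEU tests CF=0, so both already give
+        -- False for unordered operands.  NE tests ZF=0, which would give
+        -- False as well, hence the parity bit is ORed in; every other
+        -- condition would wrongly give True, hence the AND with the
+        -- negated parity flag.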
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg EQQ x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg NE x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any I32 code__2)
+
+condIntReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condIntCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+condFltReg cond x y = do
+ BlockId lbl1 <- getBlockIdNat
+ BlockId lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- condFltCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ NOP,
+ BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+ NEWBLOCK (BlockId lbl1),
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK (BlockId lbl2)]
+ return (Any I32 code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+condReg getCond = do
+ lbl1 <- getBlockIdNat
+ lbl2 <- getBlockIdNat
+ CondCode _ cond cond_code <- getCond
+ let
+{- code dst = cond_code `appOL` toOL [
+ BCC cond lbl1,
+ LI dst (ImmInt 0),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl1,
+ LI dst (ImmInt 1),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl2
+ ]-}
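+        -- How the MFCR/RLWINM pair below works: the compare leaves its
+        -- result in CR0, whose bits (counting from the most significant)
+        -- are LT=0, GT=1, EQ=2.  MFCR copies the whole condition register
+        -- into dst, and RLWINM dst dst (bit+1) 31 31 rotates the selected
+        -- bit into the least-significant position and masks everything
+        -- else, leaving 0 or 1.  Conditions with no CR0 bit of their own
+        -- (e.g. LE = not GT) first invert the bit with CRNOR bit bit bit.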
+ code dst = cond_code
+ `appOL` negate_code
+ `appOL` toOL [
+ MFCR dst,
+ RLWINM dst dst (bit + 1) 31 31
+ ]
+
+ negate_code | do_negate = unitOL (CRNOR bit bit bit)
+ | otherwise = nilOL
+
+ (bit, do_negate) = case cond of
+ LTT -> (0, False)
+ LE -> (1, True)
+ EQQ -> (2, False)
+ GE -> (0, True)
+ GTT -> (1, False)
+
+ NE -> (2, True)
+
+ LU -> (0, False)
+ LEU -> (1, True)
+ GEU -> (0, True)
+ GU -> (1, False)
+
+ return (Any I32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
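+--
+-- For instance, an addition like  x + 3  arrives here with the literal as
+-- the second argument (hence the (CmmLit (CmmInt ...)) patterns on the
+-- right below);  3 + x  should already have been flipped round by the
+-- constant folder.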
+
+trivialCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
+ ,)))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
+ -> CmmExpr -> CmmExpr -- the two arguments
+ -> NatM Register
+#endif
+
+trivialUCode
+ :: MachRep
+ -> IF_ARCH_alpha((RI -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
+ ,IF_ARCH_sparc((RI -> Reg -> Instr)
+ ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
+ ,)))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+
+#ifndef powerpc_TARGET_ARCH
+trivialUFCode
+ :: MachRep
+ -> IF_ARCH_alpha((Reg -> Reg -> Instr)
+ ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_sparc((Reg -> Reg -> Instr)
+ ,))))
+ -> CmmExpr -- the one argument
+ -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+trivialCode instr x (StInt y)
+ | fits8Bits y
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src1 = registerName register tmp
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+ in
+ return (Any IntRep code__2)
+
+trivialCode instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat IntRep `thenNat` \ tmp1 ->
+ getNewRegNat IntRep `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1 []
+ src1 = registerName register1 tmp1
+ code2 = registerCode register2 tmp2 []
+ src2 = registerName register2 tmp2
+ code__2 dst = asmSeqThen [code1, code2] .
+ mkSeqInstr (instr src1 (RIReg src2) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat F64 `thenNat` \ tmp1 ->
+ getNewRegNat F64 `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1
+ src1 = registerName register1 tmp1
+
+ code2 = registerCode register2 tmp2
+ src2 = registerName register2 tmp2
+
+ code__2 dst = asmSeqThen [code1 [], code2 []] .
+ mkSeqInstr (instr src1 src2 dst)
+ in
+ return (Any F64 code__2)
+
+trivialUFCode _ instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr src dst)
+ in
+ return (Any F64 code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+ it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not
+ subsequently change the contents of that fixed reg. If you
+ want to do so, first copy the value either to a temporary
+ or into dst. You are free to modify dst even if it happens
+ to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+ arbitrary computation. The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from
+ each other and from all other regs, and stay live over
+ arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+ code which places the result in any register, period. Even if that
+ register happens to be read during the computation.
+
+ Corollary #1: this means that if you are generating code for an
+ operation with two arbitrary operands, you cannot assign the result
+ of the first operand into the destination register before computing
+ the second operand. The second operand might require the old value
+ of the destination register.
+
+ Corollary #2: A function might be able to generate more efficient
+ code if it knows the destination register is a new temporary (and
+ therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ (c) known registers (eg. %ecx is used by shifts)
+ In particular, it may *not* modify global registers, unless the global
+ register happens to be the destination register.
+-}
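+
+-- (A note on the Register values returned throughout this section:
+-- `Any rep code' means the result may go in any register, and `code' is a
+-- function that, given the chosen destination register, produces the
+-- instruction sequence; a Fixed register means the value already lives in
+-- a particular register, which must not be clobbered.)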
+
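+-- If the first argument is a literal that fits in an immediate field and
+-- the caller supplied a "reversed" form of the instruction (for a
+-- commutative operation the reversed form is just the instruction
+-- itself), we can compute b straight into dst and fold the literal in as
+-- an immediate operand.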
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
+ b_code <- getAnyReg b
+ let
+ code dst
+ = b_code dst `snocOL`
+ revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+ -- in
+ return (Any rep code)
+
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ -- in
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode pk instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr pk x_reg y_reg dst
+ -- in
+ return (Any pk code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
+
+#endif
+
+-------------
+
+trivialUFCode rep instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any rep code)
+
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ tmp <- getNewRegNat I32
+ let
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+ return (Any I32 code__2)
+
+trivialCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat I32
+ tmp2 <- getNewRegNat I32
+ let
+ code__2 dst = code1 `appOL` code2 `snocOL`
+ instr src1 (RIReg src2) dst
+ return (Any I32 code__2)
+
+------------
+trivialFCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat (cmmExprRep x)
+ tmp2 <- getNewRegNat (cmmExprRep y)
+ tmp <- getNewRegNat F64
+ let
+ promote x = FxTOy F32 F64 x tmp
+
+ pk1 = cmmExprRep x
+ pk2 = cmmExprRep y
+
+ code__2 dst =
+ if pk1 == pk2 then
+ code1 `appOL` code2 `snocOL`
+ instr pk src1 src2 dst
+ else if pk1 == F32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ instr F64 tmp src2 dst
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ instr F64 src1 tmp dst
+ return (Any (if pk1 == pk2 then pk1 else F64) code__2)
+
+------------
+trivialUCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr (RIReg src) dst
+ return (Any pk code__2)
+
+-------------
+trivialUFCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr src dst
+ return (Any pk code__2)
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ It may *not* modify global registers, unless the global
+ register happens to be the destination register.
+ It may not clobber any other registers. In fact, only ccalls clobber any
+ fixed registers.
+ Also, it may not modify the counter register (used by genCCall).
+
+ Corollary: If a getRegister for a subexpression returns Fixed, you need
+ not move it to a fresh temporary before evaluating the next subexpression.
+ The Fixed register won't be modified.
+ Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+ the value of the destination register.
+-}
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate rep signed y
+ = do
+ (src1, code1) <- getSomeReg x
+ let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+ return (Any rep code)
+
+trivialCode rep signed instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+ return (Any rep code)
+
+trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm rep instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+ return (Any rep code)
+
+trivialUCode rep instr x = do
+ (src, code) <- getSomeReg x
+ let code' dst = code `snocOL` instr dst src
+ return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
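+--
+-- Writing q for the truncated quotient of x by y, the three instructions
+-- below compute
+--      dst = q             (div)
+--      dst = dst * y       (MULLW)
+--      dst = x - dst       (SUBF dst dst x; subf d,a,b computes b - a)
+-- leaving the remainder x - q*y in dst.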
+
+remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `appOL` toOL [
+ div dst src1 src2,
+ MULLW dst dst (RIReg src2),
+ SUBF dst dst src1
+ ]
+ return (Any rep code)
+
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Coercing to/from integer/floating-point...
+
+-- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
+-- conversions. We have to store temporaries in memory to move
+-- between the integer and the floating point register sets.
+
+-- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
+-- pretend, on sparc at least, that double and float regs are separate
+-- kinds, so the value has to be computed into one kind before being
+-- explicitly "converted" to live in the other kind.
+
+coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
+coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
+
+#if sparc_TARGET_ARCH
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ reg ->
+ let
+ code = registerCode register reg
+ src = registerName register reg
+
+ code__2 dst = code . mkSeqInstrs [
+ ST Q src (spRel 0),
+ LD TF dst (spRel 0),
+ CVTxy Q TF dst dst]
+ in
+ return (Any F64 code__2)
+
+-------------
+coerceFP2Int x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat F64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ CVTxy TF Q src tmp,
+ ST TF tmp (spRel 0),
+ LD Q dst (spRel 0)]
+ in
+ return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> GITOF; F64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+------------
+
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of F32 -> GFTOI; F64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-I32 reps?
+ -- in
+ return (Any to code)
+
+#endif /* i386_TARGET_ARCH */
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
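+-- The integer value is parked in a scratch stack slot (spRel (-2)),
+-- reloaded into the floating-point destination register, and then
+-- converted in place with FxTOy; coerceFP2Int below goes the other way
+-- round through the same slot.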
+coerceInt2FP pk1 pk2 x = do
+ (src, code) <- getSomeReg x
+ let
+ code__2 dst = code `appOL` toOL [
+ ST pk1 src (spRel (-2)),
+ LD pk1 (spRel (-2)) dst,
+ FxTOy pk1 pk2 dst dst]
+ return (Any pk2 code__2)
+
+------------
+coerceFP2Int pk fprep x = do
+ (src, code) <- getSomeReg x
+ reg <- getNewRegNat fprep
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = ASSERT(fprep == F64 || fprep == F32)
+ code `appOL` toOL [
+ FxTOy fprep pk src tmp,
+ ST pk tmp (spRel (-2)),
+ LD pk (spRel (-2)) dst]
+ return (Any pk code__2)
+
+------------
+coerceDbl2Flt x = do
+ (src, code) <- getSomeReg x
+ return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
+
+------------
+coerceFlt2Dbl x = do
+ (src, code) <- getSomeReg x
+ return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
+
+#endif /* sparc_TARGET_ARCH */
+
+#if powerpc_TARGET_ARCH
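+-- coerceInt2FP uses the classic magic-constant trick: the literal emitted
+-- below, read as an IEEE double, is 2^52 + 2^31.  The code builds a
+-- double in a stack slot whose high word is 0x43300000 and whose low word
+-- is the source integer with its sign bit flipped (i.e. src + 2^31 viewed
+-- as unsigned), so the slot holds exactly 2^52 + 2^31 + src; subtracting
+-- the constant then leaves src as a double.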
+coerceInt2FP fromRep toRep x = do
+ (src, code) <- getSomeReg x
+ lbl <- getNewLabelNat
+ itmp <- getNewRegNat I32
+ ftmp <- getNewRegNat F64
+ dynRef <- cmmMakeDynamicReference addImportNat False lbl
+ Amode addr addr_code <- getAmode dynRef
+ let
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x43300000 I32),
+ CmmStaticLit (CmmInt 0x80000000 I32)],
+ XORIS itmp src (ImmInt 0x8000),
+ ST I32 itmp (spRel 3),
+ LIS itmp (ImmInt 0x4330),
+ ST I32 itmp (spRel 2),
+ LD F64 ftmp (spRel 2)
+ ] `appOL` addr_code `appOL` toOL [
+ LD F64 dst addr,
+ FSUB F64 dst ftmp dst
+ ] `appOL` maybe_frsp dst
+
+ maybe_exts = case fromRep of
+ I8 -> unitOL $ EXTS I8 src src
+ I16 -> unitOL $ EXTS I16 src src
+ I32 -> nilOL
+ maybe_frsp dst = case toRep of
+ F32 -> unitOL $ FRSP dst dst
+ F64 -> nilOL
+ return (Any toRep code')
+
+coerceFP2Int fromRep toRep x = do
+ -- the reps don't really matter: F*->F64 and I32->I* are no-ops
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat F64
+ let
+ code' dst = code `appOL` toOL [
+ -- convert to int in FP reg
+ FCTIWZ tmp src,
+ -- store value (64bit) from FP to stack
+ ST F64 tmp (spRel 2),
+ -- read low word of value (high word is undefined)
+ LD I32 dst (spRel 3)]
+ return (Any toRep code')
+#endif /* powerpc_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from MachInstrs (SDM):
+
+#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+ = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
+#endif
+