Diffstat (limited to 'compiler/nativeGen/MachCodeGen.hs')
-rw-r--r-- | compiler/nativeGen/MachCodeGen.hs | 4654
1 files changed, 4654 insertions, 0 deletions
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
new file mode 100644
index 0000000000..90ce6b5bf8
--- /dev/null
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -0,0 +1,4654 @@
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH} things, the
+-- structure should not be too overwhelming.
+
+module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import MachInstrs
+import MachRegs
+import NCGMonad
+import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import RegAllocInfo ( mkBranchInstr )
+
+-- Our intermediate code:
+import PprCmm		( pprExpr )
+import Cmm
+import MachOp
+import CLabel
+
+-- The rest:
+import StaticFlags	( opt_PIC )
+import ForeignCall	( CCallConv(..) )
+import OrdList
+import Pretty
+import Outputable
+import FastString
+import FastTypes	( isFastTrue )
+import Constants	( wORD_SIZE )
+
+#ifdef DEBUG
+import Outputable	( assertPanic )
+import TRACE		( trace )
+#endif
+
+import Control.Monad	( mapAndUnzipM )
+import Maybe		( fromJust )
+import DATA_BITS
+import DATA_WORD
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+type InstrBlock = OrdList Instr
+
+cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen (CmmProc info lab params blocks) = do
+  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+  picBaseMb <- getPicBaseMaybeNat
+  let proc = CmmProc info lab params (concat nat_blocks)
+      tops = proc : concat statics
+  case picBaseMb of
+      Just picBase -> initializePicBase picBase tops
+      Nothing -> return tops
+
+cmmTopCodeGen (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+	(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+	mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+	  = ([], BasicBlock id instrs : blocks, statics)
+	mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+	  = (instrs, blocks, CmmData sec dat:statics)
+	mkBlocks instr (instrs,blocks,statics)
+	  = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop	   -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatingRep kind -> assignReg_FltCode kind reg src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64        -> assignReg_I64Code reg src
+#endif
+      | otherwise          -> assignReg_IntCode kind reg src
+	where kind = cmmRegRep reg
+
+    CmmStore addr src
+      | isFloatingRep kind -> assignMem_FltCode kind addr src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64        -> assignMem_I64Code addr src
+#endif
+      | otherwise          -> assignMem_IntCode kind addr src
+	where kind = cmmExprRep src
+
+    CmmCall target result_regs args vols
+       -> genCCall target result_regs args vols
+
+    CmmBranch id          -> genBranch id
+    CmmCondBranch arg id  -> genCondJump id arg
+    CmmSwitch arg ids     -> genSwitch arg ids
+    CmmJump arg params    -> genJump arg
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
+  where rep = cmmRegRep reg
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms.  Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality.  Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result.  Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64        -- a.k.a "Register64"
+   = ChildCode64
+        InstrBlock      -- code
+        Reg             -- the lower 32-bit temporary which contains the
+                        -- result; use getHiVRegFromLo to find the other
+                        -- VRegUnique.
Rules of this simplified insn + -- selection game are therefore that the returned + -- Reg may be modified + +#if WORD_SIZE_IN_BITS==32 +assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock +#endif + +#ifndef x86_64_TARGET_ARCH +iselExpr64 :: CmmExpr -> NatM ChildCode64 +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +assignMem_I64Code addrTree valueTree = do + Amode addr addr_code <- getAmode addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + let + rhi = getHiVRegFromLo rlo + + -- Little-endian store + mov_lo = MOV I32 (OpReg rlo) (OpAddr addr) + mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) + -- in + return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) + + +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst I32 + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo) + mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi) + -- in + return ( + vcode `snocOL` mov_lo `snocOL` mov_hi + ) + +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(i386): invalid lvalue" + +------------ + +iselExpr64 (CmmLit (CmmInt i _)) = do + (rlo,rhi) <- getNewRegPairNat I32 + let + r = fromIntegral (fromIntegral i :: Word32) + q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) + code = toOL [ + MOV I32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV I32 (OpImm (ImmInteger q)) (OpReg rhi) + ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmLoad addrTree I64) = do + Amode addr addr_code <- getAmode addrTree + (rlo,rhi) <- getNewRegPairNat I32 + let + mov_lo = MOV I32 (OpAddr addr) (OpReg rlo) + mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) + -- in + return ( + ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo + ) + +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) + = return (ChildCode64 nilOL (mkVReg vu I32)) + +-- we handle addition, but rather badly +iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + (rlo,rhi) <- getNewRegPairNat I32 + let + r = fromIntegral (fromIntegral i :: Word32) + q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) + r1hi = getHiVRegFromLo r1lo + code = code1 `appOL` + toOL [ MOV I32 (OpReg r1lo) (OpReg rlo), + ADD I32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV I32 (OpReg r1hi) (OpReg rhi), + ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat I32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ MOV I32 (OpReg r1lo) (OpReg rlo), + ADD I32 (OpReg r2lo) (OpReg rlo), + MOV I32 (OpReg r1hi) (OpReg rhi), + ADC I32 (OpReg r2hi) (OpReg rhi) ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 expr + = pprPanic "iselExpr64(i386)" (ppr expr) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +assignMem_I64Code addrTree valueTree = do + Amode addr addr_code <- getAmode addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + (src, code) <- getSomeReg addrTree + let + rhi = getHiVRegFromLo rlo + -- Big-endian store + 
mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0)) + mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4)) + return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) + +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst pk + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return (vcode `snocOL` mov_hi `snocOL` mov_lo) +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(sparc): invalid lvalue" + + +-- Don't delete this -- it's very handy for debugging. +--iselExpr64 expr +-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False +-- = panic "iselExpr64(???)" + +iselExpr64 (CmmLoad addrTree I64) = do + Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree + rlo <- getNewRegNat I32 + let rhi = getHiVRegFromLo rlo + mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi + mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo + return ( + ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) + rlo + ) + +iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do + r_dst_lo <- getNewRegNat I32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_lo = mkVReg uq I32 + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return ( + ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo + ) + +iselExpr64 expr + = pprPanic "iselExpr64(sparc)" (ppr expr) + +#endif /* sparc_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if powerpc_TARGET_ARCH + +getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) +getI64Amodes addrTree = do + Amode hi_addr addr_code <- getAmode addrTree + case addrOffset hi_addr 4 of + Just lo_addr -> return (hi_addr, lo_addr, addr_code) + Nothing -> do (hi_ptr, code) <- getSomeReg addrTree + return (AddrRegImm hi_ptr (ImmInt 0), + AddrRegImm hi_ptr (ImmInt 4), + code) + +assignMem_I64Code addrTree valueTree = do + (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + let + rhi = getHiVRegFromLo rlo + + -- Big-endian store + mov_hi = ST I32 rhi hi_addr + mov_lo = ST I32 rlo lo_addr + -- in + return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) + +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst I32 + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = MR r_dst_lo r_src_lo + mov_hi = MR r_dst_hi r_src_hi + -- in + return ( + vcode `snocOL` mov_lo `snocOL` mov_hi + ) + +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(powerpc): invalid lvalue" + + +-- Don't delete this -- it's very handy for debugging. 
+--iselExpr64 expr +-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False +-- = panic "iselExpr64(???)" + +iselExpr64 (CmmLoad addrTree I64) = do + (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree + (rlo, rhi) <- getNewRegPairNat I32 + let mov_hi = LD I32 rhi hi_addr + mov_lo = LD I32 rlo lo_addr + return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo + +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) + = return (ChildCode64 nilOL (mkVReg vu I32)) + +iselExpr64 (CmmLit (CmmInt i _)) = do + (rlo,rhi) <- getNewRegPairNat I32 + let + half0 = fromIntegral (fromIntegral i :: Word16) + half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16) + half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16) + half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16) + + code = toOL [ + LIS rlo (ImmInt half1), + OR rlo rlo (RIImm $ ImmInt half0), + LIS rhi (ImmInt half3), + OR rlo rlo (RIImm $ ImmInt half2) + ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat I32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ ADDC rlo r1lo r2lo, + ADDE rhi r1hi r2hi ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 expr + = pprPanic "iselExpr64(powerpc)" (ppr expr) + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- The 'Register' type + +-- 'Register's passed up the tree. If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. + +data Register + = Fixed MachRep Reg InstrBlock + | Any MachRep (Reg -> InstrBlock) + +swizzleRegisterRep :: Register -> MachRep -> Register +swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code +swizzleRegisterRep (Any _ codefn) rep = Any rep codefn + + +-- ----------------------------------------------------------------------------- +-- Utils based on getRegister, below + +-- The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed _ reg code -> + return (reg, code) + +-- ----------------------------------------------------------------------------- +-- Grab the Reg for a CmmReg + +getRegisterReg :: CmmReg -> Reg + +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u pk + +getRegisterReg (CmmGlobal mid) + = case get_GlobalReg_reg_or_addr mid of + Left (RealReg rrno) -> RealReg rrno + _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... + + +-- ----------------------------------------------------------------------------- +-- Generate code to get a subtree into a Register + +-- Don't delete this -- it's very handy for debugging. 
+--getRegister expr +-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False +-- = panic "getRegister(???)" + +getRegister :: CmmExpr -> NatM Register + +getRegister (CmmReg (CmmGlobal PicBaseReg)) + = do + reg <- getPicBaseNat wordRep + return (Fixed wordRep reg nilOL) + +getRegister (CmmReg reg) + = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL) + +getRegister tree@(CmmRegOff _ _) + = getRegister (mangleIndexTree tree) + +-- end of machine-"independent" bit; here we go on the rest... + +#if alpha_TARGET_ARCH + +getRegister (StDouble d) + = getBlockIdNat `thenNat` \ lbl -> + getNewRegNat PtrRep `thenNat` \ tmp -> + let code dst = mkSeqInstrs [ + LDATA RoDataSegment lbl [ + DATA TF [ImmLab (rational d)] + ], + LDA tmp (AddrImm (ImmCLbl lbl)), + LD TF dst (AddrReg tmp)] + in + return (Any F64 code) + +getRegister (StPrim primop [x]) -- unary PrimOps + = case primop of + IntNegOp -> trivialUCode (NEG Q False) x + + NotOp -> trivialUCode NOT x + + FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x + DoubleNegOp -> trivialUFCode F64 (FNEG TF) x + + OrdOp -> coerceIntCode IntRep x + ChrOp -> chrCode x + + Float2IntOp -> coerceFP2Int x + Int2FloatOp -> coerceInt2FP pr x + Double2IntOp -> coerceFP2Int x + Int2DoubleOp -> coerceInt2FP pr x + + Double2FloatOp -> coerceFltCode x + Float2DoubleOp -> coerceFltCode x + + other_op -> getRegister (StCall fn CCallConv F64 [x]) + where + fn = case other_op of + FloatExpOp -> FSLIT("exp") + FloatLogOp -> FSLIT("log") + FloatSqrtOp -> FSLIT("sqrt") + FloatSinOp -> FSLIT("sin") + FloatCosOp -> FSLIT("cos") + FloatTanOp -> FSLIT("tan") + FloatAsinOp -> FSLIT("asin") + FloatAcosOp -> FSLIT("acos") + FloatAtanOp -> FSLIT("atan") + FloatSinhOp -> FSLIT("sinh") + FloatCoshOp -> FSLIT("cosh") + FloatTanhOp -> FSLIT("tanh") + DoubleExpOp -> FSLIT("exp") + DoubleLogOp -> FSLIT("log") + DoubleSqrtOp -> FSLIT("sqrt") + DoubleSinOp -> FSLIT("sin") + DoubleCosOp -> FSLIT("cos") + DoubleTanOp -> FSLIT("tan") + DoubleAsinOp -> FSLIT("asin") + DoubleAcosOp -> FSLIT("acos") + DoubleAtanOp -> FSLIT("atan") + DoubleSinhOp -> FSLIT("sinh") + DoubleCoshOp -> FSLIT("cosh") + DoubleTanhOp -> FSLIT("tanh") + where + pr = panic "MachCode.getRegister: no primrep needed for Alpha" + +getRegister (StPrim primop [x, y]) -- dyadic PrimOps + = case primop of + CharGtOp -> trivialCode (CMP LTT) y x + CharGeOp -> trivialCode (CMP LE) y x + CharEqOp -> trivialCode (CMP EQQ) x y + CharNeOp -> int_NE_code x y + CharLtOp -> trivialCode (CMP LTT) x y + CharLeOp -> trivialCode (CMP LE) x y + + IntGtOp -> trivialCode (CMP LTT) y x + IntGeOp -> trivialCode (CMP LE) y x + IntEqOp -> trivialCode (CMP EQQ) x y + IntNeOp -> int_NE_code x y + IntLtOp -> trivialCode (CMP LTT) x y + IntLeOp -> trivialCode (CMP LE) x y + + WordGtOp -> trivialCode (CMP ULT) y x + WordGeOp -> trivialCode (CMP ULE) x y + WordEqOp -> trivialCode (CMP EQQ) x y + WordNeOp -> int_NE_code x y + WordLtOp -> trivialCode (CMP ULT) x y + WordLeOp -> trivialCode (CMP ULE) x y + + AddrGtOp -> trivialCode (CMP ULT) y x + AddrGeOp -> trivialCode (CMP ULE) y x + AddrEqOp -> trivialCode (CMP EQQ) x y + AddrNeOp -> int_NE_code x y + AddrLtOp -> trivialCode (CMP ULT) x y + AddrLeOp -> trivialCode (CMP ULE) x y + + FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y + FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y + FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y + FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y + FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y + FloatLeOp -> cmpF_code (FCMP TF LE) NE x y + + DoubleGtOp -> cmpF_code (FCMP TF LE) 
EQQ x y + DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y + DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y + DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y + DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y + DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y + + IntAddOp -> trivialCode (ADD Q False) x y + IntSubOp -> trivialCode (SUB Q False) x y + IntMulOp -> trivialCode (MUL Q False) x y + IntQuotOp -> trivialCode (DIV Q False) x y + IntRemOp -> trivialCode (REM Q False) x y + + WordAddOp -> trivialCode (ADD Q False) x y + WordSubOp -> trivialCode (SUB Q False) x y + WordMulOp -> trivialCode (MUL Q False) x y + WordQuotOp -> trivialCode (DIV Q True) x y + WordRemOp -> trivialCode (REM Q True) x y + + FloatAddOp -> trivialFCode FloatRep (FADD TF) x y + FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y + FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y + FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y + + DoubleAddOp -> trivialFCode F64 (FADD TF) x y + DoubleSubOp -> trivialFCode F64 (FSUB TF) x y + DoubleMulOp -> trivialFCode F64 (FMUL TF) x y + DoubleDivOp -> trivialFCode F64 (FDIV TF) x y + + AddrAddOp -> trivialCode (ADD Q False) x y + AddrSubOp -> trivialCode (SUB Q False) x y + AddrRemOp -> trivialCode (REM Q True) x y + + AndOp -> trivialCode AND x y + OrOp -> trivialCode OR x y + XorOp -> trivialCode XOR x y + SllOp -> trivialCode SLL x y + SrlOp -> trivialCode SRL x y + + ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll" + ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra" + ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl" + + FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y]) + DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y]) + where + {- ------------------------------------------------------------ + Some bizarre special code for getting condition codes into + registers. Integer non-equality is a test for equality + followed by an XOR with 1. (Integer comparisons always set + the result register to 0 or 1.) Floating point comparisons of + any kind leave the result in a floating point register, so we + need to wrangle an integer register out of things. + -} + int_NE_code :: StixTree -> StixTree -> NatM Register + + int_NE_code x y + = trivialCode (CMP EQQ) x y `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst) + in + return (Any IntRep code__2) + + {- ------------------------------------------------------------ + Comments for int_NE_code also apply to cmpF_code + -} + cmpF_code + :: (Reg -> Reg -> Reg -> Instr) + -> Cond + -> StixTree -> StixTree + -> NatM Register + + cmpF_code instr cond x y + = trivialFCode pr instr x y `thenNat` \ register -> + getNewRegNat F64 `thenNat` \ tmp -> + getBlockIdNat `thenNat` \ lbl -> + let + code = registerCode register tmp + result = registerName register tmp + + code__2 dst = code . mkSeqInstrs [ + OR zeroh (RIImm (ImmInt 1)) dst, + BF cond result (ImmCLbl lbl), + OR zeroh (RIReg zeroh) dst, + NEWBLOCK lbl] + in + return (Any IntRep code__2) + where + pr = panic "trivialU?FCode: does not use PrimRep on Alpha" + ------------------------------------------------------------ + +getRegister (CmmLoad pk mem) + = getAmode mem `thenNat` \ amode -> + let + code = amodeCode amode + src = amodeAddr amode + size = primRepToSize pk + code__2 dst = code . 
mkSeqInstr (LD size dst src) + in + return (Any pk code__2) + +getRegister (StInt i) + | fits8Bits i + = let + code dst = mkSeqInstr (OR zeroh (RIImm src) dst) + in + return (Any IntRep code) + | otherwise + = let + code dst = mkSeqInstr (LDI Q dst src) + in + return (Any IntRep code) + where + src = ImmInt (fromInteger i) + +getRegister leaf + | isJust imm + = let + code dst = mkSeqInstr (LDA dst (AddrImm imm__2)) + in + return (Any PtrRep code) + where + imm = maybeImm leaf + imm__2 = case imm of Just x -> x + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +getRegister (CmmLit (CmmFloat f F32)) = do + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference addImportNat False lbl + Amode addr addr_code <- getAmode dynRef + let code dst = + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f F32)] + `consOL` (addr_code `snocOL` + GLD F32 addr dst) + -- in + return (Any F32 code) + + +getRegister (CmmLit (CmmFloat d F64)) + | d == 0.0 + = let code dst = unitOL (GLDZ dst) + in return (Any F64 code) + + | d == 1.0 + = let code dst = unitOL (GLD1 dst) + in return (Any F64 code) + + | otherwise = do + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference addImportNat False lbl + Amode addr addr_code <- getAmode dynRef + let code dst = + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat d F64)] + `consOL` (addr_code `snocOL` + GLD F64 addr dst) + -- in + return (Any F64 code) + +#endif /* i386_TARGET_ARCH */ + +#if x86_64_TARGET_ARCH + +getRegister (CmmLit (CmmFloat 0.0 rep)) = do + let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst)) + -- I don't know why there are xorpd, xorps, and pxor instructions. + -- They all appear to do the same thing --SDM + return (Any rep code) + +getRegister (CmmLit (CmmFloat f rep)) = do + lbl <- getNewLabelNat + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f rep)], + MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + ] + -- in + return (Any rep code) + +#endif /* x86_64_TARGET_ARCH */ + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL I8) addr + return (Any I32 code) + +getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL I8) addr + return (Any I32 code) + +getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL I16) addr + return (Any I32 code) + +getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL I16) addr + return (Any I32 code) + +#endif + +#if x86_64_TARGET_ARCH + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL I8) addr + return (Any I64 code) + +getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL I8) addr + return (Any I64 code) + +getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL I16) addr + return (Any I64 code) + +getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL I16) addr + return (Any I64 code) + +getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend + return (Any I64 code) + +getRegister (CmmMachOp (MO_S_Conv I32 
I64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL I32) addr + return (Any I64 code) + +#endif + +#if x86_64_TARGET_ARCH +getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do + x_code <- getAnyReg x + lbl <- getNewLabelNat + let + code dst = x_code dst `appOL` toOL [ + -- This is how gcc does it, so it can't be that bad: + LDATA ReadOnlyData16 [ + CmmAlign 16, + CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x80000000 I32), + CmmStaticLit (CmmInt 0 I32), + CmmStaticLit (CmmInt 0 I32), + CmmStaticLit (CmmInt 0 I32) + ], + XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + -- xorps, so we need the 128-bit constant + -- ToDo: rip-relative + ] + -- + return (Any F32 code) + +getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do + x_code <- getAnyReg x + lbl <- getNewLabelNat + let + -- This is how gcc does it, so it can't be that bad: + code dst = x_code dst `appOL` toOL [ + LDATA ReadOnlyData16 [ + CmmAlign 16, + CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x8000000000000000 I64), + CmmStaticLit (CmmInt 0 I64) + ], + -- gcc puts an unpck here. Wonder if we need it. + XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + -- xorpd, so we need the 128-bit constant + ] + -- + return (Any F64 code) +#endif + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of +#if i386_TARGET_ARCH + MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x + MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x +#endif + + MO_S_Neg rep -> trivialUCode rep (NEGI rep) x + MO_Not rep -> trivialUCode rep (NOT rep) x + + -- Nop conversions + -- TODO: these are only nops if the arg is not a fixed register that + -- can't be byte-addressed. + MO_U_Conv I32 I8 -> conversionNop I32 x + MO_S_Conv I32 I8 -> conversionNop I32 x + MO_U_Conv I16 I8 -> conversionNop I16 x + MO_S_Conv I16 I8 -> conversionNop I16 x + MO_U_Conv I32 I16 -> conversionNop I32 x + MO_S_Conv I32 I16 -> conversionNop I32 x +#if x86_64_TARGET_ARCH + MO_U_Conv I64 I32 -> conversionNop I64 x + MO_S_Conv I64 I32 -> conversionNop I64 x + MO_U_Conv I64 I16 -> conversionNop I64 x + MO_S_Conv I64 I16 -> conversionNop I64 x + MO_U_Conv I64 I8 -> conversionNop I64 x + MO_S_Conv I64 I8 -> conversionNop I64 x +#endif + + MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x + MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x + + -- widenings + MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x + MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x + MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x + + MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x + MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x + MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x + +#if x86_64_TARGET_ARCH + MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x + MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x + MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x + MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x + MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x + MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x + -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. + -- However, we don't want the register allocator to throw it + -- away as an unnecessary reg-to-reg move, so we keep it in + -- the form of a movzl and print it as a movl later. 
+#endif + +#if i386_TARGET_ARCH + MO_S_Conv F32 F64 -> conversionNop F64 x + MO_S_Conv F64 F32 -> conversionNop F32 x +#else + MO_S_Conv F32 F64 -> coerceFP2FP F64 x + MO_S_Conv F64 F32 -> coerceFP2FP F32 x +#endif + + MO_S_Conv from to + | isFloatingRep from -> coerceFP2Int from to x + | isFloatingRep to -> coerceInt2FP from to x + + other -> pprPanic "getRegister" (pprMachOp mop) + where + -- signed or unsigned extension. + integerExtend from to instr expr = do + (reg,e_code) <- if from == I8 then getByteReg expr + else getSomeReg expr + let + code dst = + e_code `snocOL` + instr from (OpReg reg) (OpReg dst) + return (Any to code) + + conversionNop new_rep expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_rep) + + +getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps + = ASSERT2(cmmExprRep x /= I8, pprExpr e) + case mop of + MO_Eq F32 -> condFltReg EQQ x y + MO_Ne F32 -> condFltReg NE x y + MO_S_Gt F32 -> condFltReg GTT x y + MO_S_Ge F32 -> condFltReg GE x y + MO_S_Lt F32 -> condFltReg LTT x y + MO_S_Le F32 -> condFltReg LE x y + + MO_Eq F64 -> condFltReg EQQ x y + MO_Ne F64 -> condFltReg NE x y + MO_S_Gt F64 -> condFltReg GTT x y + MO_S_Ge F64 -> condFltReg GE x y + MO_S_Lt F64 -> condFltReg LTT x y + MO_S_Le F64 -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ x y + MO_Ne rep -> condIntReg NE x y + + MO_S_Gt rep -> condIntReg GTT x y + MO_S_Ge rep -> condIntReg GE x y + MO_S_Lt rep -> condIntReg LTT x y + MO_S_Le rep -> condIntReg LE x y + + MO_U_Gt rep -> condIntReg GU x y + MO_U_Ge rep -> condIntReg GEU x y + MO_U_Lt rep -> condIntReg LU x y + MO_U_Le rep -> condIntReg LEU x y + +#if i386_TARGET_ARCH + MO_Add F32 -> trivialFCode F32 GADD x y + MO_Sub F32 -> trivialFCode F32 GSUB x y + + MO_Add F64 -> trivialFCode F64 GADD x y + MO_Sub F64 -> trivialFCode F64 GSUB x y + + MO_S_Quot F32 -> trivialFCode F32 GDIV x y + MO_S_Quot F64 -> trivialFCode F64 GDIV x y +#endif + +#if x86_64_TARGET_ARCH + MO_Add F32 -> trivialFCode F32 ADD x y + MO_Sub F32 -> trivialFCode F32 SUB x y + + MO_Add F64 -> trivialFCode F64 ADD x y + MO_Sub F64 -> trivialFCode F64 SUB x y + + MO_S_Quot F32 -> trivialFCode F32 FDIV x y + MO_S_Quot F64 -> trivialFCode F64 FDIV x y +#endif + + MO_Add rep -> add_code rep x y + MO_Sub rep -> sub_code rep x y + + MO_S_Quot rep -> div_code rep True True x y + MO_S_Rem rep -> div_code rep True False x y + MO_U_Quot rep -> div_code rep False True x y + MO_U_Rem rep -> div_code rep False False x y + +#if i386_TARGET_ARCH + MO_Mul F32 -> trivialFCode F32 GMUL x y + MO_Mul F64 -> trivialFCode F64 GMUL x y +#endif + +#if x86_64_TARGET_ARCH + MO_Mul F32 -> trivialFCode F32 MUL x y + MO_Mul F64 -> trivialFCode F64 MUL x y +#endif + + MO_Mul rep -> let op = IMUL rep in + trivialCode rep op (Just op) x y + + MO_S_MulMayOflo rep -> imulMayOflo rep x y + + MO_And rep -> let op = AND rep in + trivialCode rep op (Just op) x y + MO_Or rep -> let op = OR rep in + trivialCode rep op (Just op) x y + MO_Xor rep -> let op = XOR rep in + trivialCode rep op (Just op) x y + + {- Shift ops on x86s have constraints on their source, it + either has to be Imm, CL or 1 + => trivialCode is not restrictive enough (sigh.) 
+ -} + MO_Shl rep -> shift_code rep (SHL rep) x y {-False-} + MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-} + MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-} + + other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) + where + -------------------- + imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + imulMayOflo rep a b = do + (a_reg, a_code) <- getNonClobberedReg a + b_code <- getAnyReg b + let + shift_amt = case rep of + I32 -> 31 + I64 -> 63 + _ -> panic "shift_amt" + + code = a_code `appOL` b_code eax `appOL` + toOL [ + IMUL2 rep (OpReg a_reg), -- result in %edx:%eax + SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax), + -- sign extend lower part + SUB rep (OpReg edx) (OpReg eax) + -- compare against upper + -- eax==0 if high part == sign extended low part + ] + -- in + return (Fixed rep eax code) + + -------------------- + shift_code :: MachRep + -> (Operand -> Operand -> Instr) + -> CmmExpr + -> CmmExpr + -> NatM Register + + {- Case1: shift length as immediate -} + shift_code rep instr x y@(CmmLit lit) = do + x_code <- getAnyReg x + let + code dst + = x_code dst `snocOL` + instr (OpImm (litToImm lit)) (OpReg dst) + -- in + return (Any rep code) + + {- Case2: shift length is complex (non-immediate) -} + shift_code rep instr x y{-amount-} = do + (x_reg, x_code) <- getNonClobberedReg x + y_code <- getAnyReg y + let + code = x_code `appOL` + y_code ecx `snocOL` + instr (OpReg ecx) (OpReg x_reg) + -- in + return (Fixed rep x_reg code) + + -------------------- + add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + add_code rep x (CmmLit (CmmInt y _)) + | not (is64BitInteger y) = add_int rep x y + add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y + + -------------------- + sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + sub_code rep x (CmmLit (CmmInt y _)) + | not (is64BitInteger (-y)) = add_int rep x (-y) + sub_code rep x y = trivialCode rep (SUB rep) Nothing x y + + -- our three-operand add instruction: + add_int rep x y = do + (x_reg, x_code) <- getSomeReg x + let + imm = ImmInt (fromInteger y) + code dst + = x_code `snocOL` + LEA rep + (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) + (OpReg dst) + -- + return (Any rep code) + + ---------------------- + div_code rep signed quotient x y = do + (y_op, y_code) <- getRegOrMem y -- cannot be clobbered + x_code <- getAnyReg x + let + widen | signed = CLTD rep + | otherwise = XOR rep (OpReg edx) (OpReg edx) + + instr | signed = IDIV + | otherwise = DIV + + code = y_code `appOL` + x_code eax `appOL` + toOL [widen, instr rep y_op] + + result | quotient = eax + | otherwise = edx + + -- in + return (Fixed rep result code) + + +getRegister (CmmLoad mem pk) + | isFloatingRep pk + = do + Amode src mem_code <- getAmode mem + let + code dst = mem_code `snocOL` + IF_ARCH_i386(GLD pk src dst, + MOV pk (OpAddr src) (OpReg dst)) + -- + return (Any pk code) + +#if i386_TARGET_ARCH +getRegister (CmmLoad mem pk) + | pk /= I64 + = do + code <- intLoadCode (instr pk) mem + return (Any pk code) + where + instr I8 = MOVZxL pk + instr I16 = MOV I16 + instr I32 = MOV I32 + -- we always zero-extend 8-bit loads, if we + -- can't think of anything better. This is because + -- we can't guarantee access to an 8-bit variant of every register + -- (esi and edi don't have 8-bit variants), so to make things + -- simpler we do our 8-bit arithmetic with full 32-bit registers. 
+#endif + +#if x86_64_TARGET_ARCH +-- Simpler memory load code on x86_64 +getRegister (CmmLoad mem pk) + = do + code <- intLoadCode (MOV pk) mem + return (Any pk code) +#endif + +getRegister (CmmLit (CmmInt 0 rep)) + = let + -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits + adj_rep = case rep of I64 -> I32; _ -> rep + rep1 = IF_ARCH_i386( rep, adj_rep ) + code dst + = unitOL (XOR rep1 (OpReg dst) (OpReg dst)) + in + return (Any rep code) + +#if x86_64_TARGET_ARCH + -- optimisation for loading small literals on x86_64: take advantage + -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit + -- instruction forms are shorter. +getRegister (CmmLit lit) + | I64 <- cmmLitRep lit, not (isBigLit lit) + = let + imm = litToImm lit + code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst)) + in + return (Any I64 code) + where + isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff + isBigLit _ = False + -- note1: not the same as is64BitLit, because that checks for + -- signed literals that fit in 32 bits, but we want unsigned + -- literals here. + -- note2: all labels are small, because we're assuming the + -- small memory model (see gcc docs, -mcmodel=small). +#endif + +getRegister (CmmLit lit) + = let + rep = cmmLitRep lit + imm = litToImm lit + code dst = unitOL (MOV rep (OpImm imm) (OpReg dst)) + in + return (Any rep code) + +getRegister other = pprPanic "getRegister(x86)" (ppr other) + + +intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr + -> NatM (Reg -> InstrBlock) +intLoadCode instr mem = do + Amode src mem_code <- getAmode mem + return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst)) + +-- Compute an expression into *any* register, adding the appropriate +-- move instruction if necessary. +getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock) +getAnyReg expr = do + r <- getRegister expr + anyReg r + +anyReg :: Register -> NatM (Reg -> InstrBlock) +anyReg (Any _ code) = return code +anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst) + +-- A bit like getSomeReg, but we want a reg that can be byte-addressed. +-- Fixed registers might not be byte-addressable, so we make sure we've +-- got a temporary, inserting an extra reg copy if necessary. +getByteReg :: CmmExpr -> NatM (Reg, InstrBlock) +#if x86_64_TARGET_ARCH +getByteReg = getSomeReg -- all regs are byte-addressable on x86_64 +#else +getByteReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed rep reg code + | isVirtualReg reg -> return (reg,code) + | otherwise -> do + tmp <- getNewRegNat rep + return (tmp, code `snocOL` reg2reg rep reg tmp) + -- ToDo: could optimise slightly by checking for byte-addressable + -- real registers, but that will happen very rarely if at all. +#endif + +-- Another variant: this time we want the result in a register that cannot +-- be modified by code to evaluate an arbitrary expression. 
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock) +getNonClobberedReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed rep reg code + -- only free regs can be clobbered + | RealReg rr <- reg, isFastTrue (freeReg rr) -> do + tmp <- getNewRegNat rep + return (tmp, code `snocOL` reg2reg rep reg tmp) + | otherwise -> + return (reg, code) + +reg2reg :: MachRep -> Reg -> Reg -> Instr +reg2reg rep src dst +#if i386_TARGET_ARCH + | isFloatingRep rep = GMOV src dst +#endif + | otherwise = MOV rep (OpReg src) (OpReg dst) + +#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +getRegister (CmmLit (CmmFloat f F32)) = do + lbl <- getNewLabelNat + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f F32)], + SETHI (HI (ImmCLbl lbl)) dst, + LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] + return (Any F32 code) + +getRegister (CmmLit (CmmFloat d F64)) = do + lbl <- getNewLabelNat + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat d F64)], + SETHI (HI (ImmCLbl lbl)) dst, + LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] + return (Any F64 code) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x + MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x + + MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x + MO_Not rep -> trivialUCode rep (XNOR False g0) x + + MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8)) + + MO_U_Conv F64 F32-> coerceDbl2Flt x + MO_U_Conv F32 F64-> coerceFlt2Dbl x + + MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x + MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x + MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x + MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x + + -- Conversions which are a nop on sparc + MO_U_Conv from to + | from == to -> conversionNop to x + MO_U_Conv I32 to -> conversionNop to x + MO_S_Conv I32 to -> conversionNop to x + + -- widenings + MO_U_Conv I8 I32 -> integerExtend False I8 I32 x + MO_U_Conv I16 I32 -> integerExtend False I16 I32 x + MO_U_Conv I8 I16 -> integerExtend False I8 I16 x + MO_S_Conv I16 I32 -> integerExtend True I16 I32 x + + other_op -> panic "Unknown unary mach op" + where + -- XXX SLL/SRL? 
+ integerExtend signed from to expr = do + (reg, e_code) <- getSomeReg expr + let + code dst = + e_code `snocOL` + ((if signed then SRA else SRL) + reg (RIImm (ImmInt 0)) dst) + return (Any to code) + conversionNop new_rep expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_rep) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_Eq F32 -> condFltReg EQQ x y + MO_Ne F32 -> condFltReg NE x y + + MO_S_Gt F32 -> condFltReg GTT x y + MO_S_Ge F32 -> condFltReg GE x y + MO_S_Lt F32 -> condFltReg LTT x y + MO_S_Le F32 -> condFltReg LE x y + + MO_Eq F64 -> condFltReg EQQ x y + MO_Ne F64 -> condFltReg NE x y + + MO_S_Gt F64 -> condFltReg GTT x y + MO_S_Ge F64 -> condFltReg GE x y + MO_S_Lt F64 -> condFltReg LTT x y + MO_S_Le F64 -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ x y + MO_Ne rep -> condIntReg NE x y + + MO_S_Gt rep -> condIntReg GTT x y + MO_S_Ge rep -> condIntReg GE x y + MO_S_Lt rep -> condIntReg LTT x y + MO_S_Le rep -> condIntReg LE x y + + MO_U_Gt I32 -> condIntReg GTT x y + MO_U_Ge I32 -> condIntReg GE x y + MO_U_Lt I32 -> condIntReg LTT x y + MO_U_Le I32 -> condIntReg LE x y + + MO_U_Gt I16 -> condIntReg GU x y + MO_U_Ge I16 -> condIntReg GEU x y + MO_U_Lt I16 -> condIntReg LU x y + MO_U_Le I16 -> condIntReg LEU x y + + MO_Add I32 -> trivialCode I32 (ADD False False) x y + MO_Sub I32 -> trivialCode I32 (SUB False False) x y + + MO_S_MulMayOflo rep -> imulMayOflo rep x y +{- + -- ToDo: teach about V8+ SPARC div instructions + MO_S_Quot I32 -> idiv FSLIT(".div") x y + MO_S_Rem I32 -> idiv FSLIT(".rem") x y + MO_U_Quot I32 -> idiv FSLIT(".udiv") x y + MO_U_Rem I32 -> idiv FSLIT(".urem") x y +-} + MO_Add F32 -> trivialFCode F32 FADD x y + MO_Sub F32 -> trivialFCode F32 FSUB x y + MO_Mul F32 -> trivialFCode F32 FMUL x y + MO_S_Quot F32 -> trivialFCode F32 FDIV x y + + MO_Add F64 -> trivialFCode F64 FADD x y + MO_Sub F64 -> trivialFCode F64 FSUB x y + MO_Mul F64 -> trivialFCode F64 FMUL x y + MO_S_Quot F64 -> trivialFCode F64 FDIV x y + + MO_And rep -> trivialCode rep (AND False) x y + MO_Or rep -> trivialCode rep (OR False) x y + MO_Xor rep -> trivialCode rep (XOR False) x y + + MO_Mul rep -> trivialCode rep (SMUL False) x y + + MO_Shl rep -> trivialCode rep SLL x y + MO_U_Shr rep -> trivialCode rep SRL x y + MO_S_Shr rep -> trivialCode rep SRA x y + +{- + MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64 + [promote x, promote y]) + where promote x = CmmMachOp MO_F32_to_Dbl [x] + MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64 + [x, y]) +-} + other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) + where + --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y]) + + -------------------- + imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + imulMayOflo rep a b = do + (a_reg, a_code) <- getSomeReg a + (b_reg, b_code) <- getSomeReg b + res_lo <- getNewRegNat I32 + res_hi <- getNewRegNat I32 + let + shift_amt = case rep of + I32 -> 31 + I64 -> 63 + _ -> panic "shift_amt" + code dst = a_code `appOL` b_code `appOL` + toOL [ + SMUL False a_reg (RIReg b_reg) res_lo, + RDY res_hi, + SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, + SUB False False res_lo (RIReg res_hi) dst + ] + return (Any I32 code) + +getRegister (CmmLoad mem pk) = do + Amode src code <- getAmode mem + let + code__2 dst = code `snocOL` LD pk src dst + return (Any pk code__2) + +getRegister (CmmLit (CmmInt i _)) + | fits13Bits i + = let + src = ImmInt (fromInteger i) + code dst = unitOL 
(OR False g0 (RIImm src) dst) + in + return (Any I32 code) + +getRegister (CmmLit lit) + = let rep = cmmLitRep lit + imm = litToImm lit + code dst = toOL [ + SETHI (HI imm) dst, + OR False dst (RIImm (LO imm)) dst] + in return (Any I32 code) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH +getRegister (CmmLoad mem pk) + | pk /= I64 + = do + Amode addr addr_code <- getAmode mem + let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk) + addr_code `snocOL` LD pk dst addr + return (Any pk code) + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr)) + +-- Note: there is no Load Byte Arithmetic instruction, so no signed case here + +getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr)) + +getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr)) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_Not rep -> trivialUCode rep NOT x + + MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x + MO_S_Conv F32 F64 -> conversionNop F64 x + + MO_S_Conv from to + | from == to -> conversionNop to x + | isFloatingRep from -> coerceFP2Int from to x + | isFloatingRep to -> coerceInt2FP from to x + + -- narrowing is a nop: we treat the high bits as undefined + MO_S_Conv I32 to -> conversionNop to x + MO_S_Conv I16 I8 -> conversionNop I8 x + MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x + MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x + + MO_U_Conv from to + | from == to -> conversionNop to x + -- narrowing is a nop: we treat the high bits as undefined + MO_U_Conv I32 to -> conversionNop to x + MO_U_Conv I16 I8 -> conversionNop I8 x + MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32)) + MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32)) + + MO_S_Neg F32 -> trivialUCode F32 FNEG x + MO_S_Neg F64 -> trivialUCode F64 FNEG x + MO_S_Neg rep -> trivialUCode rep NEG x + + where + conversionNop new_rep expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_rep) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_Eq F32 -> condFltReg EQQ x y + MO_Ne F32 -> condFltReg NE x y + + MO_S_Gt F32 -> condFltReg GTT x y + MO_S_Ge F32 -> condFltReg GE x y + MO_S_Lt F32 -> condFltReg LTT x y + MO_S_Le F32 -> condFltReg LE x y + + MO_Eq F64 -> condFltReg EQQ x y + MO_Ne F64 -> condFltReg NE x y + + MO_S_Gt F64 -> condFltReg GTT x y + MO_S_Ge F64 -> condFltReg GE x y + MO_S_Lt F64 -> condFltReg LTT x y + MO_S_Le F64 -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y) + + MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Le rep -> condIntReg LEU (extendUExpr 
rep x) (extendUExpr rep y) + + MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y + MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y + MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y + MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y + + MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y + MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y + MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y + MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y + + -- optimize addition with 32-bit immediate + -- (needed for PIC) + MO_Add I32 -> + case y of + CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm) + -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep) + CmmLit lit + -> do + (src, srcCode) <- getSomeReg x + let imm = litToImm lit + code dst = srcCode `appOL` toOL [ + ADDIS dst src (HA imm), + ADD dst dst (RIImm (LO imm)) + ] + return (Any I32 code) + _ -> trivialCode I32 True ADD x y + + MO_Add rep -> trivialCode rep True ADD x y + MO_Sub rep -> + case y of -- subfi ('substract from' with immediate) doesn't exist + CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) + -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) + _ -> trivialCodeNoImm rep SUBF y x + + MO_Mul rep -> trivialCode rep True MULLW x y + + MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y + + MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented" + MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented" + + MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y) + MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y) + MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y) + + MO_And rep -> trivialCode rep False AND x y + MO_Or rep -> trivialCode rep False OR x y + MO_Xor rep -> trivialCode rep False XOR x y + + MO_Shl rep -> trivialCode rep False SLW x y + MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y + MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y + +getRegister (CmmLit (CmmInt i rep)) + | Just imm <- makeImmediate rep True i + = let + code dst = unitOL (LI dst imm) + in + return (Any rep code) + +getRegister (CmmLit (CmmFloat f frep)) = do + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference addImportNat False lbl + Amode addr addr_code <- getAmode dynRef + let code dst = + LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f frep)] + `consOL` (addr_code `snocOL` LD frep dst addr) + return (Any frep code) + +getRegister (CmmLit lit) + = let rep = cmmLitRep lit + imm = litToImm lit + code dst = toOL [ + LIS dst (HI imm), + OR dst dst (RIImm (LO imm)) + ] + in return (Any rep code) + +getRegister other = pprPanic "getRegister(ppc)" (pprExpr other) + + -- extend?Rep: wrap integer expression of type rep + -- in a conversion to I32 +extendSExpr I32 x = x +extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x] +extendUExpr I32 x = x +extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x] + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- The 'Amode' type: Memory addressing modes passed up the tree. + +data Amode = Amode AddrMode InstrBlock + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. 
+ +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +getAmode (StPrim IntSubOp [x, StInt i]) + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister x `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + off = ImmInt (-(fromInteger i)) + in + return (Amode (AddrRegImm reg off) code) + +getAmode (StPrim IntAddOp [x, StInt i]) + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister x `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + off = ImmInt (fromInteger i) + in + return (Amode (AddrRegImm reg off) code) + +getAmode leaf + | isJust imm + = return (Amode (AddrImm imm__2) id) + where + imm = maybeImm leaf + imm__2 = case imm of Just x -> x + +getAmode other + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister other `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + in + return (Amode (AddrReg reg) code) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +-- This is all just ridiculous, since it carefully undoes +-- what mangleIndexTree has just done. +getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) + | not (is64BitLit lit) + -- ASSERT(rep == I32)??? + = do (x_reg, x_code) <- getSomeReg x + let off = ImmInt (-(fromInteger i)) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) + +getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) + | not (is64BitLit lit) + -- ASSERT(rep == I32)??? + = do (x_reg, x_code) <- getSomeReg x + let off = ImmInt (fromInteger i) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) + +-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be +-- recognised by the next rule. +getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), + b@(CmmLit _)]) + = getAmode (CmmMachOp (MO_Add rep) [b,a]) + +getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) + [y, CmmLit (CmmInt shift _)]]) + | shift == 0 || shift == 1 || shift == 2 || shift == 3 + = do (x_reg, x_code) <- getNonClobberedReg x + -- x must be in a temp, because it has to stay live over y_code + -- we could compre x_reg and y_reg and do something better here... 
+ (y_reg, y_code) <- getSomeReg y + let + code = x_code `appOL` y_code + base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 + return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0)) + code) + +getAmode (CmmLit lit) | not (is64BitLit lit) + = return (Amode (ImmAddr (litToImm lit) 0) nilOL) + +getAmode expr = do + (reg,code) <- getSomeReg expr + return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) + +#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits (-i) + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (-(fromInteger i)) + return (Amode (AddrRegImm reg off) code) + + +getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits i + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (fromInteger i) + return (Amode (AddrRegImm reg off) code) + +getAmode (CmmMachOp (MO_Add rep) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + let + code = codeX `appOL` codeY + return (Amode (AddrRegReg regX regY) code) + +-- XXX Is this same as "leaf" in Stix? +getAmode (CmmLit lit) + = do + tmp <- getNewRegNat I32 + let + code = unitOL (SETHI (HI imm__2) tmp) + return (Amode (AddrRegImm tmp (LO imm__2)) code) + where + imm__2 = litToImm lit + +getAmode other + = do + (reg, code) <- getSomeReg other + let + off = ImmInt 0 + return (Amode (AddrRegImm reg off) code) + +#endif /* sparc_TARGET_ARCH */ + +#ifdef powerpc_TARGET_ARCH +getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate I32 True (-i) + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) + + +getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate I32 True i + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) + + -- optimize addition with 32-bit immediate + -- (needed for PIC) +getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit]) + = do + tmp <- getNewRegNat I32 + (src, srcCode) <- getSomeReg x + let imm = litToImm lit + code = srcCode `snocOL` ADDIS tmp src (HA imm) + return (Amode (AddrRegImm tmp (LO imm)) code) + +getAmode (CmmLit lit) + = do + tmp <- getNewRegNat I32 + let imm = litToImm lit + code = unitOL (LIS tmp (HA imm)) + return (Amode (AddrRegImm tmp (LO imm)) code) + +getAmode (CmmMachOp (MO_Add I32) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + +getAmode other + = do + (reg, code) <- getSomeReg other + let + off = ImmInt 0 + return (Amode (AddrRegImm reg off) code) +#endif /* powerpc_TARGET_ARCH */ + +-- ----------------------------------------------------------------------------- +-- getOperand: sometimes any operand will do. + +-- getNonClobberedOperand: the value of the operand will remain valid across +-- the computation of an arbitrary expression, unless the expression +-- is computed directly into a register which the operand refers to +-- (see trivialCode where this function is used for an example). 
+ +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getNonClobberedOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getNonClobberedOperand (CmmLit lit) + | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = + return (OpImm (litToImm lit), nilOL) +getNonClobberedOperand (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + Amode src mem_code <- getAmode mem + (src',save_code) <- + if (amodeCouldBeClobbered src) + then do + tmp <- getNewRegNat wordRep + return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), + unitOL (LEA I32 (OpAddr src) (OpReg tmp))) + else + return (src, nilOL) + return (OpAddr src', save_code `appOL` mem_code) +getNonClobberedOperand e = do + (reg, code) <- getNonClobberedReg e + return (OpReg reg, code) + +amodeCouldBeClobbered :: AddrMode -> Bool +amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode) + +regClobbered (RealReg rr) = isFastTrue (freeReg rr) +regClobbered _ = False + +-- getOperand: the operand is not required to remain valid across the +-- computation of an arbitrary expression. +getOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getOperand (CmmLit lit) + | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do + return (OpImm (litToImm lit), nilOL) +getOperand (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) +getOperand e = do + (reg, code) <- getSomeReg e + return (OpReg reg, code) + +isOperand :: CmmExpr -> Bool +isOperand (CmmLoad _ _) = True +isOperand (CmmLit lit) = not (is64BitLit lit) + || isSuitableFloatingPointLit lit +isOperand _ = False + +-- if we want a floating-point literal as an operand, we can +-- use it directly from memory. However, if the literal is +-- zero, we're better off generating it into a register using +-- xor. +isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 +isSuitableFloatingPointLit _ = False + +getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) +getRegOrMem (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) +getRegOrMem e = do + (reg, code) <- getNonClobberedReg e + return (OpReg reg, code) + +#if x86_64_TARGET_ARCH +is64BitLit (CmmInt i I64) = is64BitInteger i + -- assume that labels are in the range 0-2^31-1: this assumes the + -- small memory model (see gcc docs, -mcmodel=small). +#endif +is64BitLit x = False +#endif + +is64BitInteger :: Integer -> Bool +is64BitInteger i = i > 0x7fffffff || i < -0x80000000 + +-- ----------------------------------------------------------------------------- +-- The 'CondCode' type: Condition codes passed up the tree. + +data CondCode = CondCode Bool Cond InstrBlock + +-- Set up a condition code for a conditional branch. 
+ +getCondCode :: CmmExpr -> NatM CondCode + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH +getCondCode = panic "MachCode.getCondCode: not on Alphas" +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH +-- yes, they really do seem to want exactly the same! + +getCondCode (CmmMachOp mop [x, y]) + = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons + case mop of + MO_Eq F32 -> condFltCode EQQ x y + MO_Ne F32 -> condFltCode NE x y + + MO_S_Gt F32 -> condFltCode GTT x y + MO_S_Ge F32 -> condFltCode GE x y + MO_S_Lt F32 -> condFltCode LTT x y + MO_S_Le F32 -> condFltCode LE x y + + MO_Eq F64 -> condFltCode EQQ x y + MO_Ne F64 -> condFltCode NE x y + + MO_S_Gt F64 -> condFltCode GTT x y + MO_S_Ge F64 -> condFltCode GE x y + MO_S_Lt F64 -> condFltCode LTT x y + MO_S_Le F64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ x y + MO_Ne rep -> condIntCode NE x y + + MO_S_Gt rep -> condIntCode GTT x y + MO_S_Ge rep -> condIntCode GE x y + MO_S_Lt rep -> condIntCode LTT x y + MO_S_Le rep -> condIntCode LE x y + + MO_U_Gt rep -> condIntCode GU x y + MO_U_Ge rep -> condIntCode GEU x y + MO_U_Lt rep -> condIntCode LU x y + MO_U_Le rep -> condIntCode LEU x y + + other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop) + +getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) + +#elif powerpc_TARGET_ARCH + +-- almost the same as everywhere else - but we need to +-- extend small integers to 32 bit first + +getCondCode (CmmMachOp mop [x, y]) + = case mop of + MO_Eq F32 -> condFltCode EQQ x y + MO_Ne F32 -> condFltCode NE x y + + MO_S_Gt F32 -> condFltCode GTT x y + MO_S_Ge F32 -> condFltCode GE x y + MO_S_Lt F32 -> condFltCode LTT x y + MO_S_Le F32 -> condFltCode LE x y + + MO_Eq F64 -> condFltCode EQQ x y + MO_Ne F64 -> condFltCode NE x y + + MO_S_Gt F64 -> condFltCode GTT x y + MO_S_Ge F64 -> condFltCode GE x y + MO_S_Lt F64 -> condFltCode LTT x y + MO_S_Le F64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y) + + MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y) + + other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) + +getCondCode other = panic "getCondCode(2)(powerpc)" + + +#endif + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. 
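+--
+-- Why the PowerPC getCondCode above widens its operands first (an
+-- illustrative, self-contained sketch, not part of the original file):
+-- the compare instructions only work at word width, so an I8 value left
+-- unextended in a register can order wrongly.  In plain Haskell terms:
+--
+--	compare8 :: Int8 -> Int8 -> Ordering
+--	compare8 x y = compare (fromIntegral x :: Int32) (fromIntegral y :: Int32)
+--	-- fromIntegral sign-extends, so compare8 (-1) 1 == LT, as required;
+--	-- comparing the raw zero-extended bytes (0xFF vs 0x01) would say GT.
+--
+-- extendSExpr/extendUExpr arrange the appropriate widening (sign- or
+-- zero-extension) at the Cmm level before condIntCode ever sees the
+-- operands.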
+ +condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode + +#if alpha_TARGET_ARCH +condIntCode = panic "MachCode.condIntCode: not on Alphas" +condFltCode = panic "MachCode.condFltCode: not on Alphas" +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +-- memory vs immediate +condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do + Amode x_addr x_code <- getAmode x + let + imm = litToImm lit + code = x_code `snocOL` + CMP pk (OpImm imm) (OpAddr x_addr) + -- + return (CondCode False cond code) + +-- anything vs zero +condIntCode cond x (CmmLit (CmmInt 0 pk)) = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST pk (OpReg x_reg) (OpReg x_reg) + -- + return (CondCode False cond code) + +-- anything vs operand +condIntCode cond x y | isOperand y = do + (x_reg, x_code) <- getNonClobberedReg x + (y_op, y_code) <- getOperand y + let + code = x_code `appOL` y_code `snocOL` + CMP (cmmExprRep x) y_op (OpReg x_reg) + -- in + return (CondCode False cond code) + +-- anything vs anything +condIntCode cond x y = do + (y_reg, y_code) <- getNonClobberedReg y + (x_op, x_code) <- getRegOrMem x + let + code = y_code `appOL` + x_code `snocOL` + CMP (cmmExprRep x) (OpReg y_reg) x_op + -- in + return (CondCode False cond code) +#endif + +#if i386_TARGET_ARCH +condFltCode cond x y + = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do + (x_reg, x_code) <- getNonClobberedReg x + (y_reg, y_code) <- getSomeReg y + let + code = x_code `appOL` y_code `snocOL` + GCMP cond x_reg y_reg + -- The GCMP insn does the test and sets the zero flag if comparable + -- and true. Hence we always supply EQQ as the condition to test. + return (CondCode True EQQ code) +#endif /* i386_TARGET_ARCH */ + +#if x86_64_TARGET_ARCH +-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be +-- an operand, but the right must be a reg. We can probably do better +-- than this general case... +condFltCode cond x y = do + (x_reg, x_code) <- getNonClobberedReg x + (y_op, y_code) <- getOperand y + let + code = x_code `appOL` + y_code `snocOL` + CMP (cmmExprRep x) y_op (OpReg x_reg) + -- NB(1): we need to use the unsigned comparison operators on the + -- result of this comparison. 
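+	-- (This is because ucomiss/ucomisd report their result only in the
+	-- CF, ZF and PF flags, the same flags an unsigned integer CMP sets,
+	-- so the signed condition codes, which read SF and OF, would test
+	-- meaningless bits here.)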
+ -- in + return (CondCode True (condToUnsigned cond) code) +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +condIntCode cond x (CmmLit (CmmInt y rep)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + let + src2 = ImmInt (fromInteger y) + code' = code `snocOL` SUB False True src1 (RIImm src2) g0 + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code__2 = code1 `appOL` code2 `snocOL` + SUB False True src1 (RIReg src2) g0 + return (CondCode False cond code__2) + +----------- +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp <- getNewRegNat F64 + let + promote x = FxTOy F32 F64 x tmp + + pk1 = cmmExprRep x + pk2 = cmmExprRep y + + code__2 = + if pk1 == pk2 then + code1 `appOL` code2 `snocOL` + FCMP True pk1 src1 src2 + else if pk1 == F32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + FCMP True F64 tmp src2 + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + FCMP True F64 src1 tmp + return (CondCode True cond code__2) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH +-- ###FIXME: I16 and I8! +condIntCode cond x (CmmLit (CmmInt y rep)) + | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y + = do + (src1, code) <- getSomeReg x + let + code' = code `snocOL` + (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2) + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` + (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2) + return (CondCode False cond code') + +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 + code'' = case cond of -- twiddle CR to handle unordered case + GE -> code' `snocOL` CRNOR ltbit eqbit gtbit + LE -> code' `snocOL` CRNOR gtbit eqbit ltbit + _ -> code' + where + ltbit = 0 ; eqbit = 2 ; gtbit = 1 + return (CondCode True cond code'') + +#endif /* powerpc_TARGET_ARCH */ + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +assignIntCode pk (CmmLoad dst _) src + = getNewRegNat IntRep `thenNat` \ tmp -> + getAmode dst `thenNat` \ amode -> + getRegister src `thenNat` \ register -> + let + code1 = amodeCode amode [] + dst__2 = amodeAddr amode + code2 = registerCode register tmp [] + src__2 = registerName register tmp + sz = primRepToSize pk + code__2 = asmSeqThen [code1, code2] . 
mkSeqInstr (ST sz src__2 dst__2) + in + return code__2 + +assignIntCode pk dst src + = getRegister dst `thenNat` \ register1 -> + getRegister src `thenNat` \ register2 -> + let + dst__2 = registerName register1 zeroh + code = registerCode register2 dst__2 + src__2 = registerName register2 dst__2 + code__2 = if isFixed register2 + then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2) + else code + in + return code__2 + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +-- integer assignment to memory +assignMem_IntCode pk addr src = do + Amode addr code_addr <- getAmode addr + (code_src, op_src) <- get_op_RI src + let + code = code_src `appOL` + code_addr `snocOL` + MOV pk op_src (OpAddr addr) + -- NOTE: op_src is stable, so it will still be valid + -- after code_addr. This may involve the introduction + -- of an extra MOV to a temporary register, but we hope + -- the register allocator will get rid of it. + -- + return code + where + get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator + get_op_RI (CmmLit lit) | not (is64BitLit lit) + = return (nilOL, OpImm (litToImm lit)) + get_op_RI op + = do (reg,code) <- getNonClobberedReg op + return (code, OpReg reg) + + +-- Assign; dst is a reg, rhs is mem +assignReg_IntCode pk reg (CmmLoad src _) = do + load_code <- intLoadCode (MOV pk) src + return (load_code (getRegisterReg reg)) + +-- dst is a reg, but src could be anything +assignReg_IntCode pk reg src = do + code <- getAnyReg src + return (code (getRegisterReg reg)) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + +assignReg_IntCode pk reg src = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg + where + dst = getRegisterReg reg + + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH + +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + +-- dst is a reg, but src could be anything +assignReg_IntCode pk reg src + = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` MR dst freg + where + dst = getRegisterReg reg + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Floating-point assignments + +#if alpha_TARGET_ARCH + +assignFltCode pk (CmmLoad dst _) src + = getNewRegNat pk `thenNat` \ tmp -> + getAmode dst `thenNat` \ amode -> + getRegister src `thenNat` \ register -> + let + code1 = amodeCode amode [] + dst__2 = amodeAddr amode + code2 = registerCode register tmp [] + src__2 = registerName register tmp + sz = primRepToSize pk + code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) + in + return code__2 + +assignFltCode pk dst src + = getRegister dst `thenNat` \ register1 -> + getRegister src `thenNat` \ register2 -> + let + dst__2 = registerName register1 zeroh + code = registerCode register2 dst__2 + src__2 = registerName register2 dst__2 + code__2 = if isFixed register2 + then code . 
mkSeqInstr (FMOV src__2 dst__2) + else code + in + return code__2 + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +-- Floating point assignment to memory +assignMem_FltCode pk addr src = do + (src_reg, src_code) <- getNonClobberedReg src + Amode addr addr_code <- getAmode addr + let + code = src_code `appOL` + addr_code `snocOL` + IF_ARCH_i386(GST pk src_reg addr, + MOV pk (OpReg src_reg) (OpAddr addr)) + return code + +-- Floating point assignment to a register/temporary +assignReg_FltCode pk reg src = do + src_code <- getAnyReg src + return (src_code (getRegisterReg reg)) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +-- Floating point assignment to memory +assignMem_FltCode pk addr src = do + Amode dst__2 code1 <- getAmode addr + (src__2, code2) <- getSomeReg src + tmp1 <- getNewRegNat pk + let + pk__2 = cmmExprRep src + code__2 = code1 `appOL` code2 `appOL` + if pk == pk__2 + then unitOL (ST pk src__2 dst__2) + else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2] + return code__2 + +-- Floating point assignment to a register/temporary +-- ToDo: Verify correctness +assignReg_FltCode pk reg src = do + r <- getRegister src + v1 <- getNewRegNat pk + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1 + where + dst = getRegisterReg reg + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH + +-- Easy, isn't it? +assignMem_FltCode = assignMem_IntCode +assignReg_FltCode = assignReg_IntCode + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Generating an non-local jump + +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +genJump (CmmLabel lbl) + | isAsmTemp lbl = returnInstr (BR target) + | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0] + where + target = ImmCLbl lbl + +genJump tree + = getRegister tree `thenNat` \ register -> + getNewRegNat PtrRep `thenNat` \ tmp -> + let + dst = registerName register pv + code = registerCode register pv + target = registerName register pv + in + if isFixed register then + returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0] + else + return (code . 
mkSeqInstr (JMP zeroh (AddrReg pv) 0)) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +genJump (CmmLoad mem pk) = do + Amode target code <- getAmode mem + return (code `snocOL` JMP (OpAddr target)) + +genJump (CmmLit lit) = do + return (unitOL (JMP (OpImm (litToImm lit)))) + +genJump expr = do + (reg,code) <- getSomeReg expr + return (code `snocOL` JMP (OpReg reg)) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +genJump (CmmLit (CmmLabel lbl)) + = return (toOL [CALL (Left target) 0 True, NOP]) + where + target = ImmCLbl lbl + +genJump tree + = do + (target, code) <- getSomeReg tree + return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH +genJump (CmmLit (CmmLabel lbl)) + = return (unitOL $ JMP lbl) + +genJump tree + = do + (target,code) <- getSomeReg tree + return (code `snocOL` MTCTR target `snocOL` BCTR []) +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Unconditional branches + +genBranch :: BlockId -> NatM InstrBlock + +genBranch = return . toOL . mkBranchInstr + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +ALPHA: For comparisons with 0, we're laughing, because we can just do +the desired conditional branch. + +I386: First, we have to ensure that the condition +codes are set according to the supplied comparison operation. + +SPARC: First, we have to ensure that the condition codes are set +according to the supplied comparison operation. We generate slightly +different code for floating point comparisons, because a floating +point operation cannot directly precede a @BF@. We assume the worst +and fill that slot with a @NOP@. + +SPARC: Do not fill the delay slots here; you will confuse the register +allocator. 
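+
+x86_64: floating-point comparisons need one further wrinkle: the operands
+may be unordered (NaN), in which case ucomiss/ucomisd set the zero, carry
+and parity flags together.  Every comparison except /= must come out False
+for unordered operands, so the conditional branch has to be combined with a
+test of the parity flag; see the or_unordered/and_ordered cases in
+genCondJump and condFltReg below.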
+-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +genCondJump id (StPrim op [x, StInt 0]) + = getRegister x `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + value = registerName register tmp + pk = registerRep register + target = ImmCLbl lbl + in + returnSeq code [BI (cmpOp op) value target] + where + cmpOp CharGtOp = GTT + cmpOp CharGeOp = GE + cmpOp CharEqOp = EQQ + cmpOp CharNeOp = NE + cmpOp CharLtOp = LTT + cmpOp CharLeOp = LE + cmpOp IntGtOp = GTT + cmpOp IntGeOp = GE + cmpOp IntEqOp = EQQ + cmpOp IntNeOp = NE + cmpOp IntLtOp = LTT + cmpOp IntLeOp = LE + cmpOp WordGtOp = NE + cmpOp WordGeOp = ALWAYS + cmpOp WordEqOp = EQQ + cmpOp WordNeOp = NE + cmpOp WordLtOp = NEVER + cmpOp WordLeOp = EQQ + cmpOp AddrGtOp = NE + cmpOp AddrGeOp = ALWAYS + cmpOp AddrEqOp = EQQ + cmpOp AddrNeOp = NE + cmpOp AddrLtOp = NEVER + cmpOp AddrLeOp = EQQ + +genCondJump lbl (StPrim op [x, StDouble 0.0]) + = getRegister x `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + value = registerName register tmp + pk = registerRep register + target = ImmCLbl lbl + in + return (code . mkSeqInstr (BF (cmpOp op) value target)) + where + cmpOp FloatGtOp = GTT + cmpOp FloatGeOp = GE + cmpOp FloatEqOp = EQQ + cmpOp FloatNeOp = NE + cmpOp FloatLtOp = LTT + cmpOp FloatLeOp = LE + cmpOp DoubleGtOp = GTT + cmpOp DoubleGeOp = GE + cmpOp DoubleEqOp = EQQ + cmpOp DoubleNeOp = NE + cmpOp DoubleLtOp = LTT + cmpOp DoubleLeOp = LE + +genCondJump lbl (StPrim op [x, y]) + | fltCmpOp op + = trivialFCode pr instr x y `thenNat` \ register -> + getNewRegNat F64 `thenNat` \ tmp -> + let + code = registerCode register tmp + result = registerName register tmp + target = ImmCLbl lbl + in + return (code . mkSeqInstr (BF cond result target)) + where + pr = panic "trivialU?FCode: does not use PrimRep on Alpha" + + fltCmpOp op = case op of + FloatGtOp -> True + FloatGeOp -> True + FloatEqOp -> True + FloatNeOp -> True + FloatLtOp -> True + FloatLeOp -> True + DoubleGtOp -> True + DoubleGeOp -> True + DoubleEqOp -> True + DoubleNeOp -> True + DoubleLtOp -> True + DoubleLeOp -> True + _ -> False + (instr, cond) = case op of + FloatGtOp -> (FCMP TF LE, EQQ) + FloatGeOp -> (FCMP TF LTT, EQQ) + FloatEqOp -> (FCMP TF EQQ, NE) + FloatNeOp -> (FCMP TF EQQ, EQQ) + FloatLtOp -> (FCMP TF LTT, NE) + FloatLeOp -> (FCMP TF LE, NE) + DoubleGtOp -> (FCMP TF LE, EQQ) + DoubleGeOp -> (FCMP TF LTT, EQQ) + DoubleEqOp -> (FCMP TF EQQ, NE) + DoubleNeOp -> (FCMP TF EQQ, EQQ) + DoubleLtOp -> (FCMP TF LTT, NE) + DoubleLeOp -> (FCMP TF LE, NE) + +genCondJump lbl (StPrim op [x, y]) + = trivialCode instr x y `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + result = registerName register tmp + target = ImmCLbl lbl + in + return (code . 
mkSeqInstr (BI cond result target)) + where + (instr, cond) = case op of + CharGtOp -> (CMP LE, EQQ) + CharGeOp -> (CMP LTT, EQQ) + CharEqOp -> (CMP EQQ, NE) + CharNeOp -> (CMP EQQ, EQQ) + CharLtOp -> (CMP LTT, NE) + CharLeOp -> (CMP LE, NE) + IntGtOp -> (CMP LE, EQQ) + IntGeOp -> (CMP LTT, EQQ) + IntEqOp -> (CMP EQQ, NE) + IntNeOp -> (CMP EQQ, EQQ) + IntLtOp -> (CMP LTT, NE) + IntLeOp -> (CMP LE, NE) + WordGtOp -> (CMP ULE, EQQ) + WordGeOp -> (CMP ULT, EQQ) + WordEqOp -> (CMP EQQ, NE) + WordNeOp -> (CMP EQQ, EQQ) + WordLtOp -> (CMP ULT, NE) + WordLeOp -> (CMP ULE, NE) + AddrGtOp -> (CMP ULE, EQQ) + AddrGeOp -> (CMP ULT, EQQ) + AddrEqOp -> (CMP EQQ, NE) + AddrNeOp -> (CMP EQQ, EQQ) + AddrLtOp -> (CMP ULT, NE) + AddrLeOp -> (CMP ULE, NE) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +genCondJump id bool = do + CondCode _ cond code <- getCondCode bool + return (code `snocOL` JXX cond id) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if x86_64_TARGET_ARCH + +genCondJump id bool = do + CondCode is_float cond cond_code <- getCondCode bool + if not is_float + then + return (cond_code `snocOL` JXX cond id) + else do + lbl <- getBlockIdNat + + -- see comment with condFltReg + let code = case cond of + NE -> or_unordered + GU -> plain_test + GEU -> plain_test + _ -> and_ordered + + plain_test = unitOL ( + JXX cond id + ) + or_unordered = toOL [ + JXX cond id, + JXX PARITY id + ] + and_ordered = toOL [ + JXX PARITY lbl, + JXX cond id, + JXX ALWAYS lbl, + NEWBLOCK lbl + ] + return (cond_code `appOL` code) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +genCondJump (BlockId id) bool = do + CondCode is_float cond code <- getCondCode bool + return ( + code `appOL` + toOL ( + if is_float + then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP] + else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP] + ) + ) + +#endif /* sparc_TARGET_ARCH */ + + +#if powerpc_TARGET_ARCH + +genCondJump id bool = do + CondCode is_float cond code <- getCondCode bool + return (code `snocOL` BCC cond id) + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genCCall + :: CmmCallTarget -- function to call + -> [(CmmReg,MachHint)] -- where to put the result + -> [(CmmExpr,MachHint)] -- arguments (of mixed type) + -> Maybe [GlobalReg] -- volatile regs to save + -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +ccallResultRegs = + +genCCall fn cconv result_regs args + = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args + `thenNat` \ ((unused,_), argCode) -> + let + nRegs = length allArgRegs - length unused + code = asmSeqThen (map ($ []) argCode) + in + returnSeq code [ + LDA pv (AddrImm (ImmLab (ptext fn))), + JSR ra (AddrReg pv) nRegs, + LDGP gp (AddrReg ra)] + where + ------------------------ + {- Try to get a value into a specific register (or registers) for + a call. 
The first 6 arguments go into the appropriate + argument register (separate registers for integer and floating + point arguments, but used in lock-step), and the remaining + arguments are dumped to the stack, beginning at 0(sp). Our + first argument is a pair of the list of remaining argument + registers to be assigned for this call and the next stack + offset to use for overflowing arguments. This way, + @get_Arg@ can be applied to all of a call's arguments using + @mapAccumLNat@. + -} + get_arg + :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator) + -> StixTree -- Current argument + -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code + + -- We have to use up all of our argument registers first... + + get_arg ((iDst,fDst):dsts, offset) arg + = getRegister arg `thenNat` \ register -> + let + reg = if isFloatingRep pk then fDst else iDst + code = registerCode register reg + src = registerName register reg + pk = registerRep register + in + return ( + if isFloatingRep pk then + ((dsts, offset), if isFixed register then + code . mkSeqInstr (FMOV src fDst) + else code) + else + ((dsts, offset), if isFixed register then + code . mkSeqInstr (OR src (RIReg src) iDst) + else code)) + + -- Once we have run out of argument registers, we move to the + -- stack... + + get_arg ([], offset) arg + = getRegister arg `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + pk = registerRep register + sz = primRepToSize pk + in + return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset))) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +-- we only cope with a single result for foreign calls +genCCall (CmmPrim op) [(r,_)] args vols = do + case op of + MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args + MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args + + MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args + MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args + + MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args + MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args + + MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args + MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args + + other_op -> outOfLineFloatOp op r args vols + where + actuallyInlineFloatOp rep instr [(x,_)] + = do res <- trivialUFCode rep instr x + any <- anyReg res + return (any (getRegisterReg r)) + +genCCall target dest_regs args vols = do + let + sizes = map (arg_size . cmmExprRep . fst) (reverse args) +#if !darwin_TARGET_OS + tot_arg_size = sum sizes +#else + raw_arg_size = sum sizes + tot_arg_size = roundTo 16 raw_arg_size + arg_pad_size = tot_arg_size - raw_arg_size + delta0 <- getDeltaNat + setDeltaNat (delta0 - arg_pad_size) +#endif + + push_codes <- mapM push_arg (reverse args) + delta <- getDeltaNat + + -- in + -- deal with static vs dynamic call targets + (callinsns,cconv) <- + case target of + -- CmmPrim -> ... 
+ CmmForeignCall (CmmLit (CmmLabel lbl)) conv + -> -- ToDo: stdcall arg sizes + return (unitOL (CALL (Left fn_imm) []), conv) + where fn_imm = ImmCLbl lbl + CmmForeignCall expr conv + -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr + ASSERT(dyn_rep == I32) + return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) + + let push_code +#if darwin_TARGET_OS + | arg_pad_size /= 0 + = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), + DELTA (delta0 - arg_pad_size)] + `appOL` concatOL push_codes + | otherwise +#endif + = concatOL push_codes + call = callinsns `appOL` + toOL ( + -- Deallocate parameters after call for ccall; + -- but not for stdcall (callee does it) + (if cconv == StdCallConv || tot_arg_size==0 then [] else + [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) + ++ + [DELTA (delta + tot_arg_size)] + ) + -- in + setDeltaNat (delta + tot_arg_size) + + let + -- assign the results, if necessary + assign_code [] = nilOL + assign_code [(dest,_hint)] = + case rep of + I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest), + MOV I32 (OpReg edx) (OpReg r_dest_hi)] + F32 -> unitOL (GMOV fake0 r_dest) + F64 -> unitOL (GMOV fake0 r_dest) + rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest)) + where + r_dest_hi = getHiVRegFromLo r_dest + rep = cmmRegRep dest + r_dest = getRegisterReg dest + assign_code many = panic "genCCall.assign_code many" + + return (push_code `appOL` + call `appOL` + assign_code dest_regs) + + where + arg_size F64 = 8 + arg_size F32 = 4 + arg_size I64 = 8 + arg_size _ = 4 + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + + + push_arg :: (CmmExpr,MachHint){-current argument-} + -> NatM InstrBlock -- code + + push_arg (arg,_hint) -- we don't need the hints on x86 + | arg_rep == I64 = do + ChildCode64 code r_lo <- iselExpr64 arg + delta <- getDeltaNat + setDeltaNat (delta - 8) + let + r_hi = getHiVRegFromLo r_lo + -- in + return ( code `appOL` + toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4), + PUSH I32 (OpReg r_lo), DELTA (delta - 8), + DELTA (delta-8)] + ) + + | otherwise = do + (code, reg, sz) <- get_op arg + delta <- getDeltaNat + let size = arg_size sz + setDeltaNat (delta-size) + if (case sz of F64 -> True; F32 -> True; _ -> False) + then return (code `appOL` + toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp), + DELTA (delta-size), + GST sz reg (AddrBaseIndex (EABaseReg esp) + EAIndexNone + (ImmInt 0))] + ) + else return (code `snocOL` + PUSH I32 (OpReg reg) `snocOL` + DELTA (delta-size) + ) + where + arg_rep = cmmExprRep arg + + ------------ + get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size + get_op op = do + (reg,code) <- getSomeReg op + return (code, reg, cmmExprRep op) + +#endif /* i386_TARGET_ARCH */ + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] + -> Maybe [GlobalReg] -> NatM InstrBlock +outOfLineFloatOp mop res args vols + = do + targetExpr <- cmmMakeDynamicReference addImportNat True lbl + let target = CmmForeignCall targetExpr CCallConv + + if cmmRegRep res == F64 + then + stmtToInstrs (CmmCall target [(res,FloatHint)] args vols) + else do + uq <- getUniqueNat + let + tmp = CmmLocal (LocalReg uq F64) + -- in + code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols) + code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp)) + return (code1 `appOL` code2) + where + lbl = mkForeignLabel fn Nothing True + + fn = case mop of + MO_F32_Sqrt -> FSLIT("sqrtf") + MO_F32_Sin -> FSLIT("sinf") + MO_F32_Cos -> FSLIT("cosf") + MO_F32_Tan -> FSLIT("tanf") + 
MO_F32_Exp -> FSLIT("expf") + MO_F32_Log -> FSLIT("logf") + + MO_F32_Asin -> FSLIT("asinf") + MO_F32_Acos -> FSLIT("acosf") + MO_F32_Atan -> FSLIT("atanf") + + MO_F32_Sinh -> FSLIT("sinhf") + MO_F32_Cosh -> FSLIT("coshf") + MO_F32_Tanh -> FSLIT("tanhf") + MO_F32_Pwr -> FSLIT("powf") + + MO_F64_Sqrt -> FSLIT("sqrt") + MO_F64_Sin -> FSLIT("sin") + MO_F64_Cos -> FSLIT("cos") + MO_F64_Tan -> FSLIT("tan") + MO_F64_Exp -> FSLIT("exp") + MO_F64_Log -> FSLIT("log") + + MO_F64_Asin -> FSLIT("asin") + MO_F64_Acos -> FSLIT("acos") + MO_F64_Atan -> FSLIT("atan") + + MO_F64_Sinh -> FSLIT("sinh") + MO_F64_Cosh -> FSLIT("cosh") + MO_F64_Tanh -> FSLIT("tanh") + MO_F64_Pwr -> FSLIT("pow") + +#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if x86_64_TARGET_ARCH + +genCCall (CmmPrim op) [(r,_)] args vols = + outOfLineFloatOp op r args vols + +genCCall target dest_regs args vols = do + + -- load up the register arguments + (stack_args, aregs, fregs, load_args_code) + <- load_args args allArgRegs allFPArgRegs nilOL + + let + fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) + int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) + arg_regs = int_regs_used ++ fp_regs_used + -- for annotating the call instruction with + + sse_regs = length fp_regs_used + + tot_arg_size = arg_size * length stack_args + + -- On entry to the called function, %rsp should be aligned + -- on a 16-byte boundary +8 (i.e. the first stack arg after + -- the return address is 16-byte aligned). In STG land + -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just + -- need to make sure we push a multiple of 16-bytes of args, + -- plus the return address, to get the correct alignment. + -- Urg, this is hard. We need to feed the delta back into + -- the arg pushing code. + (real_size, adjust_rsp) <- + if tot_arg_size `rem` 16 == 0 + then return (tot_arg_size, nilOL) + else do -- we need to adjust... + delta <- getDeltaNat + setDeltaNat (delta-8) + return (tot_arg_size+8, toOL [ + SUB I64 (OpImm (ImmInt 8)) (OpReg rsp), + DELTA (delta-8) + ]) + + -- push the stack args, right to left + push_code <- push_args (reverse stack_args) nilOL + delta <- getDeltaNat + + -- deal with static vs dynamic call targets + (callinsns,cconv) <- + case target of + -- CmmPrim -> ... + CmmForeignCall (CmmLit (CmmLabel lbl)) conv + -> -- ToDo: stdcall arg sizes + return (unitOL (CALL (Left fn_imm) arg_regs), conv) + where fn_imm = ImmCLbl lbl + CmmForeignCall expr conv + -> do (dyn_r, dyn_c) <- getSomeReg expr + return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) + + let + -- The x86_64 ABI requires us to set %al to the number of SSE + -- registers that contain arguments, if the called routine + -- is a varargs function. We don't know whether it's a + -- varargs function or not, so we have to assume it is. + -- + -- It's not safe to omit this assignment, even if the number + -- of SSE regs in use is zero. If %al is larger than 8 + -- on entry to a varargs function, seg faults ensue. 
+ assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax)) + + let call = callinsns `appOL` + toOL ( + -- Deallocate parameters after call for ccall; + -- but not for stdcall (callee does it) + (if cconv == StdCallConv || real_size==0 then [] else + [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)]) + ++ + [DELTA (delta + real_size)] + ) + -- in + setDeltaNat (delta + real_size) + + let + -- assign the results, if necessary + assign_code [] = nilOL + assign_code [(dest,_hint)] = + case rep of + F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest)) + F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest)) + rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest)) + where + rep = cmmRegRep dest + r_dest = getRegisterReg dest + assign_code many = panic "genCCall.assign_code many" + + return (load_args_code `appOL` + adjust_rsp `appOL` + push_code `appOL` + assign_eax sse_regs `appOL` + call `appOL` + assign_code dest_regs) + + where + arg_size = 8 -- always, at the mo + + load_args :: [(CmmExpr,MachHint)] + -> [Reg] -- int regs avail for args + -> [Reg] -- FP regs avail for args + -> InstrBlock + -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock) + load_args args [] [] code = return (args, [], [], code) + -- no more regs to use + load_args [] aregs fregs code = return ([], aregs, fregs, code) + -- no more args to push + load_args ((arg,hint) : rest) aregs fregs code + | isFloatingRep arg_rep = + case fregs of + [] -> push_this_arg + (r:rs) -> do + arg_code <- getAnyReg arg + load_args rest aregs rs (code `appOL` arg_code r) + | otherwise = + case aregs of + [] -> push_this_arg + (r:rs) -> do + arg_code <- getAnyReg arg + load_args rest rs fregs (code `appOL` arg_code r) + where + arg_rep = cmmExprRep arg + + push_this_arg = do + (args',ars,frs,code') <- load_args rest aregs fregs code + return ((arg,hint):args', ars, frs, code') + + push_args [] code = return code + push_args ((arg,hint):rest) code + | isFloatingRep arg_rep = do + (arg_reg, arg_code) <- getSomeReg arg + delta <- getDeltaNat + setDeltaNat (delta-arg_size) + let code' = code `appOL` toOL [ + MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)), + SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) , + DELTA (delta-arg_size)] + push_args rest code' + + | otherwise = do + -- we only ever generate word-sized function arguments. Promotion + -- has already happened: our Int8# type is kept sign-extended + -- in an Int#, for example. + ASSERT(arg_rep == I64) return () + (arg_op, arg_code) <- getOperand arg + delta <- getDeltaNat + setDeltaNat (delta-arg_size) + let code' = code `appOL` toOL [PUSH I64 arg_op, + DELTA (delta-arg_size)] + push_args rest code' + where + arg_rep = cmmExprRep arg +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH +{- + The SPARC calling convention is an absolute + nightmare. The first 6x32 bits of arguments are mapped into + %o0 through %o5, and the remaining arguments are dumped to the + stack, beginning at [%sp+92]. (Note that %o6 == %sp.) + + If we have to put args on the stack, move %o6==%sp down by + the number of words to go on the stack, to ensure there's enough space. + + According to Fraser and Hanson's lcc book, page 478, fig 17.2, + 16 words above the stack pointer is a word for the address of + a structure return value. I use this as a temporary location + for moving values from float to int regs. 
Certainly it isn't + safe to put anything in the 16 words starting at %sp, since + this area can get trashed at any time due to window overflows + caused by signal handlers. + + A final complication (if the above isn't enough) is that + we can't blithely calculate the arguments one by one into + %o0 .. %o5. Consider the following nested calls: + + fff a (fff b c) + + Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately + the inner call will itself use %o0, which trashes the value put there + in preparation for the outer call. Upshot: we need to calculate the + args into temporary regs, and move those to arg regs or onto the + stack only immediately prior to the call proper. Sigh. +-} + +genCCall target dest_regs argsAndHints vols = do + let + args = map fst argsAndHints + argcode_and_vregs <- mapM arg_to_int_vregs args + let + (argcodes, vregss) = unzip argcode_and_vregs + n_argRegs = length allArgRegs + n_argRegs_used = min (length vregs) n_argRegs + vregs = concat vregss + -- deal with static vs dynamic call targets + callinsns <- (case target of + CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + CmmForeignCall expr conv -> do + (dyn_c, [dyn_r]) <- arg_to_int_vregs expr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + CmmPrim mop -> do + (res, reduce) <- outOfLineFloatOp mop + lblOrMopExpr <- case res of + Left lbl -> do + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + Right mopExpr -> do + (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr + + ) + let + argcode = concatOL argcodes + (move_sp_down, move_sp_up) + = let diff = length vregs - n_argRegs + nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment + in if nn <= 0 + then (nilOL, nilOL) + else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) + transfer_code + = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE) + return (argcode `appOL` + move_sp_down `appOL` + transfer_code `appOL` + callinsns `appOL` + unitOL NOP `appOL` + move_sp_up) + where + -- move args from the integer vregs into which they have been + -- marshalled, into %o0 .. %o5, and the rest onto the stack. + move_final :: [Reg] -> [Reg] -> Int -> [Instr] + + move_final [] _ offset -- all args done + = [] + + move_final (v:vs) [] offset -- out of aregs; move to stack + = ST I32 v (spRel offset) + : move_final vs [] (offset+1) + + move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg + = OR False g0 (RIReg v) a + : move_final vs az offset + + -- generate code to calculate an argument, and move it into one + -- or two integer vregs. 
+ arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) + arg_to_int_vregs arg + | (cmmExprRep arg) == I64 + = do + (ChildCode64 code r_lo) <- iselExpr64 arg + let + r_hi = getHiVRegFromLo r_lo + return (code, [r_hi, r_lo]) + | otherwise + = do + (src, code) <- getSomeReg arg + tmp <- getNewRegNat (cmmExprRep arg) + let + pk = cmmExprRep arg + case pk of + F64 -> do + v1 <- getNewRegNat I32 + v2 <- getNewRegNat I32 + return ( + code `snocOL` + FMOV F64 src f0 `snocOL` + ST F32 f0 (spRel 16) `snocOL` + LD I32 (spRel 16) v1 `snocOL` + ST F32 (fPair f0) (spRel 16) `snocOL` + LD I32 (spRel 16) v2 + , + [v1,v2] + ) + F32 -> do + v1 <- getNewRegNat I32 + return ( + code `snocOL` + ST F32 src (spRel 16) `snocOL` + LD I32 (spRel 16) v1 + , + [v1] + ) + other -> do + v1 <- getNewRegNat I32 + return ( + code `snocOL` OR False g0 (RIReg src) v1 + , + [v1] + ) +outOfLineFloatOp mop = + do + mopExpr <- cmmMakeDynamicReference addImportNat True $ + mkForeignLabel functionName Nothing True + let mopLabelOrExpr = case mopExpr of + CmmLit (CmmLabel lbl) -> Left lbl + _ -> Right mopExpr + return (mopLabelOrExpr, reduce) + where + (reduce, functionName) = case mop of + MO_F32_Exp -> (True, FSLIT("exp")) + MO_F32_Log -> (True, FSLIT("log")) + MO_F32_Sqrt -> (True, FSLIT("sqrt")) + + MO_F32_Sin -> (True, FSLIT("sin")) + MO_F32_Cos -> (True, FSLIT("cos")) + MO_F32_Tan -> (True, FSLIT("tan")) + + MO_F32_Asin -> (True, FSLIT("asin")) + MO_F32_Acos -> (True, FSLIT("acos")) + MO_F32_Atan -> (True, FSLIT("atan")) + + MO_F32_Sinh -> (True, FSLIT("sinh")) + MO_F32_Cosh -> (True, FSLIT("cosh")) + MO_F32_Tanh -> (True, FSLIT("tanh")) + + MO_F64_Exp -> (False, FSLIT("exp")) + MO_F64_Log -> (False, FSLIT("log")) + MO_F64_Sqrt -> (False, FSLIT("sqrt")) + + MO_F64_Sin -> (False, FSLIT("sin")) + MO_F64_Cos -> (False, FSLIT("cos")) + MO_F64_Tan -> (False, FSLIT("tan")) + + MO_F64_Asin -> (False, FSLIT("asin")) + MO_F64_Acos -> (False, FSLIT("acos")) + MO_F64_Atan -> (False, FSLIT("atan")) + + MO_F64_Sinh -> (False, FSLIT("sinh")) + MO_F64_Cosh -> (False, FSLIT("cosh")) + MO_F64_Tanh -> (False, FSLIT("tanh")) + + other -> pprPanic "outOfLineFloatOp(sparc) " + (pprCallishMachOp mop) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH + +#if darwin_TARGET_OS || linux_TARGET_OS +{- + The PowerPC calling convention for Darwin/Mac OS X + is described in Apple's document + "Inside Mac OS X - Mach-O Runtime Architecture". + + PowerPC Linux uses the System V Release 4 Calling Convention + for PowerPC. It is described in the + "System V Application Binary Interface PowerPC Processor Supplement". + + Both conventions are similar: + Parameters may be passed in general-purpose registers starting at r3, in + floating point registers starting at f1, or on the stack. + + But there are substantial differences: + * The number of registers used for parameter passing and the exact set of + nonvolatile registers differs (see MachRegs.lhs). + * On Darwin, stack space is always reserved for parameters, even if they are + passed in registers. The called routine may choose to save parameters from + registers to the corresponding space on the stack. + * On Darwin, a corresponding amount of GPRs is skipped when a floating point + parameter is passed in an FPR. + * SysV insists on either passing I64 arguments on the stack, or in two GPRs, + starting with an odd-numbered GPR. It may skip a GPR to achieve this. + Darwin just treats an I64 like two separate I32s (high word first). 
+ * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only + 4-byte aligned like everything else on Darwin. + * The SysV spec claims that F32 is represented as F64 on the stack. GCC on + PowerPC Linux does not agree, so neither do we. + + According to both conventions, The parameter area should be part of the + caller's stack frame, allocated in the caller's prologue code (large enough + to hold the parameter lists for all called routines). The NCG already + uses the stack for register spilling, leaving 64 bytes free at the top. + If we need a larger parameter area than that, we just allocate a new stack + frame just before ccalling. +-} + +genCCall target dest_regs argsAndHints vols + = ASSERT (not $ any (`elem` [I8,I16]) argReps) + -- we rely on argument promotion in the codeGen + do + (finalStack,passArgumentsCode,usedRegs) <- passArguments + (zip args argReps) + allArgRegs allFPArgRegs + initialStackOffset + (toOL []) [] + + (labelOrExpr, reduceToF32) <- case target of + CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False) + CmmForeignCall expr conv -> return (Right expr, False) + CmmPrim mop -> outOfLineFloatOp mop + + let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode + codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32 + + case labelOrExpr of + Left lbl -> do + return ( codeBefore + `snocOL` BL lbl usedRegs + `appOL` codeAfter) + Right dyn -> do + (dynReg, dynCode) <- getSomeReg dyn + return ( dynCode + `snocOL` MTCTR dynReg + `appOL` codeBefore + `snocOL` BCTRL usedRegs + `appOL` codeAfter) + where +#if darwin_TARGET_OS + initialStackOffset = 24 + -- size of linkage area + size of arguments, in bytes + stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $ + map machRepByteWidth argReps +#elif linux_TARGET_OS + initialStackOffset = 8 + stackDelta finalStack = roundTo 16 finalStack +#endif + args = map fst argsAndHints + argReps = map cmmExprRep args + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + + move_sp_down finalStack + | delta > 64 = + toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))), + DELTA (-delta)] + | otherwise = nilOL + where delta = stackDelta finalStack + move_sp_up finalStack + | delta > 64 = + toOL [ADD sp sp (RIImm (ImmInt delta)), + DELTA 0] + | otherwise = nilOL + where delta = stackDelta finalStack + + + passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) + passArguments ((arg,I64):args) gprs fprs stackOffset + accumCode accumUsed = + do + ChildCode64 code vr_lo <- iselExpr64 arg + let vr_hi = getHiVRegFromLo vr_lo + +#if darwin_TARGET_OS + passArguments args + (drop 2 gprs) + fprs + (stackOffset+8) + (accumCode `appOL` code + `snocOL` storeWord vr_hi gprs stackOffset + `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) + ((take 2 gprs) ++ accumUsed) + where + storeWord vr (gpr:_) offset = MR gpr vr + storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset)) + +#elif linux_TARGET_OS + let stackOffset' = roundTo 8 stackOffset + stackCode = accumCode `appOL` code + `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) + `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) + regCode hireg loreg = + accumCode `appOL` code + `snocOL` MR hireg vr_hi + `snocOL` MR loreg vr_lo + + case gprs of + hireg : loreg : regs | even (length gprs) -> + passArguments args regs fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _skipped : hireg : loreg : regs -> + passArguments args regs 
fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _ -> -- only one or no regs left + passArguments args [] fprs (stackOffset'+8) + stackCode accumUsed +#endif + + passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed + | reg : _ <- regs = do + register <- getRegister arg + let code = case register of + Fixed _ freg fcode -> fcode `snocOL` MR reg freg + Any _ acode -> acode reg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we reserve stack slots for register parameters + (stackOffset + stackBytes) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. + stackOffset +#endif + (accumCode `appOL` code) + (reg : accumUsed) + | otherwise = do + (vr, code) <- getSomeReg arg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) + (stackOffset' + stackBytes) + (accumCode `appOL` code `snocOL` ST rep vr stackSlot) + accumUsed + where +#if darwin_TARGET_OS + -- stackOffset is at least 4-byte aligned + -- The Darwin ABI is happy with that. + stackOffset' = stackOffset +#else + -- ... the SysV ABI requires 8-byte alignment for doubles. + stackOffset' | rep == F64 = roundTo 8 stackOffset + | otherwise = stackOffset +#endif + stackSlot = AddrRegImm sp (ImmInt stackOffset') + (nGprs, nFprs, stackBytes, regs) = case rep of + I32 -> (1, 0, 4, gprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we skip a corresponding number of GPRs when + -- we use the FPRs. + F32 -> (1, 1, 4, fprs) + F64 -> (2, 1, 8, fprs) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. + F32 -> (0, 1, 4, fprs) + F64 -> (0, 1, 8, fprs) +#endif + + moveResult reduceToF32 = + case dest_regs of + [] -> nilOL + [(dest, _hint)] + | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1) + | rep == F32 || rep == F64 -> unitOL (MR r_dest f1) + | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3, + MR r_dest r4] + | otherwise -> unitOL (MR r_dest r3) + where rep = cmmRegRep dest + r_dest = getRegisterReg dest + + outOfLineFloatOp mop = + do + mopExpr <- cmmMakeDynamicReference addImportNat True $ + mkForeignLabel functionName Nothing True + let mopLabelOrExpr = case mopExpr of + CmmLit (CmmLabel lbl) -> Left lbl + _ -> Right mopExpr + return (mopLabelOrExpr, reduce) + where + (functionName, reduce) = case mop of + MO_F32_Exp -> (FSLIT("exp"), True) + MO_F32_Log -> (FSLIT("log"), True) + MO_F32_Sqrt -> (FSLIT("sqrt"), True) + + MO_F32_Sin -> (FSLIT("sin"), True) + MO_F32_Cos -> (FSLIT("cos"), True) + MO_F32_Tan -> (FSLIT("tan"), True) + + MO_F32_Asin -> (FSLIT("asin"), True) + MO_F32_Acos -> (FSLIT("acos"), True) + MO_F32_Atan -> (FSLIT("atan"), True) + + MO_F32_Sinh -> (FSLIT("sinh"), True) + MO_F32_Cosh -> (FSLIT("cosh"), True) + MO_F32_Tanh -> (FSLIT("tanh"), True) + MO_F32_Pwr -> (FSLIT("pow"), True) + + MO_F64_Exp -> (FSLIT("exp"), False) + MO_F64_Log -> (FSLIT("log"), False) + MO_F64_Sqrt -> (FSLIT("sqrt"), False) + + MO_F64_Sin -> (FSLIT("sin"), False) + MO_F64_Cos -> (FSLIT("cos"), False) + MO_F64_Tan -> (FSLIT("tan"), False) + + MO_F64_Asin -> (FSLIT("asin"), False) + MO_F64_Acos -> (FSLIT("acos"), False) + MO_F64_Atan -> (FSLIT("atan"), False) + + MO_F64_Sinh -> (FSLIT("sinh"), False) + MO_F64_Cosh -> (FSLIT("cosh"), False) + MO_F64_Tanh -> (FSLIT("tanh"), False) + MO_F64_Pwr -> (FSLIT("pow"), False) + other -> pprPanic "genCCall(ppc): unknown callish op" + (pprCallishMachOp other) + +#endif /* darwin_TARGET_OS || linux_TARGET_OS */ + +#endif /* powerpc_TARGET_ARCH */ + + +-- 
----------------------------------------------------------------------------- +-- Generating a table-branch + +genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +genSwitch expr ids + | opt_PIC + = do + (reg,e_code) <- getSomeReg expr + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference addImportNat False lbl + (tableReg,t_code) <- getSomeReg $ dynRef + let + jumpTable = map jumpTableEntryRel ids + + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordRep) + jumpTableEntryRel (Just (BlockId id)) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel id + + op = OpAddr (AddrBaseIndex (EABaseReg tableReg) + (EAIndex reg wORD_SIZE) (ImmInt 0)) + + code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + ADD wordRep op (OpReg tableReg), + JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] + ] + return code + | otherwise + = do + (reg,e_code) <- getSomeReg expr + lbl <- getNewLabelNat + let + jumpTable = map jumpTableEntry ids + op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) + code = e_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + JMP_TBL op [ id | Just id <- ids ] + ] + -- in + return code +#elif powerpc_TARGET_ARCH +genSwitch expr ids + | opt_PIC + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat I32 + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference addImportNat False lbl + (tableReg,t_code) <- getSomeReg $ dynRef + let + jumpTable = map jumpTableEntryRel ids + + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordRep) + jumpTableEntryRel (Just (BlockId id)) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel id + + code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + LD I32 tmp (AddrRegReg tableReg tmp), + ADD tmp tmp (RIReg tableReg), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code + | otherwise + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat I32 + lbl <- getNewLabelNat + let + jumpTable = map jumpTableEntry ids + + code = e_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + ADDIS tmp tmp (HA (ImmCLbl lbl)), + LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code +#else +genSwitch expr ids = panic "ToDo: genSwitch" +#endif + +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + +-- ----------------------------------------------------------------------------- +-- Support bits +-- ----------------------------------------------------------------------------- + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- Turn those condition codes into integers now (when they appear on +-- the right hand side of an assignment). +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. 
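+--
+-- For instance (an illustrative sketch, not from the original file), a
+-- comparison used as a value rather than as a branch condition, e.g.
+--
+--	CmmAssign r (CmmMachOp (MO_S_Lt I32) [x, y])
+--
+-- ends up here (presumably dispatched from getRegister) and must leave an
+-- explicit 0 or 1 in the destination: SETCC + MOVZxL on x86, explicit
+-- branches on SPARC, and condition-register bit twiddling on PowerPC, as
+-- below.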
+ +condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH +condIntReg = panic "MachCode.condIntReg (not on Alpha)" +condFltReg = panic "MachCode.condFltReg (not on Alpha)" +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +condIntReg cond x y = do + CondCode _ cond cond_code <- condIntCode cond x y + tmp <- getNewRegNat I8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL I8 (OpReg tmp) (OpReg dst) + ] + -- in + return (Any I32 code) + +#endif + +#if i386_TARGET_ARCH + +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp <- getNewRegNat I8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL I8 (OpReg tmp) (OpReg dst) + ] + -- in + return (Any I32 code) + +#endif + +#if x86_64_TARGET_ARCH + +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp1 <- getNewRegNat wordRep + tmp2 <- getNewRegNat wordRep + let + -- We have to worry about unordered operands (eg. comparisons + -- against NaN). If the operands are unordered, the comparison + -- sets the parity flag, carry flag and zero flag. + -- All comparisons are supposed to return false for unordered + -- operands except for !=, which returns true. + -- + -- Optimisation: we don't have to test the parity flag if we + -- know the test has already excluded the unordered case: eg > + -- and >= test for a zero carry flag, which can only occur for + -- ordered operands. + -- + -- ToDo: by reversing comparisons we could avoid testing the + -- parity flag in more cases. + + code dst = + cond_code `appOL` + (case cond of + NE -> or_unordered dst + GU -> plain_test dst + GEU -> plain_test dst + _ -> and_ordered dst) + + plain_test dst = toOL [ + SETCC cond (OpReg tmp1), + MOVZxL I8 (OpReg tmp1) (OpReg dst) + ] + or_unordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC PARITY (OpReg tmp2), + OR I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + and_ordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC NOTPARITY (OpReg tmp2), + AND I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + -- in + return (Any I32 code) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat I32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any I32 code__2) + +condIntReg EQQ x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat I32 + tmp2 <- getNewRegNat I32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB False True g0 (RIReg dst) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any I32 code__2) + +condIntReg NE x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat I32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any I32 code__2) + +condIntReg NE x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat I32 + tmp2 <- getNewRegNat I32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB 
False True g0 (RIReg dst) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any I32 code__2) + +condIntReg cond x y = do + BlockId lbl1 <- getBlockIdNat + BlockId lbl2 <- getBlockIdNat + CondCode _ cond cond_code <- condIntCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP, + NEWBLOCK (BlockId lbl1), + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK (BlockId lbl2)] + return (Any I32 code__2) + +condFltReg cond x y = do + BlockId lbl1 <- getBlockIdNat + BlockId lbl2 <- getBlockIdNat + CondCode _ cond cond_code <- condFltCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + NOP, + BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP, + NEWBLOCK (BlockId lbl1), + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK (BlockId lbl2)] + return (Any I32 code__2) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH +condReg getCond = do + lbl1 <- getBlockIdNat + lbl2 <- getBlockIdNat + CondCode _ cond cond_code <- getCond + let +{- code dst = cond_code `appOL` toOL [ + BCC cond lbl1, + LI dst (ImmInt 0), + BCC ALWAYS lbl2, + NEWBLOCK lbl1, + LI dst (ImmInt 1), + BCC ALWAYS lbl2, + NEWBLOCK lbl2 + ]-} + code dst = cond_code + `appOL` negate_code + `appOL` toOL [ + MFCR dst, + RLWINM dst dst (bit + 1) 31 31 + ] + + negate_code | do_negate = unitOL (CRNOR bit bit bit) + | otherwise = nilOL + + (bit, do_negate) = case cond of + LTT -> (0, False) + LE -> (1, True) + EQQ -> (2, False) + GE -> (0, True) + GTT -> (1, False) + + NE -> (2, True) + + LU -> (0, False) + LEU -> (1, True) + GEU -> (0, True) + GU -> (1, False) + + return (Any I32 code) + +condIntReg cond x y = condReg (condIntCode cond x y) +condFltReg cond x y = condReg (condFltCode cond x y) +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- 'trivial*Code': deal with trivial instructions + +-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', +-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. +-- Only look for constants on the right hand side, because that's +-- where the generic optimizer will have put them. + +-- Similarly, for unary instructions, we don't have to worry about +-- matching an StInt as the argument, because genericOpt will already +-- have handled the constant-folding. 
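+
+-- Illustrative sketch (not from the original source): a typical caller in
+-- getRegister hands a dyadic MachOp to trivialCode together with the
+-- instruction constructor(s) for this target, along the lines of
+--
+--     MO_Add rep -> trivialCode rep (ADD rep) (Just (ADD rep)) x y
+--     MO_Sub rep -> trivialCode rep (SUB rep) Nothing          x y
+--
+-- (the exact code differs per architecture; SUB gets no reversed form
+-- because subtraction is not commutative).  Since any constant has already
+-- been commuted to the right, e.g.
+--     CmmMachOp (MO_Add I32) [CmmReg x, CmmLit (CmmInt 3 I32)]
+-- the immediate-operand cases below mostly need to inspect only the second
+-- CmmExpr argument; the x86 version additionally catches a left-hand
+-- literal by using the reversed instruction.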
+ +trivialCode + :: MachRep + -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr) + ,IF_ARCH_i386 ((Operand -> Operand -> Instr) + -> Maybe (Operand -> Operand -> Instr) + ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) + -> Maybe (Operand -> Operand -> Instr) + ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr) + ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr) + ,))))) + -> CmmExpr -> CmmExpr -- the two arguments + -> NatM Register + +#ifndef powerpc_TARGET_ARCH +trivialFCode + :: MachRep + -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr) + ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr) + ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr) + ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr) + ,)))) + -> CmmExpr -> CmmExpr -- the two arguments + -> NatM Register +#endif + +trivialUCode + :: MachRep + -> IF_ARCH_alpha((RI -> Reg -> Instr) + ,IF_ARCH_i386 ((Operand -> Instr) + ,IF_ARCH_x86_64 ((Operand -> Instr) + ,IF_ARCH_sparc((RI -> Reg -> Instr) + ,IF_ARCH_powerpc((Reg -> Reg -> Instr) + ,))))) + -> CmmExpr -- the one argument + -> NatM Register + +#ifndef powerpc_TARGET_ARCH +trivialUFCode + :: MachRep + -> IF_ARCH_alpha((Reg -> Reg -> Instr) + ,IF_ARCH_i386 ((Reg -> Reg -> Instr) + ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr) + ,IF_ARCH_sparc((Reg -> Reg -> Instr) + ,)))) + -> CmmExpr -- the one argument + -> NatM Register +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +trivialCode instr x (StInt y) + | fits8Bits y + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src1 = registerName register tmp + src2 = ImmInt (fromInteger y) + code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst) + in + return (Any IntRep code__2) + +trivialCode instr x y + = getRegister x `thenNat` \ register1 -> + getRegister y `thenNat` \ register2 -> + getNewRegNat IntRep `thenNat` \ tmp1 -> + getNewRegNat IntRep `thenNat` \ tmp2 -> + let + code1 = registerCode register1 tmp1 [] + src1 = registerName register1 tmp1 + code2 = registerCode register2 tmp2 [] + src2 = registerName register2 tmp2 + code__2 dst = asmSeqThen [code1, code2] . + mkSeqInstr (instr src1 (RIReg src2) dst) + in + return (Any IntRep code__2) + +------------ +trivialUCode instr x + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . mkSeqInstr (instr (RIReg src) dst) + in + return (Any IntRep code__2) + +------------ +trivialFCode _ instr x y + = getRegister x `thenNat` \ register1 -> + getRegister y `thenNat` \ register2 -> + getNewRegNat F64 `thenNat` \ tmp1 -> + getNewRegNat F64 `thenNat` \ tmp2 -> + let + code1 = registerCode register1 tmp1 + src1 = registerName register1 tmp1 + + code2 = registerCode register2 tmp2 + src2 = registerName register2 tmp2 + + code__2 dst = asmSeqThen [code1 [], code2 []] . + mkSeqInstr (instr src1 src2 dst) + in + return (Any F64 code__2) + +trivialUFCode _ instr x + = getRegister x `thenNat` \ register -> + getNewRegNat F64 `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . 
mkSeqInstr (instr src dst) + in + return (Any F64 code__2) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + +{- +The Rules of the Game are: + +* You cannot assume anything about the destination register dst; + it may be anything, including a fixed reg. + +* You may compute an operand into a fixed reg, but you may not + subsequently change the contents of that fixed reg. If you + want to do so, first copy the value either to a temporary + or into dst. You are free to modify dst even if it happens + to be a fixed reg -- that's not your problem. + +* You cannot assume that a fixed reg will stay live over an + arbitrary computation. The same applies to the dst reg. + +* Temporary regs obtained from getNewRegNat are distinct from + each other and from all other regs, and stay live over + arbitrary computations. + +-------------------- + +SDM's version of The Rules: + +* If getRegister returns Any, that means it can generate correct + code which places the result in any register, period. Even if that + register happens to be read during the computation. + + Corollary #1: this means that if you are generating code for an + operation with two arbitrary operands, you cannot assign the result + of the first operand into the destination register before computing + the second operand. The second operand might require the old value + of the destination register. + + Corollary #2: A function might be able to generate more efficient + code if it knows the destination register is a new temporary (and + therefore not read by any of the sub-computations). + +* If getRegister returns Any, then the code it generates may modify only: + (a) fresh temporaries + (b) the destination register + (c) known registers (eg. %ecx is used by shifts) + In particular, it may *not* modify global registers, unless the global + register happens to be the destination register. +-} + +trivialCode rep instr (Just revinstr) (CmmLit lit_a) b + | not (is64BitLit lit_a) = do + b_code <- getAnyReg b + let + code dst + = b_code dst `snocOL` + revinstr (OpImm (litToImm lit_a)) (OpReg dst) + -- in + return (Any rep code) + +trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b + +-- This is re-used for floating pt instructions too. +genTrivialCode rep instr a b = do + (b_op, b_code) <- getNonClobberedOperand b + a_code <- getAnyReg a + tmp <- getNewRegNat rep + let + -- We want the value of b to stay alive across the computation of a. + -- But, we want to calculate a straight into the destination register, + -- because the instruction only has two operands (dst := dst `op` src). + -- The troublesome case is when the result of b is in the same register + -- as the destination reg. In this case, we have to save b in a + -- new temporary across the computation of a. 
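+        -- Illustrative example (not from the original source): for (a - b),
+        -- if the operand holding b turns out to be the very register later
+        -- chosen as dst (register names arbitrary, say %eax), we emit
+        --
+        --      <code for b>              -- b ends up in %eax
+        --      mov   %eax, %tmp          -- save b out of the way
+        --      <code for a into %eax>    -- a goes straight into dst
+        --      sub   %tmp, %eax          -- dst := dst `op` b
+        --
+        -- otherwise the MOV is skipped and b's operand is used directly.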
+ code dst + | dst `regClashesWithOp` b_op = + b_code `appOL` + unitOL (MOV rep b_op (OpReg tmp)) `appOL` + a_code dst `snocOL` + instr (OpReg tmp) (OpReg dst) + | otherwise = + b_code `appOL` + a_code dst `snocOL` + instr b_op (OpReg dst) + -- in + return (Any rep code) + +reg `regClashesWithOp` OpReg reg2 = reg == reg2 +reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) +reg `regClashesWithOp` _ = False + +----------- + +trivialUCode rep instr x = do + x_code <- getAnyReg x + let + code dst = + x_code dst `snocOL` + instr (OpReg dst) + -- in + return (Any rep code) + +----------- + +#if i386_TARGET_ARCH + +trivialFCode pk instr x y = do + (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too + (y_reg, y_code) <- getSomeReg y + let + code dst = + x_code `appOL` + y_code `snocOL` + instr pk x_reg y_reg dst + -- in + return (Any pk code) + +#endif + +#if x86_64_TARGET_ARCH + +trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y + +#endif + +------------- + +trivialUFCode rep instr x = do + (x_reg, x_code) <- getSomeReg x + let + code dst = + x_code `snocOL` + instr x_reg dst + -- in + return (Any rep code) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +trivialCode pk instr x (CmmLit (CmmInt y d)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + tmp <- getNewRegNat I32 + let + src2 = ImmInt (fromInteger y) + code__2 dst = code `snocOL` instr src1 (RIImm src2) dst + return (Any I32 code__2) + +trivialCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat I32 + tmp2 <- getNewRegNat I32 + let + code__2 dst = code1 `appOL` code2 `snocOL` + instr src1 (RIReg src2) dst + return (Any I32 code__2) + +------------ +trivialFCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat (cmmExprRep x) + tmp2 <- getNewRegNat (cmmExprRep y) + tmp <- getNewRegNat F64 + let + promote x = FxTOy F32 F64 x tmp + + pk1 = cmmExprRep x + pk2 = cmmExprRep y + + code__2 dst = + if pk1 == pk2 then + code1 `appOL` code2 `snocOL` + instr pk src1 src2 dst + else if pk1 == F32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + instr F64 tmp src2 dst + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + instr F64 src1 tmp dst + return (Any (if pk1 == pk2 then pk1 else F64) code__2) + +------------ +trivialUCode pk instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat pk + let + code__2 dst = code `snocOL` instr (RIReg src) dst + return (Any pk code__2) + +------------- +trivialUFCode pk instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat pk + let + code__2 dst = code `snocOL` instr src dst + return (Any pk code__2) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH + +{- +Wolfgang's PowerPC version of The Rules: + +A slightly modified version of The Rules to take advantage of the fact +that PowerPC instructions work on all registers and don't implicitly +clobber any fixed registers. + +* The only expression for which getRegister returns Fixed is (CmmReg reg). + +* If getRegister returns Any, then the code it generates may modify only: + (a) fresh temporaries + (b) the destination register + It may *not* modify global registers, unless the global + register happens to be the destination register. + It may not clobber any other registers. In fact, only ccalls clobber any + fixed registers. 
+ Also, it may not modify the counter register (used by genCCall). + + Corollary: If a getRegister for a subexpression returns Fixed, you need + not move it to a fresh temporary before evaluating the next subexpression. + The Fixed register won't be modified. + Therefore, we don't need a counterpart for the x86's getStableReg on PPC. + +* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on + the value of the destination register. +-} + +trivialCode rep signed instr x (CmmLit (CmmInt y _)) + | Just imm <- makeImmediate rep signed y + = do + (src1, code1) <- getSomeReg x + let code dst = code1 `snocOL` instr dst src1 (RIImm imm) + return (Any rep code) + +trivialCode rep signed instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) + return (Any rep code) + +trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm rep instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 + return (Any rep code) + +trivialUCode rep instr x = do + (src, code) <- getSomeReg x + let code' dst = code `snocOL` instr dst src + return (Any rep code') + +-- There is no "remainder" instruction on the PPC, so we have to do +-- it the hard way. +-- The "div" parameter is the division instruction to use (DIVW or DIVWU) + +remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +remainderCode rep div x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `appOL` toOL [ + div dst src1 src2, + MULLW dst dst (RIReg src2), + SUBF dst dst src1 + ] + return (Any rep code) + +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Coercing to/from integer/floating-point... + +-- @coerce(Int2FP|FP2Int)@ are more complicated integer/float +-- conversions. We have to store temporaries in memory to move +-- between the integer and the floating point register sets. + +-- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we +-- pretend, on sparc at least, that double and float regs are seperate +-- kinds, so the value has to be computed into one kind before being +-- explicitly "converted" to live in the other kind. + +coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register +coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register + +#if sparc_TARGET_ARCH +coerceDbl2Flt :: CmmExpr -> NatM Register +coerceFlt2Dbl :: CmmExpr -> NatM Register +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +coerceInt2FP _ x + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ reg -> + let + code = registerCode register reg + src = registerName register reg + + code__2 dst = code . mkSeqInstrs [ + ST Q src (spRel 0), + LD TF dst (spRel 0), + CVTxy Q TF dst dst] + in + return (Any F64 code__2) + +------------- +coerceFP2Int x + = getRegister x `thenNat` \ register -> + getNewRegNat F64 `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + + code__2 dst = code . 
mkSeqInstrs [ + CVTxy TF Q src tmp, + ST TF tmp (spRel 0), + LD Q dst (spRel 0)] + in + return (Any IntRep code__2) + +#endif /* alpha_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +coerceInt2FP from to x = do + (x_reg, x_code) <- getSomeReg x + let + opc = case to of F32 -> GITOF; F64 -> GITOD + code dst = x_code `snocOL` opc x_reg dst + -- ToDo: works for non-I32 reps? + -- in + return (Any to code) + +------------ + +coerceFP2Int from to x = do + (x_reg, x_code) <- getSomeReg x + let + opc = case from of F32 -> GFTOI; F64 -> GDTOI + code dst = x_code `snocOL` opc x_reg dst + -- ToDo: works for non-I32 reps? + -- in + return (Any to code) + +#endif /* i386_TARGET_ARCH */ + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if x86_64_TARGET_ARCH + +coerceFP2Int from to x = do + (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand + let + opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI + code dst = x_code `snocOL` opc x_op dst + -- in + return (Any to code) -- works even if the destination rep is <I32 + +coerceInt2FP from to x = do + (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand + let + opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD + code dst = x_code `snocOL` opc x_op dst + -- in + return (Any to code) -- works even if the destination rep is <I32 + +coerceFP2FP :: MachRep -> CmmExpr -> NatM Register +coerceFP2FP to x = do + (x_reg, x_code) <- getSomeReg x + let + opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD + code dst = x_code `snocOL` opc x_reg dst + -- in + return (Any to code) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if sparc_TARGET_ARCH + +coerceInt2FP pk1 pk2 x = do + (src, code) <- getSomeReg x + let + code__2 dst = code `appOL` toOL [ + ST pk1 src (spRel (-2)), + LD pk1 (spRel (-2)) dst, + FxTOy pk1 pk2 dst dst] + return (Any pk2 code__2) + +------------ +coerceFP2Int pk fprep x = do + (src, code) <- getSomeReg x + reg <- getNewRegNat fprep + tmp <- getNewRegNat pk + let + code__2 dst = ASSERT(fprep == F64 || fprep == F32) + code `appOL` toOL [ + FxTOy fprep pk src tmp, + ST pk tmp (spRel (-2)), + LD pk (spRel (-2)) dst] + return (Any pk code__2) + +------------ +coerceDbl2Flt x = do + (src, code) <- getSomeReg x + return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst)) + +------------ +coerceFlt2Dbl x = do + (src, code) <- getSomeReg x + return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst)) + +#endif /* sparc_TARGET_ARCH */ + +#if powerpc_TARGET_ARCH +coerceInt2FP fromRep toRep x = do + (src, code) <- getSomeReg x + lbl <- getNewLabelNat + itmp <- getNewRegNat I32 + ftmp <- getNewRegNat F64 + dynRef <- cmmMakeDynamicReference addImportNat False lbl + Amode addr addr_code <- getAmode dynRef + let + code' dst = code `appOL` maybe_exts `appOL` toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x43300000 I32), + CmmStaticLit (CmmInt 0x80000000 I32)], + XORIS itmp src (ImmInt 0x8000), + ST I32 itmp (spRel 3), + LIS itmp (ImmInt 0x4330), + ST I32 itmp (spRel 2), + LD F64 ftmp (spRel 2) + ] `appOL` addr_code `appOL` toOL [ + LD F64 dst addr, + FSUB F64 dst ftmp dst + ] `appOL` maybe_frsp dst + + maybe_exts = case fromRep of + I8 -> unitOL $ EXTS I8 src src + I16 -> unitOL $ EXTS I16 src src + I32 -> nilOL + maybe_frsp dst = case toRep of + F32 -> unitOL $ FRSP dst dst + F64 -> nilOL + return (Any toRep code') + +coerceFP2Int fromRep toRep x = do + -- the reps don't really matter: F*->F64 
and I32->I* are no-ops + (src, code) <- getSomeReg x + tmp <- getNewRegNat F64 + let + code' dst = code `appOL` toOL [ + -- convert to int in FP reg + FCTIWZ tmp src, + -- store value (64bit) from FP to stack + ST F64 tmp (spRel 2), + -- read low word of value (high word is undefined) + LD I32 dst (spRel 3)] + return (Any toRep code') +#endif /* powerpc_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- eXTRA_STK_ARGS_HERE + +-- We (allegedly) put the first six C-call arguments in registers; +-- where do we start putting the rest of them? + +-- Moved from MachInstrs (SDM): + +#if alpha_TARGET_ARCH || sparc_TARGET_ARCH +eXTRA_STK_ARGS_HERE :: Int +eXTRA_STK_ARGS_HERE + = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???)) +#endif +
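+
+-- Illustrative note (not from the original source): the sparc value above
+-- is a word offset, so overflow arguments start at 23 * 4 = 92 bytes from
+-- %sp, i.e. just past the 64-byte register-window save area, the hidden
+-- struct-return word and the six argument home slots that the SPARC ABI
+-- reserves at the bottom of every stack frame.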