Diffstat (limited to 'compiler/nativeGen/PPC/CodeGen.hs')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1364 |
1 file changed, 1364 insertions, 0 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
new file mode 100644
index 0000000000..6661a3ec92
--- /dev/null
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -0,0 +1,1364 @@
+{-# OPTIONS -w #-}
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module PPC.CodeGen (
+        cmmTopCodeGen,
+        InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import PPC.Instr
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import NCGMonad
+import Instruction
+import PIC
+import Size
+import RegClass
+import Reg
+import Platform
+
+-- Our intermediate code:
+import BlockId
+import PprCmm           ( pprExpr )
+import Cmm
+import CLabel
+
+-- The rest:
+import StaticFlags      ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import DynFlags
+
+import Control.Monad    ( mapAndUnzipM )
+import Data.Bits
+import Data.Int
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+cmmTopCodeGen
+        :: DynFlags
+        -> RawCmmTop
+        -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
+  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+  picBaseMb <- getPicBaseMaybeNat
+  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+      tops = proc : concat statics
+      os   = platformOS $ targetPlatform dflags
+  case picBaseMb of
+      Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
+      Nothing      -> return tops
+
+cmmTopCodeGen dflags (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+basicBlockCodeGen
+        :: CmmBasicBlock
+        -> NatM ( [NatBasicBlock Instr]
+                , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction. We must split up the
+  -- instruction stream into basic blocks again. Also, we extract
+  -- LDATAs here too.
+  let
+        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+          = ([], BasicBlock id instrs : blocks, statics)
+        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+          = (instrs, blocks, CmmData sec dat:statics)
+        mkBlocks instr (instrs,blocks,statics)
+          = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop         -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignReg_I64Code reg src
+#endif
+      | otherwise      -> assignReg_IntCode size reg src
+        where ty   = cmmRegType reg
+              size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignMem_I64Code addr src
+#endif
+      | otherwise      -> assignMem_IntCode size addr src
+        where ty   = cmmExprType src
+              size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch id         -> genBranch id
+    CmmCondBranch arg id -> genCondJump id arg
+    CmmSwitch arg ids    -> genSwitch arg ids
+    CmmJump arg params   -> genJump arg
+    CmmReturn params     ->
+      panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+        = OrdList Instr
+
+
+-- | Registers passed up the tree.  If the stix code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+        = Fixed Size Reg InstrBlock
+        | Any   Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
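
To make the Fixed/Any split concrete: a consumer of a Register either takes the fixed register as-is, or picks a destination itself and applies the code function to it. The sketch below is illustrative only (it is essentially what getSomeReg, defined further down in this file, does) and is not part of the patch:

    -- Illustrative sketch, not part of the patch: consuming a Register.
    useRegister :: Register -> NatM (Reg, InstrBlock)
    useRegister (Fixed _size reg code) = return (reg, code)  -- register pre-decided
    useRegister (Any size codefn) = do
        tmp <- getNewRegNat size        -- the parent chooses the destination
        return (tmp, codefn tmp)        -- only now is the code instantiated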
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64.  This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+  where i64 = fromIntegral i :: Int64
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+    where blockLabel = mkAsmTempLabel id
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+  where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+        = panic "PPC.CodeGen.mangleIndexTree: no match"
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (i.e., 64-bit values and 64-bit
+assignments) on 32-bit platforms.  Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality.  Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result.  Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64        -- a.k.a "Register64"
+        = ChildCode64
+             InstrBlock -- code
+             Reg        -- the lower 32-bit temporary which contains the
+                        -- result; use getHiVRegFromLo to find the other
+                        -- VRegUnique.  Rules of this simplified insn
+                        -- selection game are therefore that the returned
+                        -- Reg may be modified
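
The ChildCode64 contract in practice: only the low register is stored, and the high half is recovered with getHiVRegFromLo. A minimal illustrative consumer, using only names from this module (a sketch, not part of the patch):

    -- Illustrative sketch: recovering both halves of a 64-bit result.
    useExpr64 :: CmmExpr -> NatM (Reg, Reg, InstrBlock)
    useExpr64 e = do
        ChildCode64 code rlo <- iselExpr64 e
        let rhi = getHiVRegFromLo rlo   -- upper half, per the contract above
        return (rlo, rhi, code)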
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+--      we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+        tmp <- getNewRegNat rep
+        return (tmp, code tmp)
+    Fixed _ reg code ->
+        return (reg, code)
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+    Amode hi_addr addr_code <- getAmode addrTree
+    case addrOffset hi_addr 4 of
+       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
+                          return (AddrRegImm hi_ptr (ImmInt 0),
+                                  AddrRegImm hi_ptr (ImmInt 4),
+                                  code)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+        ChildCode64 vcode rlo <- iselExpr64 valueTree
+        let
+                rhi = getHiVRegFromLo rlo
+
+                -- Big-endian store
+                mov_hi = ST II32 rhi hi_addr
+                mov_lo = ST II32 rlo lo_addr
+        -- in
+        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+   let
+         r_dst_lo = mkVReg u_dst II32
+         r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = MR r_dst_lo r_src_lo
+         mov_hi = MR r_dst_hi r_src_hi
+   -- in
+   return (
+        vcode `snocOL` mov_lo `snocOL` mov_hi
+     )
+
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LD II32 rhi hi_addr
+        mov_lo = LD II32 rlo lo_addr
+    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+                         rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+   = return (ChildCode64 nilOL (mkVReg vu II32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+  (rlo,rhi) <- getNewRegPairNat II32
+  let
+        half0 = fromIntegral (fromIntegral i :: Word16)
+        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+
+        code = toOL [
+                LIS rlo (ImmInt half1),
+                OR rlo rlo (RIImm $ ImmInt half0),
+                LIS rhi (ImmInt half3),
+                OR rhi rhi (RIImm $ ImmInt half2)
+                ]
+  -- in
+  return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+   ChildCode64 code1 r1lo <- iselExpr64 e1
+   ChildCode64 code2 r2lo <- iselExpr64 e2
+   (rlo,rhi) <- getNewRegPairNat II32
+   let
+        r1hi = getHiVRegFromLo r1lo
+        r2hi = getHiVRegFromLo r2lo
+        code =  code1 `appOL`
+                code2 `appOL`
+                toOL [ ADDC rlo r1lo r2lo,
+                       ADDE rhi r1hi r2hi ]
+   -- in
+   return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
+    (expr_reg,expr_code) <- getSomeReg expr
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LI rhi (ImmInt 0)
+        mov_lo = MR rlo expr_reg
+    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+                         rlo
+iselExpr64 expr
+   = pprPanic "iselExpr64(powerpc)" (ppr expr)
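
A worked example of the half-splitting in the CmmInt case above (illustrative only; it uses nothing beyond Data.Bits and Data.Word, which the module already imports). For i = 0x0123456789ABCDEF, the LIS/OR pairs build rhi = 0x01234567 and rlo = 0x89ABCDEF:

    -- Illustrative sketch of the four 16-bit halves:
    --   half0 = 0xCDEF  (bits  0..15, OR'd into rlo)
    --   half1 = 0x89AB  (bits 16..31, loaded by LIS into rlo)
    --   half2 = 0x4567  (bits 32..47, OR'd into rhi)
    --   half3 = 0x0123  (bits 48..63, loaded by LIS into rhi)
    halves :: Word64 -> (Word16, Word16, Word16, Word16)
    halves i = ( fromIntegral i
               , fromIntegral (i `shiftR` 16)
               , fromIntegral (i `shiftR` 32)
               , fromIntegral (i `shiftR` 48) )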
+
+
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+  = return (Fixed (cmmTypeSize (cmmRegType reg))
+                  (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+    --   TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+#endif
+
+
+getRegister (CmmLoad mem pk)
+ | not (isWord64 pk)
+ = do
+        Amode addr addr_code <- getAmode mem
+        let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
+                       addr_code `snocOL` LD size dst addr
+        return (Any size code)
+          where size = cmmTypeSize pk
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+      MO_Not rep   -> triv_ucode_int rep NOT
+
+      MO_F_Neg w   -> triv_ucode_float w FNEG
+      MO_S_Neg w   -> triv_ucode_int   w NEG
+
+      MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
+      MO_FF_Conv W32 W64 -> conversionNop FF64 x
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      MO_SS_Conv from to
+        | from == to -> conversionNop (intSize to) x
+
+        -- narrowing is a nop: we treat the high bits as undefined
+      MO_SS_Conv W32 to -> conversionNop (intSize to) x
+      MO_SS_Conv W16 W8 -> conversionNop II8 x
+      MO_SS_Conv W8 to  -> triv_ucode_int to (EXTS II8)
+      MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
+
+      MO_UU_Conv from to
+        | from == to -> conversionNop (intSize to) x
+        -- narrowing is a nop: we treat the high bits as undefined
+      MO_UU_Conv W32 to -> conversionNop (intSize to) x
+      MO_UU_Conv W16 W8 -> conversionNop II8 x
+      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
+      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
+      _ -> panic "PPC.CodeGen.getRegister: no match"
+
+    where
+        triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
+        triv_ucode_float width instr = trivialUCode (floatSize width) instr x
+
+        conversionNop new_size expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_size)
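
Narrowing really does cost nothing here: conversionNop only relabels the size on the Register and emits no instructions, because the 32-bit PPC word operations simply ignore the (undefined) high bits. An illustrative one-liner, not part of the patch:

    -- e.g. a W32 -> W8 narrowing just re-tags the register's size:
    narrowTo8 :: Register -> Register
    narrowTo8 r = swizzleRegisterRep r II8   -- no instructions emitted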
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_F_Eq w -> condFltReg EQQ x y
+      MO_F_Ne w -> condFltReg NE  x y
+      MO_F_Gt w -> condFltReg GTT x y
+      MO_F_Ge w -> condFltReg GE  x y
+      MO_F_Lt w -> condFltReg LTT x y
+      MO_F_Le w -> condFltReg LE  x y
+
+      MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+      MO_Ne rep -> condIntReg NE  (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Ge rep -> condIntReg GE  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Le rep -> condIntReg LE  (extendSExpr rep x) (extendSExpr rep y)
+
+      MO_U_Gt rep -> condIntReg GU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Lt rep -> condIntReg LU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_F_Add w  -> triv_float w FADD
+      MO_F_Sub w  -> triv_float w FSUB
+      MO_F_Mul w  -> triv_float w FMUL
+      MO_F_Quot w -> triv_float w FDIV
+
+         -- optimize addition with 32-bit immediate
+         -- (needed for PIC)
+      MO_Add W32 ->
+        case y of
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
+          CmmLit lit
+            -> do
+                (src, srcCode) <- getSomeReg x
+                let imm = litToImm lit
+                    code dst = srcCode `appOL` toOL [
+                                    ADDIS dst src (HA imm),
+                                    ADD dst dst (RIImm (LO imm))
+                                ]
+                return (Any II32 code)
+          _ -> trivialCode W32 True ADD x y
+
+      MO_Add rep -> trivialCode rep True ADD x y
+      MO_Sub rep ->
+        case y of    -- subfi ('subtract from' with immediate) doesn't exist
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+          _ -> trivialCodeNoImm' (intSize rep) SUBF y x
+
+      MO_Mul rep -> trivialCode rep True MULLW x y
+
+      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
+
+      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
+      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+      MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW  (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Rem rep -> remainderCode rep DIVW  (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_And rep   -> trivialCode rep False AND x y
+      MO_Or rep    -> trivialCode rep False OR  x y
+      MO_Xor rep   -> trivialCode rep False XOR x y
+
+      MO_Shl rep   -> trivialCode rep False SLW x y
+      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+      MO_U_Shr rep -> trivialCode rep False SRW  (extendUExpr rep x) y
+      _            -> panic "PPC.CodeGen.getRegister: no match"
+
+  where
+    triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
+    triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
+
+getRegister (CmmLit (CmmInt i rep))
+  | Just imm <- makeImmediate rep True i
+  = let
+        code dst = unitOL (LI dst imm)
+    in
+        return (Any (intSize rep) code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let size = floatSize frep
+        code dst =
+            LDATA ReadOnlyData [CmmDataLabel lbl,
+                                CmmStaticLit (CmmFloat f frep)]
+            `consOL` (addr_code `snocOL` LD size dst addr)
+    return (Any size code)
+
+getRegister (CmmLit lit)
+  = let rep = cmmLitType lit
+        imm = litToImm lit
+        code dst = toOL [
+              LIS dst (HA imm),
+              ADD dst dst (RIImm (LO imm))
+          ]
+    in return (Any (cmmTypeSize rep) code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+    -- extend?Rep: wrap integer expression of type rep
+    -- in a conversion to II32
+extendSExpr W32 x = x
+extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
+extendUExpr W32 x = x
+extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
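
The HA/LO pair used above (and again in getAmode below) splits a 32-bit immediate so that imm == (HA imm << 16) + signExtend16 (LO imm); HA is "high-adjusted", compensating for the sign extension of the low half. A sketch of the arithmetic on plain machine integers (illustrative only; the real HA and LO live in PPC.Regs and operate on Imm values):

    lo16 :: Int32 -> Int16
    lo16 x = fromIntegral x    -- truncate; sign-extended when used

    ha16 :: Int32 -> Int16
    ha16 x = fromIntegral ((x - fromIntegral (lo16 x)) `shiftR` 16)

    -- Invariant: x == (fromIntegral (ha16 x) `shiftL` 16) + fromIntegral (lo16 x)
    -- e.g. x = 0x00018000 gives lo16 = -0x8000 and ha16 = 0x0002.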
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode
+        = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True (-i)
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True i
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+   -- optimize addition with 32-bit immediate
+   -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
+  = do
+        tmp <- getNewRegNat II32
+        (src, srcCode) <- getSomeReg x
+        let imm = litToImm lit
+            code = srcCode `snocOL` ADDIS tmp src (HA imm)
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+  = do
+        tmp <- getNewRegNat II32
+        let imm = litToImm lit
+            code = unitOL (LIS tmp (HA imm))
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add W32) [x, y])
+  = do
+        (regX, codeX) <- getSomeReg x
+        (regY, codeY) <- getSomeReg y
+        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+  = do
+        (reg, code) <- getSomeReg other
+        let
+            off = ImmInt 0
+        return (Amode (AddrRegImm reg off) code)
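
As a quick reference, the cases above map typical address expressions as follows (an illustrative summary; rx, ry and tmp are invented register names):

    -- CmmRegOff r off               ==> rewritten via mangleIndexTree
    -- x + c  (c a 16-bit immediate) ==> AddrRegImm rx (ImmInt c)
    -- x - c  (-c a 16-bit immediate)==> AddrRegImm rx (ImmInt (-c))
    -- x + lit (full 32 bits, PIC)   ==> ADDIS tmp rx (HA imm);
    --                                   AddrRegImm tmp (LO imm)
    -- x + y                         ==> AddrRegReg rx ry
    -- anything else                 ==> AddrRegImm r (ImmInt 0)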
+
+
+
+-- The 'CondCode' type: Condition codes passed up the tree.
+data CondCode
+        = CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bit first
+
+getCondCode (CmmMachOp mop [x, y])
+  = case mop of
+      MO_F_Eq W32 -> condFltCode EQQ x y
+      MO_F_Ne W32 -> condFltCode NE  x y
+      MO_F_Gt W32 -> condFltCode GTT x y
+      MO_F_Ge W32 -> condFltCode GE  x y
+      MO_F_Lt W32 -> condFltCode LTT x y
+      MO_F_Le W32 -> condFltCode LE  x y
+
+      MO_F_Eq W64 -> condFltCode EQQ x y
+      MO_F_Ne W64 -> condFltCode NE  x y
+      MO_F_Gt W64 -> condFltCode GTT x y
+      MO_F_Ge W64 -> condFltCode GE  x y
+      MO_F_Lt W64 -> condFltCode LTT x y
+      MO_F_Le W64 -> condFltCode LE  x y
+
+      MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+      MO_Ne rep -> condIntCode NE  (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Ge rep -> condIntCode GE  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Le rep -> condIntCode LE  (extendSExpr rep x) (extendSExpr rep y)
+
+      MO_U_Gt rep -> condIntCode GU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Lt rep -> condIntCode LU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+  = do
+        (src1, code) <- getSomeReg x
+        let
+            code' = code `snocOL`
+                (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
+        return (CondCode False cond code')
+
+condIntCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+        code' = code1 `appOL` code2 `snocOL`
+                  (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
+    return (CondCode False cond code')
+
+condFltCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+        code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
+        code'' = case cond of -- twiddle CR to handle unordered case
+                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+                    LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+                    _  -> code'
+                 where
+                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
+    return (CondCode True cond code'')
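
Why the CRNOR twiddle: assuming the usual CR field layout (lt, gt, eq in bits 0, 1, 2, matching the ltbit/gtbit/eqbit constants above), an unordered FCMP result (a NaN operand) sets none of the three bits, so branching "GE" as "not lt" would wrongly succeed. Rewriting lt := not (eq || gt) is a no-op for ordered operands but forces lt on for NaNs, making GE correctly fail; LE is handled symmetrically. A truth-model sketch (illustration only, not part of the patch):

    -- the rewritten GE test, as a pure model:
    geAfterTwiddle :: Bool -> Bool -> Bool   -- args: gt, eq after FCMP
    geAfterTwiddle gt eq = not lt' where lt' = not (eq || gt)
    -- ordered   : exactly one of lt/gt/eq is set, so result == gt || eq
    -- unordered : gt = eq = False, so lt' = True and the GE branch fails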
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode pk addr src = do
+    (srcReg, code) <- getSomeReg src
+    Amode dstAddr addr_code <- getAmode addr
+    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode _ reg src
+    = do
+        r <- getRegister src
+        return $ case r of
+            Any _ code         -> code dst
+            Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+    where
+        dst = getRegisterReg reg
+
+
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+  = return (unitOL $ JMP lbl)
+
+genJump tree
+  = do
+        (target,code) <- getSomeReg tree
+        return (code `snocOL` MTCTR target `snocOL` BCTR [])
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation.  We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@.  We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+    :: BlockId      -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+genCondJump id bool = do
+  CondCode _ cond code <- getCondCode bool
+  return (code `snocOL` BCC cond id)
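
End to end, a branch like "if (x < y) goto L" for signed W32 operands therefore becomes a compare followed by BCC. The shape of the emitted InstrBlock, sketched with invented virtual registers (an illustration, not part of the patch):

    -- <code for x>              -- leaves x in vr1
    -- <code for y>              -- leaves y in vr2
    -- CMP II32 vr1 (RIReg vr2)  -- roughly: cmpw vr1, vr2
    -- BCC LTT target            -- roughly: blt target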
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget            -- function to call
+    -> HintedCmmFormals         -- where to put the result
+    -> HintedCmmActuals         -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+    The PowerPC calling convention for Darwin/Mac OS X
+    is described in Apple's document
+    "Inside Mac OS X - Mach-O Runtime Architecture".
+
+    PowerPC Linux uses the System V Release 4 Calling Convention
+    for PowerPC. It is described in the
+    "System V Application Binary Interface PowerPC Processor Supplement".
+
+    Both conventions are similar:
+    Parameters may be passed in general-purpose registers starting at r3, in
+    floating point registers starting at f1, or on the stack.
+
+    But there are substantial differences:
+    * The number of registers used for parameter passing and the exact set of
+      nonvolatile registers differs (see MachRegs.lhs).
+    * On Darwin, stack space is always reserved for parameters, even if they
+      are passed in registers. The called routine may choose to save parameters
+      from registers to the corresponding space on the stack.
+    * On Darwin, a corresponding number of GPRs is skipped when a floating
+      point parameter is passed in an FPR.
+    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+      Darwin just treats an I64 like two separate II32s (high word first).
+    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
+      4-byte aligned like everything else on Darwin.
+    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
+      PowerPC Linux does not agree, so neither do we.
+
+    According to both conventions, the parameter area should be part of the
+    caller's stack frame, allocated in the caller's prologue code (large enough
+    to hold the parameter lists for all called routines). The NCG already
+    uses the stack for register spilling, leaving 64 bytes free at the top.
+    If we need a larger parameter area than that, we allocate a new stack
+    frame just before ccalling.
+-}
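
A toy model of the register-consumption rules just described, reduced to W32 integers and W64 floats; it highlights the one difference that matters most below, namely that Darwin burns GPRs alongside each FPR while SysV does not (an illustrative sketch, not the real passArguments):

    data ToyOS  = Darwin | SysV
    data ToyArg = I32Arg | F64Arg

    consume :: ToyOS -> ToyArg -> (Int, Int)  -- (GPRs used or skipped, FPRs used)
    consume _      I32Arg = (1, 0)
    consume Darwin F64Arg = (2, 1)   -- two GPRs skipped alongside the FPR
    consume SysV   F64Arg = (0, 1)   -- FPRs only; GPRs untouched
    -- e.g. (int, double, int): Darwin assigns r3, f1 (skipping r4-r5), r6;
    -- SysV assigns r3, f1, r4.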
+
+
+genCCall (CmmPrim MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
+  = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
+        -- we rely on argument promotion in the codeGen
+    do
+        (finalStack,passArgumentsCode,usedRegs) <- passArguments
+                                                        (zip args argReps)
+                                                        allArgRegs allFPArgRegs
+                                                        initialStackOffset
+                                                        (toOL []) []
+
+        (labelOrExpr, reduceToFF32) <- case target of
+            CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+            CmmCallee expr conv -> return (Right expr, False)
+            CmmPrim mop -> outOfLineFloatOp mop
+
+        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+            codeAfter  = move_sp_up finalStack `appOL` moveResult reduceToFF32
+
+        case labelOrExpr of
+            Left lbl -> do
+                return (         codeBefore
+                `snocOL`         BL lbl usedRegs
+                `appOL`          codeAfter)
+            Right dyn -> do
+                (dynReg, dynCode) <- getSomeReg dyn
+                return (         dynCode
+                `snocOL`         MTCTR dynReg
+                `appOL`          codeBefore
+                `snocOL`         BCTRL usedRegs
+                `appOL`          codeAfter)
+    where
+#if darwin_TARGET_OS
+        initialStackOffset = 24
+            -- size of linkage area + size of arguments, in bytes
+        stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+                                       map (widthInBytes . typeWidth) argReps
+#elif linux_TARGET_OS
+        initialStackOffset = 8
+        stackDelta finalStack = roundTo 16 finalStack
+#endif
+        args = map hintlessCmm argsAndHints
+        argReps = map cmmExprType args
+
+        roundTo a x | x `mod` a == 0 = x
+                    | otherwise = x + a - (x `mod` a)
+
+        move_sp_down finalStack
+               | delta > 64 =
+                        toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
+                              DELTA (-delta)]
+               | otherwise = nilOL
+               where delta = stackDelta finalStack
+        move_sp_up finalStack
+               | delta > 64 =
+                        toOL [ADD sp sp (RIImm (ImmInt delta)),
+                              DELTA 0]
+               | otherwise = nilOL
+               where delta = stackDelta finalStack
+
+
+        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+        passArguments ((arg,arg_ty):args) gprs fprs stackOffset
+               accumCode accumUsed | isWord64 arg_ty =
+            do
+                ChildCode64 code vr_lo <- iselExpr64 arg
+                let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+                passArguments args
+                              (drop 2 gprs)
+                              fprs
+                              (stackOffset+8)
+                              (accumCode `appOL` code
+                                    `snocOL` storeWord vr_hi gprs stackOffset
+                                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+                              ((take 2 gprs) ++ accumUsed)
+            where
+                storeWord vr (gpr:_) offset = MR gpr vr
+                storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+                let stackOffset' = roundTo 8 stackOffset
+                    stackCode = accumCode `appOL` code
+                        `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+                        `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+                    regCode hireg loreg =
+                        accumCode `appOL` code
+                            `snocOL` MR hireg vr_hi
+                            `snocOL` MR loreg vr_lo
+
+                case gprs of
+                    hireg : loreg : regs | even (length gprs) ->
+                        passArguments args regs fprs stackOffset
+                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
+                    _skipped : hireg : loreg : regs ->
+                        passArguments args regs fprs stackOffset
+                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
+                    _ -> -- only one or no regs left
+                        passArguments args [] fprs (stackOffset'+8)
+                                      stackCode accumUsed
+#endif
+
+        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+            | reg : _ <- regs = do
+                register <- getRegister arg
+                let code = case register of
+                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+                            Any _ acode -> acode reg
+                passArguments args
+                              (drop nGprs gprs)
+                              (drop nFprs fprs)
+#if darwin_TARGET_OS
+        -- The Darwin ABI requires that we reserve stack slots for register parameters
+                              (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+        -- ... the SysV ABI doesn't.
+                              stackOffset
+#endif
+                              (accumCode `appOL` code)
+                              (reg : accumUsed)
+            | otherwise = do
+                (vr, code) <- getSomeReg arg
+                passArguments args
+                              (drop nGprs gprs)
+                              (drop nFprs fprs)
+                              (stackOffset' + stackBytes)
+                              (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
+                              accumUsed
+            where
+#if darwin_TARGET_OS
+        -- stackOffset is at least 4-byte aligned
+        -- The Darwin ABI is happy with that.
+                stackOffset' = stackOffset
+#else
+        -- ... the SysV ABI requires 8-byte alignment for doubles.
+                stackOffset' | isFloatType rep && typeWidth rep == W64 =
+                                 roundTo 8 stackOffset
+                             | otherwise = stackOffset
+#endif
+                stackSlot = AddrRegImm sp (ImmInt stackOffset')
+                (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
+                    II32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+        -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+        -- we use the FPRs.
+                    FF32 -> (1, 1, 4, fprs)
+                    FF64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+        -- ... the SysV ABI doesn't.
+                    FF32 -> (0, 1, 4, fprs)
+                    FF64 -> (0, 1, 8, fprs)
+#endif
+
+        moveResult reduceToFF32 =
+            case dest_regs of
+                [] -> nilOL
+                [CmmHinted dest _hint]
+                    | reduceToFF32 && isFloat32 rep  -> unitOL (FRSP r_dest f1)
+                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
+                    | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
+                                            MR r_dest r4]
+                    | otherwise -> unitOL (MR r_dest r3)
+                    where rep = cmmRegType (CmmLocal dest)
+                          r_dest = getRegisterReg (CmmLocal dest)
+
+        outOfLineFloatOp mop =
+            do
+                dflags <- getDynFlagsNat
+                mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
+                              mkForeignLabel functionName Nothing True
+                let mopLabelOrExpr = case mopExpr of
+                        CmmLit (CmmLabel lbl) -> Left lbl
+                        _ -> Right mopExpr
+                return (mopLabelOrExpr, reduce)
+            where
+                (functionName, reduce) = case mop of
+                    MO_F32_Exp   -> (fsLit "exp", True)
+                    MO_F32_Log   -> (fsLit "log", True)
+                    MO_F32_Sqrt  -> (fsLit "sqrt", True)
+
+                    MO_F32_Sin   -> (fsLit "sin", True)
+                    MO_F32_Cos   -> (fsLit "cos", True)
+                    MO_F32_Tan   -> (fsLit "tan", True)
+
+                    MO_F32_Asin  -> (fsLit "asin", True)
+                    MO_F32_Acos  -> (fsLit "acos", True)
+                    MO_F32_Atan  -> (fsLit "atan", True)
+
+                    MO_F32_Sinh  -> (fsLit "sinh", True)
+                    MO_F32_Cosh  -> (fsLit "cosh", True)
+                    MO_F32_Tanh  -> (fsLit "tanh", True)
+                    MO_F32_Pwr   -> (fsLit "pow", True)
+
+                    MO_F64_Exp   -> (fsLit "exp", False)
+                    MO_F64_Log   -> (fsLit "log", False)
+                    MO_F64_Sqrt  -> (fsLit "sqrt", False)
+
+                    MO_F64_Sin   -> (fsLit "sin", False)
+                    MO_F64_Cos   -> (fsLit "cos", False)
+                    MO_F64_Tan   -> (fsLit "tan", False)
+
+                    MO_F64_Asin  -> (fsLit "asin", False)
+                    MO_F64_Acos  -> (fsLit "acos", False)
+                    MO_F64_Atan  -> (fsLit "atan", False)
+
+                    MO_F64_Sinh  -> (fsLit "sinh", False)
+                    MO_F64_Cosh  -> (fsLit "cosh", False)
+                    MO_F64_Tanh  -> (fsLit "tanh", False)
+                    MO_F64_Pwr   -> (fsLit "pow", False)
+                    other -> pprPanic "genCCall(ppc): unknown callish op"
+                                    (pprCallishMachOp other)
+
+#else /* darwin_TARGET_OS || linux_TARGET_OS */
+genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+  | opt_PIC
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat II32
+        lbl <- getNewLabelNat
+        dflags <- getDynFlagsNat
+        dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+        (tableReg,t_code) <- getSomeReg $ dynRef
+        let
+            jumpTable = map jumpTableEntryRel ids
+
+            jumpTableEntryRel Nothing
+                = CmmStaticLit (CmmInt 0 wordWidth)
+            jumpTableEntryRel (Just (BlockId id))
+                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+                where blockLabel = mkAsmTempLabel id
+
+            code = e_code `appOL` t_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            LD II32 tmp (AddrRegReg tableReg tmp),
+                            ADD tmp tmp (RIReg tableReg),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
+  | otherwise
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat II32
+        lbl <- getNewLabelNat
+        let
+            jumpTable = map jumpTableEntry ids
+
+            code = e_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            ADDIS tmp tmp (HA (ImmCLbl lbl)),
+                            LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
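
The switch scrutinee is scaled by four (SLW by 2) to index the word-sized table; in the PIC case the table holds label-minus-table offsets, so the loaded entry is added back to the table base before MTCTR/BCTR. The address arithmetic as a small pure sketch (illustration only, modelled on Word32 addresses):

    jumpTarget :: (Word32 -> Word32) -> Word32 -> Word32 -> Word32
    jumpTarget loadWord tableBase index =
        let entry = loadWord (tableBase + (index `shiftL` 2))  -- SLW + LD
        in  tableBase + entry                                  -- ADD .. tableReg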
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condReg :: NatM CondCode -> NatM Register
+condReg getCond = do
+    CondCode _ cond cond_code <- getCond
+    let
+{-        code dst = cond_code `appOL` toOL [
+                BCC cond lbl1,
+                LI dst (ImmInt 0),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl1,
+                LI dst (ImmInt 1),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl2
+            ]-}
+        code dst = cond_code
+            `appOL` negate_code
+            `appOL` toOL [
+                MFCR dst,
+                RLWINM dst dst (bit + 1) 31 31
+            ]
+
+        negate_code | do_negate = unitOL (CRNOR bit bit bit)
+                    | otherwise = nilOL
+
+        (bit, do_negate) = case cond of
+            LTT -> (0, False)
+            LE  -> (1, True)
+            EQQ -> (2, False)
+            GE  -> (0, True)
+            GTT -> (1, False)
+
+            NE  -> (2, True)
+
+            LU  -> (0, False)
+            LEU -> (1, True)
+            GEU -> (0, True)
+            GU  -> (1, False)
+            _   -> panic "PPC.CodeGen.condReg: no match"
+
+    return (Any II32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+        (a) fresh temporaries
+        (b) the destination register
+  It may *not* modify global registers, unless the global
+  register happens to be the destination register.
+  It may not clobber any other registers. In fact, only ccalls clobber any
+  fixed registers.
+  Also, it may not modify the counter register (used by genCCall).
+
+  Corollary: If a getRegister for a subexpression returns Fixed, you need
+  not move it to a fresh temporary before evaluating the next subexpression.
+  The Fixed register won't be modified.
+  Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+  the value of the destination register.
+-}
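
For example, an addition with a small literal on the right goes down the immediate path of trivialCode (defined next), folding the constant into the instruction instead of materialising it in a register. A hypothetical call, using only names from this module (illustration, not part of the patch):

    -- x + 100 at W32 takes the RIImm branch of trivialCode, since
    -- makeImmediate W32 True 100 succeeds; compare the RIReg fallback
    -- in trivialCode's second equation below.
    addSmallImm :: CmmExpr -> NatM Register
    addSmallImm x = trivialCode W32 True ADD x (CmmLit (CmmInt 100 W32))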
+
+trivialCode
+        :: Width
+        -> Bool
+        -> (Reg -> Reg -> RI -> Instr)
+        -> CmmExpr
+        -> CmmExpr
+        -> NatM Register
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+    | Just imm <- makeImmediate rep signed y
+    = do
+        (src1, code1) <- getSomeReg x
+        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+        return (Any (intSize rep) code)
+
+trivialCode rep _ instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+    return (Any (intSize rep) code)
+
+trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
+                  -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm' size instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+    return (Any size code)
+
+trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
+                 -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
+
+
+trivialUCode
+        :: Size
+        -> (Reg -> Reg -> Instr)
+        -> CmmExpr
+        -> NatM Register
+trivialUCode rep instr x = do
+    (src, code) <- getSomeReg x
+    let code' dst = code `snocOL` instr dst src
+    return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+
+remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
+        -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `appOL` toOL [
+                div dst src1 src2,
+                MULLW dst dst (RIReg src2),
+                SUBF dst dst src1
+            ]
+    return (Any (intSize rep) code)
+
+
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP fromRep toRep x = do
+    (src, code) <- getSomeReg x
+    lbl <- getNewLabelNat
+    itmp <- getNewRegNat II32
+    ftmp <- getNewRegNat FF64
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let
+        code' dst = code `appOL` maybe_exts `appOL` toOL [
+                LDATA ReadOnlyData
+                                [CmmDataLabel lbl,
+                                 CmmStaticLit (CmmInt 0x43300000 W32),
+                                 CmmStaticLit (CmmInt 0x80000000 W32)],
+                XORIS itmp src (ImmInt 0x8000),
+                ST II32 itmp (spRel 3),
+                LIS itmp (ImmInt 0x4330),
+                ST II32 itmp (spRel 2),
+                LD FF64 ftmp (spRel 2)
+            ] `appOL` addr_code `appOL` toOL [
+                LD FF64 dst addr,
+                FSUB FF64 dst ftmp dst
+            ] `appOL` maybe_frsp dst
+
+        maybe_exts = case fromRep of
+                        W8  -> unitOL $ EXTS II8 src src
+                        W16 -> unitOL $ EXTS II16 src src
+                        W32 -> nilOL
+                        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+        maybe_frsp dst
+                = case toRep of
+                        W32 -> unitOL $ FRSP dst dst
+                        W64 -> nilOL
+                        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+    return (Any (floatSize toRep) code')
+
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int _ toRep x = do
+    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat FF64
+    let
+        code' dst = code `appOL` toOL [
+                -- convert to int in FP reg
+            FCTIWZ tmp src,
+                -- store value (64bit) from FP to stack
+            ST FF64 tmp (spRel 2),
+                -- read low word of value (high word is undefined)
+            LD II32 dst (spRel 3)]
+    return (Any (intSize toRep) code')
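
The magic constant in coerceInt2FP is the classic integer-to-double trick: a double whose bit pattern has high word 0x43300000 and low word w has the value 2^52 + w, so storing 0x4330 as the high word and (x XOR 0x80000000) = x + 2^31 as the low word builds the double 2^52 + 2^31 + x; subtracting the constant 2^52 + 2^31 (the two CmmStaticLits) leaves exactly (double) x. A worked check in plain Haskell (illustration only; Data.Int, Data.Word and Data.Bits are already imported by this module):

    int2Double :: Int32 -> Double
    int2Double x = built - magic
      where
        u     = fromIntegral (x `xor` minBound) :: Word32  -- x + 2^31, unsigned
        built = 2^52 + fromIntegral u :: Double            -- the LD FF64 result
        magic = 2^52 + 2^31                                -- the static constant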