diff options
author | Ben.Lippmeier@anu.edu.au <unknown> | 2009-02-15 05:51:58 +0000 |
---|---|---|
committer | Ben.Lippmeier@anu.edu.au <unknown> | 2009-02-15 05:51:58 +0000 |
commit | b04a210e26ca57242fd052f2aa91011a80b76299 (patch) | |
tree | 6f26993cc3ef37f4555087bd80da4195edcda4ed /compiler/nativeGen/PPC | |
parent | 77ed23d51b968505b3ad8541c075657ae94f0ea3 (diff) | |
download | haskell-b04a210e26ca57242fd052f2aa91011a80b76299.tar.gz |
NCG: Split up the native code generator into arch specific modules
- nativeGen/Instruction defines a type class for a generic
instruction set. Each of the instruction sets we have,
X86, PPC and SPARC are instances of it.
- The register alloctors use this type class when they need
info about a certain register or instruction, such as
regUsage, mkSpillInstr, mkJumpInstr, patchRegs..
- nativeGen/Platform defines some data types enumerating
the architectures and operating systems supported by the
native code generator.
- DynFlags now keeps track of the current build platform, and
the PositionIndependentCode module uses this to decide what
to do instead of relying of #ifdefs.
- It's not totally retargetable yet. Some info info about the
build target is still hardwired, but I've tried to contain
most of it to a single module, TargetRegs.
- Moved the SPILL and RELOAD instructions into LiveInstr.
- Reg and RegClass now have their own modules, and are shared
across all architectures.
Diffstat (limited to 'compiler/nativeGen/PPC')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1364 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Cond.hs | 62 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 363 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 128 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/RegInfo.hs | 315 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Regs.hs | 102 |
6 files changed, 1949 insertions, 385 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs new file mode 100644 index 0000000000..6661a3ec92 --- /dev/null +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -0,0 +1,1364 @@ +{-# OPTIONS -w #-} + +----------------------------------------------------------------------------- +-- +-- Generating machine code (instruction selection) +-- +-- (c) The University of Glasgow 1996-2004 +-- +----------------------------------------------------------------------------- + +-- This is a big module, but, if you pay attention to +-- (a) the sectioning, (b) the type signatures, and +-- (c) the #if blah_TARGET_ARCH} things, the +-- structure should not be too overwhelming. + +module PPC.CodeGen ( + cmmTopCodeGen, + InstrBlock +) + +where + +#include "HsVersions.h" +#include "nativeGen/NCG.h" +#include "MachDeps.h" + +-- NCG stuff: +import PPC.Instr +import PPC.Cond +import PPC.Regs +import PPC.RegInfo +import NCGMonad +import Instruction +import PIC +import Size +import RegClass +import Reg +import Platform + +-- Our intermediate code: +import BlockId +import PprCmm ( pprExpr ) +import Cmm +import CLabel + +-- The rest: +import StaticFlags ( opt_PIC ) +import OrdList +import qualified Outputable as O +import Outputable +import DynFlags + +import Control.Monad ( mapAndUnzipM ) +import Data.Bits +import Data.Int +import Data.Word + +-- ----------------------------------------------------------------------------- +-- Top-level of the instruction selector + +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal (pre-order?) yields the insns in the correct +-- order. + +cmmTopCodeGen + :: DynFlags + -> RawCmmTop + -> NatM [NatCmmTop Instr] + +cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + picBaseMb <- getPicBaseMaybeNat + let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) + tops = proc : concat statics + os = platformOS $ targetPlatform dflags + case picBaseMb of + Just picBase -> initializePicBase_ppc ArchPPC os picBase tops + Nothing -> return tops + +cmmTopCodeGen dflags (CmmData sec dat) = do + return [CmmData sec dat] -- no translation, we just use CmmStatic + +basicBlockCodeGen + :: CmmBasicBlock + -> NatM ( [NatBasicBlock Instr] + , [NatCmmTop Instr]) + +basicBlockCodeGen (BasicBlock id stmts) = do + instrs <- stmtsToInstrs stmts + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. + let + (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs + + mkBlocks (NEWBLOCK id) (instrs,blocks,statics) + = ([], BasicBlock id instrs : blocks, statics) + mkBlocks (LDATA sec dat) (instrs,blocks,statics) + = (instrs, blocks, CmmData sec dat:statics) + mkBlocks instr (instrs,blocks,statics) + = (instr:instrs, blocks, statics) + -- in + return (BasicBlock id top : other_blocks, statics) + +stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock +stmtsToInstrs stmts + = do instrss <- mapM stmtToInstrs stmts + return (concatOL instrss) + +stmtToInstrs :: CmmStmt -> NatM InstrBlock +stmtToInstrs stmt = case stmt of + CmmNop -> return nilOL + CmmComment s -> return (unitOL (COMMENT s)) + + CmmAssign reg src + | isFloatType ty -> assignReg_FltCode size reg src +#if WORD_SIZE_IN_BITS==32 + | isWord64 ty -> assignReg_I64Code reg src +#endif + | otherwise -> assignReg_IntCode size reg src + where ty = cmmRegType reg + size = cmmTypeSize ty + + CmmStore addr src + | isFloatType ty -> assignMem_FltCode size addr src +#if WORD_SIZE_IN_BITS==32 + | isWord64 ty -> assignMem_I64Code addr src +#endif + | otherwise -> assignMem_IntCode size addr src + where ty = cmmExprType src + size = cmmTypeSize ty + + CmmCall target result_regs args _ _ + -> genCCall target result_regs args + + CmmBranch id -> genBranch id + CmmCondBranch arg id -> genCondJump id arg + CmmSwitch arg ids -> genSwitch arg ids + CmmJump arg params -> genJump arg + CmmReturn params -> + panic "stmtToInstrs: return statement should have been cps'd away" + + +-------------------------------------------------------------------------------- +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal yields the insns in the correct order. +-- +type InstrBlock + = OrdList Instr + + +-- | Register's passed up the tree. If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. +-- +data Register + = Fixed Size Reg InstrBlock + | Any Size (Reg -> InstrBlock) + + +swizzleRegisterRep :: Register -> Size -> Register +swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code +swizzleRegisterRep (Any _ codefn) size = Any size codefn + + +-- | Grab the Reg for a CmmReg +getRegisterReg :: CmmReg -> Reg + +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u (cmmTypeSize pk) + +getRegisterReg (CmmGlobal mid) + = case get_GlobalReg_reg_or_addr mid of + Left (RealReg rrno) -> RealReg rrno + _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... + + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. + +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + + +-- | Check whether an integer will fit in 32 bits. +-- A CmmInt is intended to be truncated to the appropriate +-- number of bits, so here we truncate it to Int64. This is +-- important because e.g. -1 as a CmmInt might be either +-- -1 or 18446744073709551615. +-- +is32BitInteger :: Integer -> Bool +is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 + where i64 = fromIntegral i :: Int64 + + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + + + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- Expand CmmRegOff. ToDo: should we do it this way around, or convert +-- CmmExprs into CmmRegOff? +mangleIndexTree :: CmmExpr -> CmmExpr +mangleIndexTree (CmmRegOff reg off) + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + +mangleIndexTree _ + = panic "PPC.CodeGen.mangleIndexTree: no match" + +-- ----------------------------------------------------------------------------- +-- Code gen for 64-bit arithmetic on 32-bit platforms + +{- +Simple support for generating 64-bit code (ie, 64 bit values and 64 +bit assignments) on 32-bit platforms. Unlike the main code generator +we merely shoot for generating working code as simply as possible, and +pay little attention to code quality. Specifically, there is no +attempt to deal cleverly with the fixed-vs-floating register +distinction; all values are generated into (pairs of) floating +registers, even if this would mean some redundant reg-reg moves as a +result. Only one of the VRegUniques is returned, since it will be +of the VRegUniqueLo form, and the upper-half VReg can be determined +by applying getHiVRegFromLo to it. +-} + +data ChildCode64 -- a.k.a "Register64" + = ChildCode64 + InstrBlock -- code + Reg -- the lower 32-bit temporary which contains the + -- result; use getHiVRegFromLo to find the other + -- VRegUnique. Rules of this simplified insn + -- selection game are therefore that the returned + -- Reg may be modified + + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed _ reg code -> + return (reg, code) + +getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) +getI64Amodes addrTree = do + Amode hi_addr addr_code <- getAmode addrTree + case addrOffset hi_addr 4 of + Just lo_addr -> return (hi_addr, lo_addr, addr_code) + Nothing -> do (hi_ptr, code) <- getSomeReg addrTree + return (AddrRegImm hi_ptr (ImmInt 0), + AddrRegImm hi_ptr (ImmInt 4), + code) + + +assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_I64Code addrTree valueTree = do + (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + let + rhi = getHiVRegFromLo rlo + + -- Big-endian store + mov_hi = ST II32 rhi hi_addr + mov_lo = ST II32 rlo lo_addr + -- in + return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) + + +assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst II32 + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = MR r_dst_lo r_src_lo + mov_hi = MR r_dst_hi r_src_hi + -- in + return ( + vcode `snocOL` mov_lo `snocOL` mov_hi + ) + +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(powerpc): invalid lvalue" + + +iselExpr64 :: CmmExpr -> NatM ChildCode64 +iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do + (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = LD II32 rhi hi_addr + mov_lo = LD II32 rlo lo_addr + return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo + +iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty + = return (ChildCode64 nilOL (mkVReg vu II32)) + +iselExpr64 (CmmLit (CmmInt i _)) = do + (rlo,rhi) <- getNewRegPairNat II32 + let + half0 = fromIntegral (fromIntegral i :: Word16) + half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16) + half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16) + half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16) + + code = toOL [ + LIS rlo (ImmInt half1), + OR rlo rlo (RIImm $ ImmInt half0), + LIS rhi (ImmInt half3), + OR rlo rlo (RIImm $ ImmInt half2) + ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat II32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ ADDC rlo r1lo r2lo, + ADDE rhi r1hi r2hi ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do + (expr_reg,expr_code) <- getSomeReg expr + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = LI rhi (ImmInt 0) + mov_lo = MR rlo expr_reg + return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo +iselExpr64 expr + = pprPanic "iselExpr64(powerpc)" (ppr expr) + + + +getRegister :: CmmExpr -> NatM Register + +getRegister (CmmReg reg) + = return (Fixed (cmmTypeSize (cmmRegType reg)) + (getRegisterReg reg) nilOL) + +getRegister tree@(CmmRegOff _ _) + = getRegister (mangleIndexTree tree) + + +#if WORD_SIZE_IN_BITS==32 + -- for 32-bit architectuers, support some 64 -> 32 bit conversions: + -- TO_W_(x), TO_W_(x >> 32) + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +#endif + + +getRegister (CmmLoad mem pk) + | not (isWord64 pk) + = do + Amode addr addr_code <- getAmode mem + let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk) + addr_code `snocOL` LD size dst addr + return (Any size code) + where size = cmmTypeSize pk + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) + +-- Note: there is no Load Byte Arithmetic instruction, so no signed case here + +getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) + +getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_Not rep -> triv_ucode_int rep NOT + + MO_F_Neg w -> triv_ucode_float w FNEG + MO_S_Neg w -> triv_ucode_int w NEG + + MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x + MO_FF_Conv W32 W64 -> conversionNop FF64 x + + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + MO_SS_Conv from to + | from == to -> conversionNop (intSize to) x + + -- narrowing is a nop: we treat the high bits as undefined + MO_SS_Conv W32 to -> conversionNop (intSize to) x + MO_SS_Conv W16 W8 -> conversionNop II8 x + MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) + MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) + + MO_UU_Conv from to + | from == to -> conversionNop (intSize to) x + -- narrowing is a nop: we treat the high bits as undefined + MO_UU_Conv W32 to -> conversionNop (intSize to) x + MO_UU_Conv W16 W8 -> conversionNop II8 x + MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32)) + MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) + _ -> panic "PPC.CodeGen.getRegister: no match" + + where + triv_ucode_int width instr = trivialUCode (intSize width) instr x + triv_ucode_float width instr = trivialUCode (floatSize width) instr x + + conversionNop new_size expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_size) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y) + + MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y) + + MO_F_Add w -> triv_float w FADD + MO_F_Sub w -> triv_float w FSUB + MO_F_Mul w -> triv_float w FMUL + MO_F_Quot w -> triv_float w FDIV + + -- optimize addition with 32-bit immediate + -- (needed for PIC) + MO_Add W32 -> + case y of + CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm) + -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) + CmmLit lit + -> do + (src, srcCode) <- getSomeReg x + let imm = litToImm lit + code dst = srcCode `appOL` toOL [ + ADDIS dst src (HA imm), + ADD dst dst (RIImm (LO imm)) + ] + return (Any II32 code) + _ -> trivialCode W32 True ADD x y + + MO_Add rep -> trivialCode rep True ADD x y + MO_Sub rep -> + case y of -- subfi ('substract from' with immediate) doesn't exist + CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) + -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) + _ -> trivialCodeNoImm' (intSize rep) SUBF y x + + MO_Mul rep -> trivialCode rep True MULLW x y + + MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y + + MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented" + MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented" + + MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y) + MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y) + MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y) + + MO_And rep -> trivialCode rep False AND x y + MO_Or rep -> trivialCode rep False OR x y + MO_Xor rep -> trivialCode rep False XOR x y + + MO_Shl rep -> trivialCode rep False SLW x y + MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y + MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y + _ -> panic "PPC.CodeGen.getRegister: no match" + + where + triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register + triv_float width instr = trivialCodeNoImm (floatSize width) instr x y + +getRegister (CmmLit (CmmInt i rep)) + | Just imm <- makeImmediate rep True i + = let + code dst = unitOL (LI dst imm) + in + return (Any (intSize rep) code) + +getRegister (CmmLit (CmmFloat f frep)) = do + lbl <- getNewLabelNat + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + Amode addr addr_code <- getAmode dynRef + let size = floatSize frep + code dst = + LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f frep)] + `consOL` (addr_code `snocOL` LD size dst addr) + return (Any size code) + +getRegister (CmmLit lit) + = let rep = cmmLitType lit + imm = litToImm lit + code dst = toOL [ + LIS dst (HA imm), + ADD dst dst (RIImm (LO imm)) + ] + in return (Any (cmmTypeSize rep) code) + +getRegister other = pprPanic "getRegister(ppc)" (pprExpr other) + + -- extend?Rep: wrap integer expression of type rep + -- in a conversion to II32 +extendSExpr W32 x = x +extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x] +extendUExpr W32 x = x +extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x] + +-- ----------------------------------------------------------------------------- +-- The 'Amode' type: Memory addressing modes passed up the tree. + +data Amode + = Amode AddrMode InstrBlock + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. + +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True (-i) + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) + + +getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True i + = do + (reg, code) <- getSomeReg x + return (Amode (AddrRegImm reg off) code) + + -- optimize addition with 32-bit immediate + -- (needed for PIC) +getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) + = do + tmp <- getNewRegNat II32 + (src, srcCode) <- getSomeReg x + let imm = litToImm lit + code = srcCode `snocOL` ADDIS tmp src (HA imm) + return (Amode (AddrRegImm tmp (LO imm)) code) + +getAmode (CmmLit lit) + = do + tmp <- getNewRegNat II32 + let imm = litToImm lit + code = unitOL (LIS tmp (HA imm)) + return (Amode (AddrRegImm tmp (LO imm)) code) + +getAmode (CmmMachOp (MO_Add W32) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + +getAmode other + = do + (reg, code) <- getSomeReg other + let + off = ImmInt 0 + return (Amode (AddrRegImm reg off) code) + + + +-- The 'CondCode' type: Condition codes passed up the tree. +data CondCode + = CondCode Bool Cond InstrBlock + +-- Set up a condition code for a conditional branch. + +getCondCode :: CmmExpr -> NatM CondCode + +-- almost the same as everywhere else - but we need to +-- extend small integers to 32 bit first + +getCondCode (CmmMachOp mop [x, y]) + = case mop of + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y) + + MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y) + + other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) + +getCondCode other = panic "getCondCode(2)(powerpc)" + + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. + +condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode + +-- ###FIXME: I16 and I8! +condIntCode cond x (CmmLit (CmmInt y rep)) + | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y + = do + (src1, code) <- getSomeReg x + let + code' = code `snocOL` + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) + return (CondCode False cond code') + +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 + code'' = case cond of -- twiddle CR to handle unordered case + GE -> code' `snocOL` CRNOR ltbit eqbit gtbit + LE -> code' `snocOL` CRNOR gtbit eqbit ltbit + _ -> code' + where + ltbit = 0 ; eqbit = 2 ; gtbit = 1 + return (CondCode True cond code'') + + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + +-- dst is a reg, but src could be anything +assignReg_IntCode _ reg src + = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` MR dst freg + where + dst = getRegisterReg reg + + + +-- Easy, isn't it? +assignMem_FltCode = assignMem_IntCode +assignReg_FltCode = assignReg_IntCode + + + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +genJump (CmmLit (CmmLabel lbl)) + = return (unitOL $ JMP lbl) + +genJump tree + = do + (target,code) <- getSomeReg tree + return (code `snocOL` MTCTR target `snocOL` BCTR []) + + +-- ----------------------------------------------------------------------------- +-- Unconditional branches +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +SPARC: First, we have to ensure that the condition codes are set +according to the supplied comparison operation. We generate slightly +different code for floating point comparisons, because a floating +point operation cannot directly precede a @BF@. We assume the worst +and fill that slot with a @NOP@. + +SPARC: Do not fill the delay slots here; you will confuse the register +allocator. +-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + +genCondJump id bool = do + CondCode _ cond code <- getCondCode bool + return (code `snocOL` BCC cond id) + + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + + +#if darwin_TARGET_OS || linux_TARGET_OS +{- + The PowerPC calling convention for Darwin/Mac OS X + is described in Apple's document + "Inside Mac OS X - Mach-O Runtime Architecture". + + PowerPC Linux uses the System V Release 4 Calling Convention + for PowerPC. It is described in the + "System V Application Binary Interface PowerPC Processor Supplement". + + Both conventions are similar: + Parameters may be passed in general-purpose registers starting at r3, in + floating point registers starting at f1, or on the stack. + + But there are substantial differences: + * The number of registers used for parameter passing and the exact set of + nonvolatile registers differs (see MachRegs.lhs). + * On Darwin, stack space is always reserved for parameters, even if they are + passed in registers. The called routine may choose to save parameters from + registers to the corresponding space on the stack. + * On Darwin, a corresponding amount of GPRs is skipped when a floating point + parameter is passed in an FPR. + * SysV insists on either passing I64 arguments on the stack, or in two GPRs, + starting with an odd-numbered GPR. It may skip a GPR to achieve this. + Darwin just treats an I64 like two separate II32s (high word first). + * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only + 4-byte aligned like everything else on Darwin. + * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on + PowerPC Linux does not agree, so neither do we. + + According to both conventions, The parameter area should be part of the + caller's stack frame, allocated in the caller's prologue code (large enough + to hold the parameter lists for all called routines). The NCG already + uses the stack for register spilling, leaving 64 bytes free at the top. + If we need a larger parameter area than that, we just allocate a new stack + frame just before ccalling. +-} + + +genCCall (CmmPrim MO_WriteBarrier) _ _ + = return $ unitOL LWSYNC + +genCCall target dest_regs argsAndHints + = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps) + -- we rely on argument promotion in the codeGen + do + (finalStack,passArgumentsCode,usedRegs) <- passArguments + (zip args argReps) + allArgRegs allFPArgRegs + initialStackOffset + (toOL []) [] + + (labelOrExpr, reduceToFF32) <- case target of + CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False) + CmmCallee expr conv -> return (Right expr, False) + CmmPrim mop -> outOfLineFloatOp mop + + let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode + codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 + + case labelOrExpr of + Left lbl -> do + return ( codeBefore + `snocOL` BL lbl usedRegs + `appOL` codeAfter) + Right dyn -> do + (dynReg, dynCode) <- getSomeReg dyn + return ( dynCode + `snocOL` MTCTR dynReg + `appOL` codeBefore + `snocOL` BCTRL usedRegs + `appOL` codeAfter) + where +#if darwin_TARGET_OS + initialStackOffset = 24 + -- size of linkage area + size of arguments, in bytes + stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $ + map (widthInBytes . typeWidth) argReps +#elif linux_TARGET_OS + initialStackOffset = 8 + stackDelta finalStack = roundTo 16 finalStack +#endif + args = map hintlessCmm argsAndHints + argReps = map cmmExprType args + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + + move_sp_down finalStack + | delta > 64 = + toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), + DELTA (-delta)] + | otherwise = nilOL + where delta = stackDelta finalStack + move_sp_up finalStack + | delta > 64 = + toOL [ADD sp sp (RIImm (ImmInt delta)), + DELTA 0] + | otherwise = nilOL + where delta = stackDelta finalStack + + + passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) + passArguments ((arg,arg_ty):args) gprs fprs stackOffset + accumCode accumUsed | isWord64 arg_ty = + do + ChildCode64 code vr_lo <- iselExpr64 arg + let vr_hi = getHiVRegFromLo vr_lo + +#if darwin_TARGET_OS + passArguments args + (drop 2 gprs) + fprs + (stackOffset+8) + (accumCode `appOL` code + `snocOL` storeWord vr_hi gprs stackOffset + `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) + ((take 2 gprs) ++ accumUsed) + where + storeWord vr (gpr:_) offset = MR gpr vr + storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset)) + +#elif linux_TARGET_OS + let stackOffset' = roundTo 8 stackOffset + stackCode = accumCode `appOL` code + `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) + `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) + regCode hireg loreg = + accumCode `appOL` code + `snocOL` MR hireg vr_hi + `snocOL` MR loreg vr_lo + + case gprs of + hireg : loreg : regs | even (length gprs) -> + passArguments args regs fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _skipped : hireg : loreg : regs -> + passArguments args regs fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _ -> -- only one or no regs left + passArguments args [] fprs (stackOffset'+8) + stackCode accumUsed +#endif + + passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed + | reg : _ <- regs = do + register <- getRegister arg + let code = case register of + Fixed _ freg fcode -> fcode `snocOL` MR reg freg + Any _ acode -> acode reg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we reserve stack slots for register parameters + (stackOffset + stackBytes) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. + stackOffset +#endif + (accumCode `appOL` code) + (reg : accumUsed) + | otherwise = do + (vr, code) <- getSomeReg arg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) + (stackOffset' + stackBytes) + (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot) + accumUsed + where +#if darwin_TARGET_OS + -- stackOffset is at least 4-byte aligned + -- The Darwin ABI is happy with that. + stackOffset' = stackOffset +#else + -- ... the SysV ABI requires 8-byte alignment for doubles. + stackOffset' | isFloatType rep && typeWidth rep == W64 = + roundTo 8 stackOffset + | otherwise = stackOffset +#endif + stackSlot = AddrRegImm sp (ImmInt stackOffset') + (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of + II32 -> (1, 0, 4, gprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we skip a corresponding number of GPRs when + -- we use the FPRs. + FF32 -> (1, 1, 4, fprs) + FF64 -> (2, 1, 8, fprs) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. + FF32 -> (0, 1, 4, fprs) + FF64 -> (0, 1, 8, fprs) +#endif + + moveResult reduceToFF32 = + case dest_regs of + [] -> nilOL + [CmmHinted dest _hint] + | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) + | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) + | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3, + MR r_dest r4] + | otherwise -> unitOL (MR r_dest r3) + where rep = cmmRegType (CmmLocal dest) + r_dest = getRegisterReg (CmmLocal dest) + + outOfLineFloatOp mop = + do + dflags <- getDynFlagsNat + mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $ + mkForeignLabel functionName Nothing True + let mopLabelOrExpr = case mopExpr of + CmmLit (CmmLabel lbl) -> Left lbl + _ -> Right mopExpr + return (mopLabelOrExpr, reduce) + where + (functionName, reduce) = case mop of + MO_F32_Exp -> (fsLit "exp", True) + MO_F32_Log -> (fsLit "log", True) + MO_F32_Sqrt -> (fsLit "sqrt", True) + + MO_F32_Sin -> (fsLit "sin", True) + MO_F32_Cos -> (fsLit "cos", True) + MO_F32_Tan -> (fsLit "tan", True) + + MO_F32_Asin -> (fsLit "asin", True) + MO_F32_Acos -> (fsLit "acos", True) + MO_F32_Atan -> (fsLit "atan", True) + + MO_F32_Sinh -> (fsLit "sinh", True) + MO_F32_Cosh -> (fsLit "cosh", True) + MO_F32_Tanh -> (fsLit "tanh", True) + MO_F32_Pwr -> (fsLit "pow", True) + + MO_F64_Exp -> (fsLit "exp", False) + MO_F64_Log -> (fsLit "log", False) + MO_F64_Sqrt -> (fsLit "sqrt", False) + + MO_F64_Sin -> (fsLit "sin", False) + MO_F64_Cos -> (fsLit "cos", False) + MO_F64_Tan -> (fsLit "tan", False) + + MO_F64_Asin -> (fsLit "asin", False) + MO_F64_Acos -> (fsLit "acos", False) + MO_F64_Atan -> (fsLit "atan", False) + + MO_F64_Sinh -> (fsLit "sinh", False) + MO_F64_Cosh -> (fsLit "cosh", False) + MO_F64_Tanh -> (fsLit "tanh", False) + MO_F64_Pwr -> (fsLit "pow", False) + other -> pprPanic "genCCall(ppc): unknown callish op" + (pprCallishMachOp other) + +#else /* darwin_TARGET_OS || linux_TARGET_OS */ +genCCall = panic "PPC.CodeGen.genCCall: not defined for this os" +#endif + + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock +genSwitch expr ids + | opt_PIC + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + lbl <- getNewLabelNat + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + (tableReg,t_code) <- getSomeReg $ dynRef + let + jumpTable = map jumpTableEntryRel ids + + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordWidth) + jumpTableEntryRel (Just (BlockId id)) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel id + + code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + LD II32 tmp (AddrRegReg tableReg tmp), + ADD tmp tmp (RIReg tableReg), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code + | otherwise + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + lbl <- getNewLabelNat + let + jumpTable = map jumpTableEntry ids + + code = e_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + ADDIS tmp tmp (HA (ImmCLbl lbl)), + LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- Turn those condition codes into integers now (when they appear on +-- the right hand side of an assignment). +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +condReg :: NatM CondCode -> NatM Register +condReg getCond = do + CondCode _ cond cond_code <- getCond + let +{- code dst = cond_code `appOL` toOL [ + BCC cond lbl1, + LI dst (ImmInt 0), + BCC ALWAYS lbl2, + NEWBLOCK lbl1, + LI dst (ImmInt 1), + BCC ALWAYS lbl2, + NEWBLOCK lbl2 + ]-} + code dst = cond_code + `appOL` negate_code + `appOL` toOL [ + MFCR dst, + RLWINM dst dst (bit + 1) 31 31 + ] + + negate_code | do_negate = unitOL (CRNOR bit bit bit) + | otherwise = nilOL + + (bit, do_negate) = case cond of + LTT -> (0, False) + LE -> (1, True) + EQQ -> (2, False) + GE -> (0, True) + GTT -> (1, False) + + NE -> (2, True) + + LU -> (0, False) + LEU -> (1, True) + GEU -> (0, True) + GU -> (1, False) + _ -> panic "PPC.CodeGen.codeReg: no match" + + return (Any II32 code) + +condIntReg cond x y = condReg (condIntCode cond x y) +condFltReg cond x y = condReg (condFltCode cond x y) + + + +-- ----------------------------------------------------------------------------- +-- 'trivial*Code': deal with trivial instructions + +-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', +-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. +-- Only look for constants on the right hand side, because that's +-- where the generic optimizer will have put them. + +-- Similarly, for unary instructions, we don't have to worry about +-- matching an StInt as the argument, because genericOpt will already +-- have handled the constant-folding. + + + +{- +Wolfgang's PowerPC version of The Rules: + +A slightly modified version of The Rules to take advantage of the fact +that PowerPC instructions work on all registers and don't implicitly +clobber any fixed registers. + +* The only expression for which getRegister returns Fixed is (CmmReg reg). + +* If getRegister returns Any, then the code it generates may modify only: + (a) fresh temporaries + (b) the destination register + It may *not* modify global registers, unless the global + register happens to be the destination register. + It may not clobber any other registers. In fact, only ccalls clobber any + fixed registers. + Also, it may not modify the counter register (used by genCCall). + + Corollary: If a getRegister for a subexpression returns Fixed, you need + not move it to a fresh temporary before evaluating the next subexpression. + The Fixed register won't be modified. + Therefore, we don't need a counterpart for the x86's getStableReg on PPC. + +* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on + the value of the destination register. +-} + +trivialCode + :: Width + -> Bool + -> (Reg -> Reg -> RI -> Instr) + -> CmmExpr + -> CmmExpr + -> NatM Register + +trivialCode rep signed instr x (CmmLit (CmmInt y _)) + | Just imm <- makeImmediate rep signed y + = do + (src1, code1) <- getSomeReg x + let code dst = code1 `snocOL` instr dst src1 (RIImm imm) + return (Any (intSize rep) code) + +trivialCode rep _ instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) + return (Any (intSize rep) code) + +trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm' size instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 + return (Any size code) + +trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y + + +trivialUCode + :: Size + -> (Reg -> Reg -> Instr) + -> CmmExpr + -> NatM Register +trivialUCode rep instr x = do + (src, code) <- getSomeReg x + let code' dst = code `snocOL` instr dst src + return (Any rep code') + +-- There is no "remainder" instruction on the PPC, so we have to do +-- it the hard way. +-- The "div" parameter is the division instruction to use (DIVW or DIVWU) + +remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +remainderCode rep div x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `appOL` toOL [ + div dst src1 src2, + MULLW dst dst (RIReg src2), + SUBF dst dst src1 + ] + return (Any (intSize rep) code) + + +coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register +coerceInt2FP fromRep toRep x = do + (src, code) <- getSomeReg x + lbl <- getNewLabelNat + itmp <- getNewRegNat II32 + ftmp <- getNewRegNat FF64 + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + Amode addr addr_code <- getAmode dynRef + let + code' dst = code `appOL` maybe_exts `appOL` toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x43300000 W32), + CmmStaticLit (CmmInt 0x80000000 W32)], + XORIS itmp src (ImmInt 0x8000), + ST II32 itmp (spRel 3), + LIS itmp (ImmInt 0x4330), + ST II32 itmp (spRel 2), + LD FF64 ftmp (spRel 2) + ] `appOL` addr_code `appOL` toOL [ + LD FF64 dst addr, + FSUB FF64 dst ftmp dst + ] `appOL` maybe_frsp dst + + maybe_exts = case fromRep of + W8 -> unitOL $ EXTS II8 src src + W16 -> unitOL $ EXTS II16 src src + W32 -> nilOL + _ -> panic "PPC.CodeGen.coerceInt2FP: no match" + + maybe_frsp dst + = case toRep of + W32 -> unitOL $ FRSP dst dst + W64 -> nilOL + _ -> panic "PPC.CodeGen.coerceInt2FP: no match" + + return (Any (floatSize toRep) code') + +coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register +coerceFP2Int _ toRep x = do + -- the reps don't really matter: F*->FF64 and II32->I* are no-ops + (src, code) <- getSomeReg x + tmp <- getNewRegNat FF64 + let + code' dst = code `appOL` toOL [ + -- convert to int in FP reg + FCTIWZ tmp src, + -- store value (64bit) from FP to stack + ST FF64 tmp (spRel 2), + -- read low word of value (high word is undefined) + LD II32 dst (spRel 3)] + return (Any (intSize toRep) code') diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs new file mode 100644 index 0000000000..7345ee5f1d --- /dev/null +++ b/compiler/nativeGen/PPC/Cond.hs @@ -0,0 +1,62 @@ + +module PPC.Cond ( + Cond(..), + condNegate, + condUnsigned, + condToSigned, + condToUnsigned, +) + +where + +import Panic + +data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + deriving Eq + + +condNegate :: Cond -> Cond +condNegate ALWAYS = panic "condNegate: ALWAYS" +condNegate EQQ = NE +condNegate GE = LTT +condNegate GEU = LU +condNegate GTT = LE +condNegate GU = LEU +condNegate LE = GTT +condNegate LEU = GU +condNegate LTT = GE +condNegate LU = GEU +condNegate NE = EQQ + +-- Condition utils +condUnsigned :: Cond -> Bool +condUnsigned GU = True +condUnsigned LU = True +condUnsigned GEU = True +condUnsigned LEU = True +condUnsigned _ = False + +condToSigned :: Cond -> Cond +condToSigned GU = GTT +condToSigned LU = LTT +condToSigned GEU = GE +condToSigned LEU = LE +condToSigned x = x + +condToUnsigned :: Cond -> Cond +condToUnsigned GTT = GU +condToUnsigned LTT = LU +condToUnsigned GE = GEU +condToUnsigned LE = LEU +condToUnsigned x = x diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 85aa494ba4..55affc6e1e 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -10,49 +10,50 @@ #include "nativeGen/NCG.h" module PPC.Instr ( - Cond(..), - condNegate, + archWordSize, RI(..), - Instr(..) + Instr(..), + maxSpillSlots ) where -import BlockId import PPC.Regs -import RegsBase +import PPC.Cond +import Instruction +import Size +import RegClass +import Reg + +import Constants (rESERVED_C_STACK_BYTES) +import BlockId import Cmm -import Outputable import FastString import CLabel +import Outputable +import FastBool + +-------------------------------------------------------------------------------- +-- Size of a PPC memory address, in bytes. +-- +archWordSize :: Size +archWordSize = II32 -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - deriving Eq - - -condNegate :: Cond -> Cond -condNegate ALWAYS = panic "condNegate: ALWAYS" -condNegate EQQ = NE -condNegate GE = LTT -condNegate GEU = LU -condNegate GTT = LE -condNegate GU = LEU -condNegate LE = GTT -condNegate LEU = GU -condNegate LTT = GE -condNegate LU = GEU -condNegate NE = EQQ + +-- | Instruction instance for powerpc +instance Instruction Instr where + regUsageOfInstr = ppc_regUsageOfInstr + patchRegsOfInstr = ppc_patchRegsOfInstr + isJumpishInstr = ppc_isJumpishInstr + jumpDestsOfInstr = ppc_jumpDestsOfInstr + patchJumpInstr = ppc_patchJumpInstr + mkSpillInstr = ppc_mkSpillInstr + mkLoadInstr = ppc_mkLoadInstr + takeDeltaInstr = ppc_takeDeltaInstr + isMetaInstr = ppc_isMetaInstr + mkRegRegMoveInstr = ppc_mkRegRegMoveInstr + takeRegRegMoveInstr = ppc_takeRegRegMoveInstr + mkJumpInstr = ppc_mkJumpInstr -- ----------------------------------------------------------------------------- @@ -85,12 +86,6 @@ data Instr -- benefit of subsequent passes | DELTA Int - -- | spill this reg to a stack slot - | SPILL Reg Int - - -- | reload this reg from a stack slot - | RELOAD Int Reg - -- Loads and stores. | LD Size Reg AddrMode -- Load size, dst, src | LA Size Reg AddrMode -- Load arithmetic size, dst, src @@ -165,3 +160,293 @@ data Instr -- bcl to next insn, mflr reg | LWSYNC -- memory barrier + + +-- | Get the registers that are being used by this instruction. +-- regUsage doesn't need to do any trickery for jumps and such. +-- Just state precisely the regs read and written by that insn. +-- The consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. +-- +ppc_regUsageOfInstr :: Instr -> RegUsage +ppc_regUsageOfInstr instr + = case instr of + LD _ reg addr -> usage (regAddr addr, [reg]) + LA _ reg addr -> usage (regAddr addr, [reg]) + ST _ reg addr -> usage (reg : regAddr addr, []) + STU _ reg addr -> usage (reg : regAddr addr, []) + LIS reg _ -> usage ([], [reg]) + LI reg _ -> usage ([], [reg]) + MR reg1 reg2 -> usage ([reg2], [reg1]) + CMP _ reg ri -> usage (reg : regRI ri,[]) + CMPL _ reg ri -> usage (reg : regRI ri,[]) + BCC _ _ -> noUsage + BCCFAR _ _ -> noUsage + MTCTR reg -> usage ([reg],[]) + BCTR _ -> noUsage + BL _ params -> usage (params, callClobberedRegs) + BCTRL params -> usage (params, callClobberedRegs) + ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) + SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + MULLW_MayOflo reg1 reg2 reg3 + -> usage ([reg2,reg3], [reg1]) + AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) + EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) + NEG reg1 reg2 -> usage ([reg2], [reg1]) + NOT reg1 reg2 -> usage ([reg2], [reg1]) + SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + RLWINM reg1 reg2 _ _ _ + -> usage ([reg2], [reg1]) + FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FNEG r1 r2 -> usage ([r2], [r1]) + FCMP r1 r2 -> usage ([r1,r2], []) + FCTIWZ r1 r2 -> usage ([r2], [r1]) + FRSP r1 r2 -> usage ([r2], [r1]) + MFCR reg -> usage ([], [reg]) + MFLR reg -> usage ([], [reg]) + FETCHPC reg -> usage ([], [reg]) + _ -> noUsage + where + usage (src, dst) = RU (filter interesting src) + (filter interesting dst) + regAddr (AddrRegReg r1 r2) = [r1, r2] + regAddr (AddrRegImm r1 _) = [r1] + + regRI (RIReg r) = [r] + regRI _ = [] + +interesting :: Reg -> Bool +interesting (VirtualRegI _) = True +interesting (VirtualRegHi _) = True +interesting (VirtualRegF _) = True +interesting (VirtualRegD _) = True +interesting (RealReg i) = isFastTrue (freeReg i) + + + + +-- | Apply a given mapping to all the register references in this +-- instruction. +ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +ppc_patchRegsOfInstr instr env + = case instr of + LD sz reg addr -> LD sz (env reg) (fixAddr addr) + LA sz reg addr -> LA sz (env reg) (fixAddr addr) + ST sz reg addr -> ST sz (env reg) (fixAddr addr) + STU sz reg addr -> STU sz (env reg) (fixAddr addr) + LIS reg imm -> LIS (env reg) imm + LI reg imm -> LI (env reg) imm + MR reg1 reg2 -> MR (env reg1) (env reg2) + CMP sz reg ri -> CMP sz (env reg) (fixRI ri) + CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri) + BCC cond lbl -> BCC cond lbl + BCCFAR cond lbl -> BCCFAR cond lbl + MTCTR reg -> MTCTR (env reg) + BCTR targets -> BCTR targets + BL imm argRegs -> BL imm argRegs -- argument regs + BCTRL argRegs -> BCTRL argRegs -- cannot be remapped + ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) + ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3) + ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3) + ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm + SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3) + MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) + DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3) + DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3) + MULLW_MayOflo reg1 reg2 reg3 + -> MULLW_MayOflo (env reg1) (env reg2) (env reg3) + AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) + OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) + XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) + XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm + EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2) + NEG reg1 reg2 -> NEG (env reg1) (env reg2) + NOT reg1 reg2 -> NOT (env reg1) (env reg2) + SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri) + SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri) + SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri) + RLWINM reg1 reg2 sh mb me + -> RLWINM (env reg1) (env reg2) sh mb me + FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3) + FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3) + FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3) + FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3) + FNEG r1 r2 -> FNEG (env r1) (env r2) + FCMP r1 r2 -> FCMP (env r1) (env r2) + FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) + FRSP r1 r2 -> FRSP (env r1) (env r2) + MFCR reg -> MFCR (env reg) + MFLR reg -> MFLR (env reg) + FETCHPC reg -> FETCHPC (env reg) + _ -> instr + where + fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) + fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i + + fixRI (RIReg r) = RIReg (env r) + fixRI other = other + + +-------------------------------------------------------------------------------- +-- | Checks whether this instruction is a jump/branch instruction. +-- One that can change the flow of control in a way that the +-- register allocator needs to worry about. +ppc_isJumpishInstr :: Instr -> Bool +ppc_isJumpishInstr instr + = case instr of + BCC{} -> True + BCCFAR{} -> True + BCTR{} -> True + BCTRL{} -> True + BL{} -> True + JMP{} -> True + _ -> False + + +-- | Checks whether this instruction is a jump/branch instruction. +-- One that can change the flow of control in a way that the +-- register allocator needs to worry about. +ppc_jumpDestsOfInstr :: Instr -> [BlockId] +ppc_jumpDestsOfInstr insn + = case insn of + BCC _ id -> [id] + BCCFAR _ id -> [id] + BCTR targets -> targets + _ -> [] + + +-- | Change the destination of this jump instruction. +-- Used in the linear allocator when adding fixup blocks for join +-- points. +ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr +ppc_patchJumpInstr insn patchF + = case insn of + BCC cc id -> BCC cc (patchF id) + BCCFAR cc id -> BCCFAR cc (patchF id) + BCTR _ -> error "Cannot patch BCTR" + _ -> insn + + +-- ----------------------------------------------------------------------------- + +-- | An instruction to spill a register into a spill slot. +ppc_mkSpillInstr + :: Reg -- register to spill + -> Int -- current stack delta + -> Int -- spill slot to use + -> Instr + +ppc_mkSpillInstr reg delta slot + = let off = spillSlotToOffset slot + in + let sz = case regClass reg of + RcInteger -> II32 + RcDouble -> FF64 + _ -> panic "PPC.Instr.mkSpillInstr: no match" + in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) + + +ppc_mkLoadInstr + :: Reg -- register to load + -> Int -- current stack delta + -> Int -- spill slot to use + -> Instr + +ppc_mkLoadInstr reg delta slot + = let off = spillSlotToOffset slot + in + let sz = case regClass reg of + RcInteger -> II32 + RcDouble -> FF64 + _ -> panic "PPC.Instr.mkLoadInstr: no match" + in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) + + +spillSlotSize :: Int +spillSlotSize = 8 + +maxSpillSlots :: Int +maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 + +-- convert a spill slot number to a *byte* offset, with no sign: +-- decide on a per arch basis whether you are spilling above or below +-- the C stack pointer. +spillSlotToOffset :: Int -> Int +spillSlotToOffset slot + | slot >= 0 && slot < maxSpillSlots + = 64 + spillSlotSize * slot + | otherwise + = pprPanic "spillSlotToOffset:" + ( text "invalid spill location: " <> int slot + $$ text "maxSpillSlots: " <> int maxSpillSlots) + + +-------------------------------------------------------------------------------- +-- | See if this instruction is telling us the current C stack delta +ppc_takeDeltaInstr + :: Instr + -> Maybe Int + +ppc_takeDeltaInstr instr + = case instr of + DELTA i -> Just i + _ -> Nothing + + +ppc_isMetaInstr + :: Instr + -> Bool + +ppc_isMetaInstr instr + = case instr of + COMMENT{} -> True + LDATA{} -> True + NEWBLOCK{} -> True + DELTA{} -> True + _ -> False + + +-- | Copy the value in a register to another one. +-- Must work for all register classes. +ppc_mkRegRegMoveInstr + :: Reg + -> Reg + -> Instr + +ppc_mkRegRegMoveInstr src dst + = MR dst src + + +-- | Make an unconditional jump instruction. +-- For architectures with branch delay slots, its ok to put +-- a NOP after the jump. Don't fill the delay slot with an +-- instruction that references regs or you'll confuse the +-- linear allocator. +ppc_mkJumpInstr + :: BlockId + -> [Instr] + +ppc_mkJumpInstr id + = [BCC ALWAYS id] + + +-- | Take the source and destination from this reg -> reg move instruction +-- or Nothing if it's not one +ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) +ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst) +ppc_takeRegRegMoveInstr _ = Nothing + diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index ac83600f9c..f12d32a9b0 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -7,12 +7,15 @@ ----------------------------------------------------------------------------- module PPC.Ppr ( + pprNatCmmTop, + pprBasicBlock, + pprSectionHeader, + pprData, + pprInstr, pprUserReg, pprSize, pprImm, - pprSectionHeader, pprDataItem, - pprInstr ) where @@ -20,26 +23,134 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" -import RegsBase -import PprBase import PPC.Regs import PPC.Instr +import PPC.Cond +import PprBase +import Instruction +import Size +import Reg +import RegClass import BlockId import Cmm -import CLabel ( mkAsmTempLabel ) +import CLabel import Unique ( pprUnique ) import Pretty import FastString import qualified Outputable -import Outputable ( panic ) +import Outputable ( Outputable, panic ) -import Data.Word(Word32) +import Data.Word import Data.Bits +-- ----------------------------------------------------------------------------- +-- Printing this stuff out + +pprNatCmmTop :: NatCmmTop Instr -> Doc +pprNatCmmTop (CmmData section dats) = + pprSectionHeader section $$ vcat (map pprData dats) + + -- special case for split markers: +pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl + +pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = + pprSectionHeader Text $$ + (if null info then -- blocks guaranteed not null, so label needed + pprLabel lbl + else +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + <> char ':' $$ +#endif + vcat (map pprData info) $$ + pprLabel (entryLblToInfoLbl lbl) + ) $$ + vcat (map pprBasicBlock blocks) + -- above: Even the first block gets a label, because with branch-chain + -- elimination, it might be the target of a goto. +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- If we are using the .subsections_via_symbols directive + -- (available on recent versions of Darwin), + -- we have to make sure that there is some kind of reference + -- from the entry code to a label on the _top_ of of the info table, + -- so that the linker will not think it is unreferenced and dead-strip + -- it. That's why the label is called a DeadStripPreventer (_dsp). + $$ if not (null info) + then text "\t.long " + <+> pprCLabel_asm (entryLblToInfoLbl lbl) + <+> char '-' + <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + else empty +#endif + + +pprBasicBlock :: NatBasicBlock Instr -> Doc +pprBasicBlock (BasicBlock (BlockId id) instrs) = + pprLabel (mkAsmTempLabel id) $$ + vcat (map pprInstr instrs) + + +pprData :: CmmStatic -> Doc +pprData (CmmAlign bytes) = pprAlign bytes +pprData (CmmDataLabel lbl) = pprLabel lbl +pprData (CmmString str) = pprASCII str +pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes +pprData (CmmStaticLit lit) = pprDataItem lit + +pprGloblDecl :: CLabel -> Doc +pprGloblDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext IF_ARCH_sparc((sLit ".global "), + (sLit ".globl ")) <> + pprCLabel_asm lbl + +pprTypeAndSizeDecl :: CLabel -> Doc +#if linux_TARGET_OS +pprTypeAndSizeDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext (sLit ".type ") <> + pprCLabel_asm lbl <> ptext (sLit ", @object") +#else +pprTypeAndSizeDecl _ + = empty +#endif + +pprLabel :: CLabel -> Doc +pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') + + +pprASCII :: [Word8] -> Doc +pprASCII str + = vcat (map do1 str) $$ do1 0 + where + do1 :: Word8 -> Doc + do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) + +pprAlign :: Int -> Doc +pprAlign bytes = + ptext (sLit ".align ") <> int pow2 + where + pow2 = log2 bytes + + log2 :: Int -> Int -- cache the common ones + log2 1 = 0 + log2 2 = 1 + log2 4 = 2 + log2 8 = 3 + log2 n = 1 + log2 (n `quot` 2) + + +-- ----------------------------------------------------------------------------- +-- pprInstr: print an 'Instr' + +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr + + pprUserReg :: Reg -> Doc pprUserReg = pprReg @@ -255,7 +366,7 @@ pprInstr (NEWBLOCK _) pprInstr (LDATA _ _) = panic "PprMach.pprInstr: LDATA" - +{- pprInstr (SPILL reg slot) = hcat [ ptext (sLit "\tSPILL"), @@ -271,6 +382,7 @@ pprInstr (RELOAD slot reg) ptext (sLit "SLOT") <> parens (int slot), comma, pprReg reg] +-} pprInstr (LD sz reg addr) = hcat [ char '\t', diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs index ea882a0d23..b2806c74d1 100644 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ b/compiler/nativeGen/PPC/RegInfo.hs @@ -7,27 +7,14 @@ ----------------------------------------------------------------------------- module PPC.RegInfo ( - RegUsage(..), - noUsage, - regUsage, - patchRegs, - jumpDests, - isJumpish, - patchJump, - isRegRegMove, + mkVReg, - JumpDest(..), + JumpDest, canShortcut, shortcutJump, - mkSpillInstr, - mkLoadInstr, - mkRegRegMoveInstr, - mkBranchInstr, - - spillSlotSize, - maxSpillSlots, - spillSlotToOffset + shortcutStatic, + regDotColor ) where @@ -35,203 +22,29 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" -import BlockId -import RegsBase import PPC.Regs import PPC.Instr -import Outputable -import Constants ( rESERVED_C_STACK_BYTES ) -import FastBool - -data RegUsage = RU [Reg] [Reg] - -noUsage :: RegUsage -noUsage = RU [] [] - -regUsage :: Instr -> RegUsage -regUsage instr = case instr of - SPILL reg _ -> usage ([reg], []) - RELOAD _ reg -> usage ([], [reg]) - - LD _ reg addr -> usage (regAddr addr, [reg]) - LA _ reg addr -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - STU _ reg addr -> usage (reg : regAddr addr, []) - LIS reg _ -> usage ([], [reg]) - LI reg _ -> usage ([], [reg]) - MR reg1 reg2 -> usage ([reg2], [reg1]) - CMP _ reg ri -> usage (reg : regRI ri,[]) - CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ -> noUsage - BCCFAR _ _ -> noUsage - MTCTR reg -> usage ([reg],[]) - BCTR _ -> noUsage - BL _ params -> usage (params, callClobberedRegs) - BCTRL params -> usage (params, callClobberedRegs) - ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) - SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - MULLW_MayOflo reg1 reg2 reg3 - -> usage ([reg2,reg3], [reg1]) - AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) - EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) - NEG reg1 reg2 -> usage ([reg2], [reg1]) - NOT reg1 reg2 -> usage ([reg2], [reg1]) - SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - RLWINM reg1 reg2 _ _ _ - -> usage ([reg2], [reg1]) - FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FNEG r1 r2 -> usage ([r2], [r1]) - FCMP r1 r2 -> usage ([r1,r2], []) - FCTIWZ r1 r2 -> usage ([r2], [r1]) - FRSP r1 r2 -> usage ([r2], [r1]) - MFCR reg -> usage ([], [reg]) - MFLR reg -> usage ([], [reg]) - FETCHPC reg -> usage ([], [reg]) - _ -> noUsage - where - usage (src, dst) = RU (filter interesting src) - (filter interesting dst) - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] - - regRI (RIReg r) = [r] - regRI _ = [] - -interesting :: Reg -> Bool -interesting (VirtualRegI _) = True -interesting (VirtualRegHi _) = True -interesting (VirtualRegF _) = True -interesting (VirtualRegD _) = True -interesting (RealReg i) = isFastTrue (freeReg i) - - --- ----------------------------------------------------------------------------- --- 'patchRegs' function - --- 'patchRegs' takes an instruction and applies the given mapping to --- all the register references. - -patchRegs :: Instr -> (Reg -> Reg) -> Instr -patchRegs instr env = case instr of - SPILL reg slot -> SPILL (env reg) slot - RELOAD slot reg -> RELOAD slot (env reg) - - LD sz reg addr -> LD sz (env reg) (fixAddr addr) - LA sz reg addr -> LA sz (env reg) (fixAddr addr) - ST sz reg addr -> ST sz (env reg) (fixAddr addr) - STU sz reg addr -> STU sz (env reg) (fixAddr addr) - LIS reg imm -> LIS (env reg) imm - LI reg imm -> LI (env reg) imm - MR reg1 reg2 -> MR (env reg1) (env reg2) - CMP sz reg ri -> CMP sz (env reg) (fixRI ri) - CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri) - BCC cond lbl -> BCC cond lbl - BCCFAR cond lbl -> BCCFAR cond lbl - MTCTR reg -> MTCTR (env reg) - BCTR targets -> BCTR targets - BL imm argRegs -> BL imm argRegs -- argument regs - BCTRL argRegs -> BCTRL argRegs -- cannot be remapped - ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) - ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3) - ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3) - ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm - SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3) - MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) - DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3) - DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3) - MULLW_MayOflo reg1 reg2 reg3 - -> MULLW_MayOflo (env reg1) (env reg2) (env reg3) - AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) - OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) - XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) - XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm - EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2) - NEG reg1 reg2 -> NEG (env reg1) (env reg2) - NOT reg1 reg2 -> NOT (env reg1) (env reg2) - SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri) - SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri) - SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri) - RLWINM reg1 reg2 sh mb me - -> RLWINM (env reg1) (env reg2) sh mb me - FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3) - FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3) - FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3) - FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3) - FNEG r1 r2 -> FNEG (env r1) (env r2) - FCMP r1 r2 -> FCMP (env r1) (env r2) - FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) - FRSP r1 r2 -> FRSP (env r1) (env r2) - MFCR reg -> MFCR (env reg) - MFLR reg -> MFLR (env reg) - FETCHPC reg -> FETCHPC (env reg) - _ -> instr - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - +import RegClass +import Reg +import Size +import BlockId +import Cmm +import CLabel -jumpDests :: Instr -> [BlockId] -> [BlockId] -jumpDests insn acc - = case insn of - BCC _ id -> id : acc - BCCFAR _ id -> id : acc - BCTR targets -> targets ++ acc - _ -> acc - - --- | Check whether a particular instruction is a jump, branch or call instruction (jumpish) --- We can't just use jumpDests above because the jump might take its arg, --- so the instr won't contain a blockid. --- -isJumpish :: Instr -> Bool -isJumpish instr - = case instr of - BCC{} -> True - BCCFAR{} -> True - BCTR{} -> True - BCTRL{} -> True - BL{} -> True - JMP{} -> True - _ -> False - --- | Change the destination of this jump instruction --- Used in joinToTargets in the linear allocator, when emitting fixup code --- for join points. -patchJump :: Instr -> BlockId -> BlockId -> Instr -patchJump insn old new - = case insn of - BCC cc id - | id == old -> BCC cc new - - BCCFAR cc id - | id == old -> BCCFAR cc new - - BCTR _ -> error "Cannot patch BCTR" +import Outputable +import Unique - _ -> insn +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) = VirtualRegI u + | otherwise + = case size of + FF32 -> VirtualRegD u + FF64 -> VirtualRegD u + _ -> panic "mkVReg" -isRegRegMove :: Instr -> Maybe (Reg,Reg) -isRegRegMove (MR dst src) = Just (src,dst) -isRegRegMove _ = Nothing data JumpDest = DestBlockId BlockId | DestImm Imm @@ -243,71 +56,39 @@ shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump _ other = other +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) --- ----------------------------------------------------------------------------- --- Generating spill instructions - -mkSpillInstr - :: Reg -- register to spill - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr -mkSpillInstr reg delta slot - = let off = spillSlotToOffset slot - in - let sz = case regClass reg of - RcInteger -> II32 - RcDouble -> FF64 - RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match" - in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. +shortcutStatic _ other_static + = other_static -mkLoadInstr - :: Reg -- register to load - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr -mkLoadInstr reg delta slot - = let off = spillSlotToOffset slot - in - let sz = case regClass reg of - RcInteger -> II32 - RcDouble -> FF64 - RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match" - in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) +shortBlockId + :: (BlockId -> Maybe JumpDest) + -> BlockId + -> CLabel +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" -mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr -mkRegRegMoveInstr src dst - = MR dst src -mkBranchInstr - :: BlockId - -> [Instr] - -mkBranchInstr id = [BCC ALWAYS id] - - - -spillSlotSize :: Int -spillSlotSize = 8 - -maxSpillSlots :: Int -maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 - --- convert a spill slot number to a *byte* offset, with no sign: --- decide on a per arch basis whether you are spilling above or below --- the C stack pointer. -spillSlotToOffset :: Int -> Int -spillSlotToOffset slot - | slot >= 0 && slot < maxSpillSlots - = 64 + spillSlotSize * slot - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int maxSpillSlots) +regDotColor :: Reg -> SDoc +regDotColor reg + = case regClass reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index d6993b210b..80c68dd096 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -5,16 +5,6 @@ -- ----------------------------------------------------------------------------- module PPC.Regs ( - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - -- immediates Imm(..), strImmLit, @@ -42,7 +32,10 @@ module PPC.Regs ( -- horrow show freeReg, - globalRegMaybe + globalRegMaybe, + get_GlobalReg_reg_or_addr, + allocatableRegs + ) where @@ -51,78 +44,22 @@ where #include "HsVersions.h" #include "../includes/MachRegs.h" -import RegsBase +import Reg +import RegClass +import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) import Pretty import Outputable ( Outputable(..), pprPanic, panic ) import qualified Outputable -import Unique import Constants import FastBool import Data.Word ( Word8, Word16, Word32 ) import Data.Int ( Int8, Int16, Int32 ) --- sizes ----------------------------------------------------------------------- --- For these three, the "size" also gives the int/float --- distinction, because the instructions for int/float --- differ only in their suffices -data Size - = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80 - deriving Eq - -intSize, floatSize :: Width -> Size -intSize W8 = II8 -intSize W16 = II16 -intSize W32 = II32 -intSize W64 = II64 -intSize other = pprPanic "MachInstrs.intSize" (ppr other) - -floatSize W32 = FF32 -floatSize W64 = FF64 -floatSize other = pprPanic "MachInstrs.intSize" (ppr other) - - -isFloatSize :: Size -> Bool -isFloatSize FF32 = True -isFloatSize FF64 = True -isFloatSize FF80 = True -isFloatSize _ = False - - -wordSize :: Size -wordSize = intSize wordWidth - - -cmmTypeSize :: CmmType -> Size -cmmTypeSize ty - | isFloatType ty = floatSize (typeWidth ty) - | otherwise = intSize (typeWidth ty) - - -sizeToWidth :: Size -> Width -sizeToWidth II8 = W8 -sizeToWidth II16 = W16 -sizeToWidth II32 = W32 -sizeToWidth II64 = W64 -sizeToWidth FF32 = W32 -sizeToWidth FF64 = W64 -sizeToWidth _ = panic "MachInstrs.sizeToWidth" - - -mkVReg :: Unique -> Size -> Reg -mkVReg u size - | not (isFloatSize size) = VirtualRegI u - | otherwise - = case size of - FF32 -> VirtualRegD u - FF64 -> VirtualRegD u - _ -> panic "mkVReg" - - -- immediates ------------------------------------------------------------------ data Imm @@ -490,7 +427,7 @@ freeReg REG_Hp = fastBool False #ifdef REG_HpLim freeReg REG_HpLim = fastBool False #endif -freeReg n = fastBool True +freeReg _ = fastBool True -- | Returns 'Nothing' if this global register is not stored @@ -582,3 +519,26 @@ freeReg _ = 0# globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined" #endif /* powerpc_TARGET_ARCH */ + + +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real +-- register it is in, on this platform, or a CmmExpr denoting the +-- address in the register table holding it. +-- (See also get_GlobalReg_addr in CgUtils.) + +get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr +get_GlobalReg_reg_or_addr mid + = case globalRegMaybe mid of + Just rr -> Left rr + Nothing -> Right (get_GlobalReg_addr mid) + + +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = isFastTrue (freeReg i) + in filter isFree allMachRegNos |