diff options
Diffstat (limited to 'compiler/nativeGen/SPARC')
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1545 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Cond.hs | 53 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Instr.hs | 370 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Ppr.hs | 135 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/RegInfo.hs | 413 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Regs.hs | 197 |
6 files changed, 2160 insertions, 553 deletions
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs new file mode 100644 index 0000000000..d921c12e7a --- /dev/null +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -0,0 +1,1545 @@ +{-# OPTIONS -w #-} +----------------------------------------------------------------------------- +-- +-- Generating machine code (instruction selection) +-- +-- (c) The University of Glasgow 1996-2004 +-- +----------------------------------------------------------------------------- + +module SPARC.CodeGen ( + cmmTopCodeGen, + InstrBlock +) + +where + +#include "HsVersions.h" +#include "nativeGen/NCG.h" +#include "MachDeps.h" + +-- NCG stuff: +import SPARC.Instr +import SPARC.Cond +import SPARC.Regs +import SPARC.RegInfo +import Instruction +import Size +import Reg +import PIC +import NCGMonad + +-- Our intermediate code: +import BlockId +import Cmm +import CLabel + +-- The rest: +import BasicTypes +import StaticFlags ( opt_PIC ) +import OrdList +import qualified Outputable as O +import Outputable +import FastString + +import Control.Monad ( mapAndUnzipM ) +import Data.Int +import DynFlags + +-- | Top level code generation +cmmTopCodeGen + :: DynFlags + -> RawCmmTop + -> NatM [NatCmmTop Instr] + +cmmTopCodeGen _ + (CmmProc info lab params (ListGraph blocks)) + = do + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + +-- picBaseMb <- getPicBaseMaybeNat + let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) + let tops = proc : concat statics + +-- case picBaseMb of +-- Just picBase -> initializePicBase picBase tops +-- Nothing -> return tops + + return tops + + +cmmTopCodeGen _ (CmmData sec dat) = do + return [CmmData sec dat] -- no translation, we just use CmmStatic + + + +basicBlockCodeGen + :: CmmBasicBlock + -> NatM ( [NatBasicBlock Instr] + , [NatCmmTop Instr]) + +basicBlockCodeGen (BasicBlock id stmts) = do + instrs <- stmtsToInstrs stmts + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. + let + (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs + + mkBlocks (NEWBLOCK id) (instrs,blocks,statics) + = ([], BasicBlock id instrs : blocks, statics) + mkBlocks (LDATA sec dat) (instrs,blocks,statics) + = (instrs, blocks, CmmData sec dat:statics) + mkBlocks instr (instrs,blocks,statics) + = (instr:instrs, blocks, statics) + -- in + return (BasicBlock id top : other_blocks, statics) + + +stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock +stmtsToInstrs stmts + = do instrss <- mapM stmtToInstrs stmts + return (concatOL instrss) + + +stmtToInstrs :: CmmStmt -> NatM InstrBlock +stmtToInstrs stmt = case stmt of + CmmNop -> return nilOL + CmmComment s -> return (unitOL (COMMENT s)) + + CmmAssign reg src + | isFloatType ty -> assignReg_FltCode size reg src + | isWord64 ty -> assignReg_I64Code reg src + | otherwise -> assignReg_IntCode size reg src + where ty = cmmRegType reg + size = cmmTypeSize ty + + CmmStore addr src + | isFloatType ty -> assignMem_FltCode size addr src + | isWord64 ty -> assignMem_I64Code addr src + | otherwise -> assignMem_IntCode size addr src + where ty = cmmExprType src + size = cmmTypeSize ty + + CmmCall target result_regs args _ _ + -> genCCall target result_regs args + + CmmBranch id -> genBranch id + CmmCondBranch arg id -> genCondJump id arg + CmmSwitch arg ids -> genSwitch arg ids + CmmJump arg _ -> genJump arg + + CmmReturn _ + -> panic "stmtToInstrs: return statement should have been cps'd away" + + +-------------------------------------------------------------------------------- +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal yields the insns in the correct order. +-- +type InstrBlock + = OrdList Instr + + +-- | Condition codes passed up the tree. +-- +data CondCode + = CondCode Bool Cond InstrBlock + + +-- | a.k.a "Register64" +-- Reg is the lower 32-bit temporary which contains the result. +-- Use getHiVRegFromLo to find the other VRegUnique. +-- +-- Rules of this simplified insn selection game are therefore that +-- the returned Reg may be modified +-- +data ChildCode64 + = ChildCode64 + InstrBlock + Reg + + +-- | Register's passed up the tree. If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. +-- +data Register + = Fixed Size Reg InstrBlock + | Any Size (Reg -> InstrBlock) + + +swizzleRegisterRep :: Register -> Size -> Register +swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code +swizzleRegisterRep (Any _ codefn) size = Any size codefn + + +-- | Grab the Reg for a CmmReg +getRegisterReg :: CmmReg -> Reg + +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u (cmmTypeSize pk) + +getRegisterReg (CmmGlobal mid) + = case get_GlobalReg_reg_or_addr mid of + Left (RealReg rrno) -> RealReg rrno + _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... + + +-- | Memory addressing modes passed up the tree. +data Amode + = Amode AddrMode InstrBlock + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. + +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + + +-- | Check whether an integer will fit in 32 bits. +-- A CmmInt is intended to be truncated to the appropriate +-- number of bits, so here we truncate it to Int64. This is +-- important because e.g. -1 as a CmmInt might be either +-- -1 or 18446744073709551615. +-- +is32BitInteger :: Integer -> Bool +is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 + where i64 = fromIntegral i :: Int64 + + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + + + + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- Expand CmmRegOff. ToDo: should we do it this way around, or convert +-- CmmExprs into CmmRegOff? +mangleIndexTree :: CmmExpr -> CmmExpr +mangleIndexTree (CmmRegOff reg off) + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + + +assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_I64Code addrTree valueTree = do + Amode _ addr_code <- getAmode addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + + (src, code) <- getSomeReg addrTree + let + rhi = getHiVRegFromLo rlo + -- Big-endian store + mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) + mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) + + return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) + + +assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst (cmmTypeSize pk) + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return (vcode `snocOL` mov_hi `snocOL` mov_lo) +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(sparc): invalid lvalue" + + +-- Load a 64 bit word +iselExpr64 (CmmLoad addrTree ty) + | isWord64 ty + = do Amode amode addr_code <- getAmode addrTree + let result + + | AddrRegReg r1 r2 <- amode + = do rlo <- getNewRegNat II32 + tmp <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ ADD False False r1 (RIReg r2) tmp + , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi + , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) + rlo + + | AddrRegImm r1 (ImmInt i) <- amode + = do rlo <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi + , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) + rlo + + result + + +-- Add a literal to a 64 bit integer +iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) + = do ChildCode64 code1 r1_lo <- iselExpr64 e1 + let r1_hi = getHiVRegFromLo r1_lo + + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + return $ ChildCode64 + ( toOL + [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo + , ADD True False r1_hi (RIReg g0) r_dst_hi ]) + r_dst_lo + + +-- Addition of II64 +iselExpr64 (CmmMachOp (MO_Add width) [e1, e2]) + = do ChildCode64 code1 r1_lo <- iselExpr64 e1 + let r1_hi = getHiVRegFromLo r1_lo + + ChildCode64 code2 r2_lo <- iselExpr64 e2 + let r2_hi = getHiVRegFromLo r2_lo + + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + let code = code1 + `appOL` code2 + `appOL` toOL + [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo + , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ] + + return $ ChildCode64 code r_dst_lo + + +iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_lo = mkVReg uq II32 + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return ( + ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo + ) + +-- Convert something into II64 +iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) + = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + -- compute expr and load it into r_dst_lo + (a_reg, a_code) <- getSomeReg expr + + let code = a_code + `appOL` toOL + [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits + , mkRegRegMoveInstr a_reg r_dst_lo ] + + return $ ChildCode64 code r_dst_lo + + +iselExpr64 expr + = pprPanic "iselExpr64(sparc)" (ppr expr) + + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed _ reg code -> + return (reg, code) + + +-- +getRegister :: CmmExpr -> NatM Register + +getRegister (CmmReg reg) + = return (Fixed (cmmTypeSize (cmmRegType reg)) + (getRegisterReg reg) nilOL) + +getRegister tree@(CmmRegOff _ _) + = getRegister (mangleIndexTree tree) + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + + + +-- Load a literal float into a float register. +-- The actual literal is stored in a new data area, and we load it +-- at runtime. +getRegister (CmmLit (CmmFloat f W32)) = do + + -- a label for the new data area + lbl <- getNewLabelNat + tmp <- getNewRegNat II32 + + let code dst = toOL [ + -- the data area + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f W32)], + + -- load the literal + SETHI (HI (ImmCLbl lbl)) tmp, + LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] + + return (Any FF32 code) + +getRegister (CmmLit (CmmFloat d W64)) = do + lbl <- getNewLabelNat + tmp <- getNewRegNat II32 + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat d W64)], + SETHI (HI (ImmCLbl lbl)) tmp, + LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] + return (Any FF64 code) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x + MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x + + MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x + MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x + + MO_FF_Conv W64 W32-> coerceDbl2Flt x + MO_FF_Conv W32 W64-> coerceFlt2Dbl x + + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + -- Conversions which are a nop on sparc + MO_UU_Conv from to + | from == to -> conversionNop (intSize to) x + MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) + MO_UU_Conv W32 to -> conversionNop (intSize to) x + MO_SS_Conv W32 to -> conversionNop (intSize to) x + + MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x + + -- sign extension + MO_SS_Conv W8 W32 -> integerExtend W8 W32 x + MO_SS_Conv W16 W32 -> integerExtend W16 W32 x + MO_SS_Conv W8 W16 -> integerExtend W8 W16 x + + other_op -> panic ("Unknown unary mach op: " ++ show mop) + where + + -- | sign extend and widen + integerExtend + :: Width -- ^ width of source expression + -> Width -- ^ width of result + -> CmmExpr -- ^ source expression + -> NatM Register + + integerExtend from to expr + = do -- load the expr into some register + (reg, e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + let bitCount + = case (from, to) of + (W8, W32) -> 24 + (W16, W32) -> 16 + (W8, W16) -> 24 + let code dst + = e_code + + -- local shift word left to load the sign bit + `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp + + -- arithmetic shift right to sign extend + `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst + + return (Any (intSize to) code) + + + conversionNop new_rep expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_rep) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_Eq rep -> condIntReg EQQ x y + MO_Ne rep -> condIntReg NE x y + + MO_S_Gt rep -> condIntReg GTT x y + MO_S_Ge rep -> condIntReg GE x y + MO_S_Lt rep -> condIntReg LTT x y + MO_S_Le rep -> condIntReg LE x y + + MO_U_Gt W32 -> condIntReg GTT x y + MO_U_Ge W32 -> condIntReg GE x y + MO_U_Lt W32 -> condIntReg LTT x y + MO_U_Le W32 -> condIntReg LE x y + + MO_U_Gt W16 -> condIntReg GU x y + MO_U_Ge W16 -> condIntReg GEU x y + MO_U_Lt W16 -> condIntReg LU x y + MO_U_Le W16 -> condIntReg LEU x y + + MO_Add W32 -> trivialCode W32 (ADD False False) x y + MO_Sub W32 -> trivialCode W32 (SUB False False) x y + + MO_S_MulMayOflo rep -> imulMayOflo rep x y + + MO_S_Quot W32 -> idiv True False x y + MO_U_Quot W32 -> idiv False False x y + + MO_S_Rem W32 -> irem True x y + MO_U_Rem W32 -> irem False x y + + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_F_Add w -> trivialFCode w FADD x y + MO_F_Sub w -> trivialFCode w FSUB x y + MO_F_Mul w -> trivialFCode w FMUL x y + MO_F_Quot w -> trivialFCode w FDIV x y + + MO_And rep -> trivialCode rep (AND False) x y + MO_Or rep -> trivialCode rep (OR False) x y + MO_Xor rep -> trivialCode rep (XOR False) x y + + MO_Mul rep -> trivialCode rep (SMUL False) x y + + MO_Shl rep -> trivialCode rep SLL x y + MO_U_Shr rep -> trivialCode rep SRL x y + MO_S_Shr rep -> trivialCode rep SRA x y + +{- + MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 + [promote x, promote y]) + where promote x = CmmMachOp MO_F32_to_Dbl [x] + MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 + [x, y]) +-} + other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) + where + -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + + + -- | Generate an integer division instruction. + idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned division with a 32 bit numerator, + -- we can just clear the Y register. + idiv False cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- For _signed_ division with a 32 bit numerator, + -- we have to sign extend the numerator into the Y register. + idiv True cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend + , SRA tmp (RIImm (ImmInt 16)) tmp + + , WRY tmp g0 + , SDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- | Do an integer remainder. + -- + -- NOTE: The SPARC v8 architecture manual says that integer division + -- instructions _may_ generate a remainder, depending on the implementation. + -- If so it is _recommended_ that the remainder is placed in the Y register. + -- + -- The UltraSparc 2007 manual says Y is _undefined_ after division. + -- + -- The SPARC T2 doesn't store the remainder, not sure about the others. + -- It's probably best not to worry about it, and just generate our own + -- remainders. + -- + irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned operands: + -- Division is between a 64 bit numerator and a 32 bit denominator, + -- so we still have to clear the Y register. + irem False x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV False a_reg (RIReg b_reg) tmp_reg + , UMUL False tmp_reg (RIReg b_reg) tmp_reg + , SUB False False a_reg (RIReg tmp_reg) dst] + + return (Any II32 code) + + + -- For signed operands: + -- Make sure to sign extend into the Y register, or the remainder + -- will have the wrong sign when the numerator is negative. + -- + -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, + -- not the full 32. Not sure why this is, something to do with overflow? + -- If anyone cares enough about the speed of signed remainder they + -- can work it out themselves (then tell me). -- BL 2009/01/20 + + irem True x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp1_reg <- getNewRegNat II32 + tmp2_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , WRY tmp1_reg g0 + + , SDIV False a_reg (RIReg b_reg) tmp2_reg + , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg + , SUB False False a_reg (RIReg tmp2_reg) dst] + + return (Any II32 code) + + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register + imulMayOflo rep a b = do + (a_reg, a_code) <- getSomeReg a + (b_reg, b_code) <- getSomeReg b + res_lo <- getNewRegNat II32 + res_hi <- getNewRegNat II32 + let + shift_amt = case rep of + W32 -> 31 + W64 -> 63 + _ -> panic "shift_amt" + code dst = a_code `appOL` b_code `appOL` + toOL [ + SMUL False a_reg (RIReg b_reg) res_lo, + RDY res_hi, + SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, + SUB False False res_lo (RIReg res_hi) dst + ] + return (Any II32 code) + +getRegister (CmmLoad mem pk) = do + Amode src code <- getAmode mem + let + code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst + return (Any (cmmTypeSize pk) code__2) + +getRegister (CmmLit (CmmInt i _)) + | fits13Bits i + = let + src = ImmInt (fromInteger i) + code dst = unitOL (OR False g0 (RIImm src) dst) + in + return (Any II32 code) + +getRegister (CmmLit lit) + = let rep = cmmLitType lit + imm = litToImm lit + code dst = toOL [ + SETHI (HI imm) dst, + OR False dst (RIImm (LO imm)) dst] + in return (Any II32 code) + + + +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits (-i) + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (-(fromInteger i)) + return (Amode (AddrRegImm reg off) code) + + +getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits i + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (fromInteger i) + return (Amode (AddrRegImm reg off) code) + +getAmode (CmmMachOp (MO_Add rep) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + let + code = codeX `appOL` codeY + return (Amode (AddrRegReg regX regY) code) + +getAmode (CmmLit lit) + = do + let imm__2 = litToImm lit + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + + let code = toOL [ SETHI (HI imm__2) tmp1 + , OR False tmp1 (RIImm (LO imm__2)) tmp2] + + return (Amode (AddrRegReg tmp2 g0) code) + +getAmode other + = do + (reg, code) <- getSomeReg other + let + off = ImmInt 0 + return (Amode (AddrRegImm reg off) code) + + +getCondCode :: CmmExpr -> NatM CondCode +getCondCode (CmmMachOp mop [x, y]) + = + case mop of + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ x y + MO_Ne rep -> condIntCode NE x y + + MO_S_Gt rep -> condIntCode GTT x y + MO_S_Ge rep -> condIntCode GE x y + MO_S_Lt rep -> condIntCode LTT x y + MO_S_Le rep -> condIntCode LE x y + + MO_U_Gt rep -> condIntCode GU x y + MO_U_Ge rep -> condIntCode GEU x y + MO_U_Lt rep -> condIntCode LU x y + MO_U_Le rep -> condIntCode LEU x y + + other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) + +getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) + + + + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. + +condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode +condIntCode cond x (CmmLit (CmmInt y rep)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + let + src2 = ImmInt (fromInteger y) + code' = code `snocOL` SUB False True src1 (RIImm src2) g0 + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code__2 = code1 `appOL` code2 `snocOL` + SUB False True src1 (RIReg src2) g0 + return (CondCode False cond code__2) + + +condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp <- getNewRegNat FF64 + let + promote x = FxTOy FF32 FF64 x tmp + + pk1 = cmmExprType x + pk2 = cmmExprType y + + code__2 = + if pk1 `cmmEqType` pk2 then + code1 `appOL` code2 `snocOL` + FCMP True (cmmTypeSize pk1) src1 src2 + else if typeWidth pk1 == W32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + FCMP True FF64 tmp src2 + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + FCMP True FF64 src1 tmp + return (CondCode True cond code__2) + + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + + +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_IntCode pk reg src = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst + where + dst = getRegisterReg reg + + + +-- Floating point assignment to memory +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_FltCode pk addr src = do + Amode dst__2 code1 <- getAmode addr + (src__2, code2) <- getSomeReg src + tmp1 <- getNewRegNat pk + let + pk__2 = cmmExprType src + code__2 = code1 `appOL` code2 `appOL` + if sizeToWidth pk == typeWidth pk__2 + then unitOL (ST pk src__2 dst__2) + else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1 + , ST pk tmp1 dst__2] + return code__2 + +-- Floating point assignment to a register/temporary +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_FltCode pk dstCmmReg srcCmmExpr = do + srcRegister <- getRegister srcCmmExpr + let dstReg = getRegisterReg dstCmmReg + + return $ case srcRegister of + Any _ code -> code dstReg + Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg + + + + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +genJump (CmmLit (CmmLabel lbl)) + = return (toOL [CALL (Left target) 0 True, NOP]) + where + target = ImmCLbl lbl + +genJump tree + = do + (target, code) <- getSomeReg tree + return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) + +-- ----------------------------------------------------------------------------- +-- Unconditional branches + +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +SPARC: First, we have to ensure that the condition codes are set +according to the supplied comparison operation. We generate slightly +different code for floating point comparisons, because a floating +point operation cannot directly precede a @BF@. We assume the worst +and fill that slot with a @NOP@. + +SPARC: Do not fill the delay slots here; you will confuse the register +allocator. +-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + + + +genCondJump bid bool = do + CondCode is_float cond code <- getCondCode bool + return ( + code `appOL` + toOL ( + if is_float + then [NOP, BF cond False bid, NOP] + else [BI cond False bid, NOP] + ) + ) + + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +{- + The SPARC calling convention is an absolute + nightmare. The first 6x32 bits of arguments are mapped into + %o0 through %o5, and the remaining arguments are dumped to the + stack, beginning at [%sp+92]. (Note that %o6 == %sp.) + + If we have to put args on the stack, move %o6==%sp down by + the number of words to go on the stack, to ensure there's enough space. + + According to Fraser and Hanson's lcc book, page 478, fig 17.2, + 16 words above the stack pointer is a word for the address of + a structure return value. I use this as a temporary location + for moving values from float to int regs. Certainly it isn't + safe to put anything in the 16 words starting at %sp, since + this area can get trashed at any time due to window overflows + caused by signal handlers. + + A final complication (if the above isn't enough) is that + we can't blithely calculate the arguments one by one into + %o0 .. %o5. Consider the following nested calls: + + fff a (fff b c) + + Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately + the inner call will itself use %o0, which trashes the value put there + in preparation for the outer call. Upshot: we need to calculate the + args into temporary regs, and move those to arg regs or onto the + stack only immediately prior to the call proper. Sigh. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + +-} + + +-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream +-- are guaranteed to take place before writes afterwards (unlike on PowerPC). +-- Ref: Section 8.4 of the SPARC V9 Architecture manual. +-- +-- In the SPARC case we don't need a barrier. +-- +genCCall (CmmPrim (MO_WriteBarrier)) _ _ + = do return nilOL + +genCCall target dest_regs argsAndHints + = do + -- strip hints from the arg regs + let args :: [CmmExpr] + args = map hintlessCmm argsAndHints + + + -- work out the arguments, and assign them to integer regs + argcode_and_vregs <- mapM arg_to_int_vregs args + let (argcodes, vregss) = unzip argcode_and_vregs + let vregs = concat vregss + + let n_argRegs = length allArgRegs + let n_argRegs_used = min (length vregs) n_argRegs + + + -- deal with static vs dynamic call targets + callinsns <- case target of + CmmCallee (CmmLit (CmmLabel lbl)) conv -> + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + + CmmCallee expr conv + -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + + CmmPrim mop + -> do res <- outOfLineFloatOp mop + lblOrMopExpr <- case res of + Left lbl -> do + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + + Right mopExpr -> do + (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + + return lblOrMopExpr + + let argcode = concatOL argcodes + + let (move_sp_down, move_sp_up) + = let diff = length vregs - n_argRegs + nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment + in if nn <= 0 + then (nilOL, nilOL) + else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) + + let transfer_code + = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE) + + return + $ argcode `appOL` + move_sp_down `appOL` + transfer_code `appOL` + callinsns `appOL` + unitOL NOP `appOL` + move_sp_up `appOL` + assign_code dest_regs + + +-- | Generate code to calculate an argument, and move it into one +-- or two integer vregs. +arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) +arg_to_int_vregs arg + + -- If the expr produces a 64 bit int, then we can just use iselExpr64 + | isWord64 (cmmExprType arg) + = do (ChildCode64 code r_lo) <- iselExpr64 arg + let r_hi = getHiVRegFromLo r_lo + return (code, [r_hi, r_lo]) + + | otherwise + = do (src, code) <- getSomeReg arg + tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg) + let pk = cmmExprType arg + + case cmmTypeSize pk of + + -- Load a 64 bit float return value into two integer regs. + FF64 -> do + v1 <- getNewRegNat II32 + v2 <- getNewRegNat II32 + + let Just f0_high = fPair f0 + + let code2 = + code `snocOL` + FMOV FF64 src f0 `snocOL` + ST FF32 f0 (spRel 16) `snocOL` + LD II32 (spRel 16) v1 `snocOL` + ST FF32 f0_high (spRel 16) `snocOL` + LD II32 (spRel 16) v2 + + return (code2, [v1,v2]) + + -- Load a 32 bit float return value into an integer reg + FF32 -> do + v1 <- getNewRegNat II32 + + let code2 = + code `snocOL` + ST FF32 src (spRel 16) `snocOL` + LD II32 (spRel 16) v1 + + return (code2, [v1]) + + -- Move an integer return value into its destination reg. + other -> do + v1 <- getNewRegNat II32 + + let code2 = + code `snocOL` + OR False g0 (RIReg src) v1 + + return (code2, [v1]) + + +-- | Move args from the integer vregs into which they have been +-- marshalled, into %o0 .. %o5, and the rest onto the stack. +-- +move_final :: [Reg] -> [Reg] -> Int -> [Instr] + +-- all args done +move_final [] _ offset + = [] + +-- out of aregs; move to stack +move_final (v:vs) [] offset + = ST II32 v (spRel offset) + : move_final vs [] (offset+1) + +-- move into an arg (%o[0..5]) reg +move_final (v:vs) (a:az) offset + = OR False g0 (RIReg v) a + : move_final vs az offset + + +-- | Assign results returned from the call into their +-- desination regs. +-- +assign_code :: [CmmHinted LocalReg] -> OrdList Instr +assign_code [] = nilOL + +assign_code [CmmHinted dest _hint] + = let rep = localRegType dest + width = typeWidth rep + r_dest = getRegisterReg (CmmLocal dest) + + result + | isFloatType rep + , W32 <- width + = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest + + | isFloatType rep + , W64 <- width + = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest + + | not $ isFloatType rep + , W32 <- width + = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest + + | not $ isFloatType rep + , W64 <- width + , r_dest_hi <- getHiVRegFromLo r_dest + = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi + , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest] + in result + + +-- | Generate a call to implement an out-of-line floating point operation +outOfLineFloatOp + :: CallishMachOp + -> NatM (Either CLabel CmmExpr) + +outOfLineFloatOp mop + = do let functionName + = outOfLineFloatOp_table mop + + dflags <- getDynFlagsNat + mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference + $ mkForeignLabel functionName Nothing True IsFunction + + let mopLabelOrExpr + = case mopExpr of + CmmLit (CmmLabel lbl) -> Left lbl + _ -> Right mopExpr + + return mopLabelOrExpr + + +-- | Decide what C function to use to implement a CallishMachOp +-- +outOfLineFloatOp_table + :: CallishMachOp + -> FastString + +outOfLineFloatOp_table mop + = case mop of + MO_F32_Exp -> fsLit "expf" + MO_F32_Log -> fsLit "logf" + MO_F32_Sqrt -> fsLit "sqrtf" + MO_F32_Pwr -> fsLit "powf" + + MO_F32_Sin -> fsLit "sinf" + MO_F32_Cos -> fsLit "cosf" + MO_F32_Tan -> fsLit "tanf" + + MO_F32_Asin -> fsLit "asinf" + MO_F32_Acos -> fsLit "acosf" + MO_F32_Atan -> fsLit "atanf" + + MO_F32_Sinh -> fsLit "sinhf" + MO_F32_Cosh -> fsLit "coshf" + MO_F32_Tanh -> fsLit "tanhf" + + MO_F64_Exp -> fsLit "exp" + MO_F64_Log -> fsLit "log" + MO_F64_Sqrt -> fsLit "sqrt" + MO_F64_Pwr -> fsLit "pow" + + MO_F64_Sin -> fsLit "sin" + MO_F64_Cos -> fsLit "cos" + MO_F64_Tan -> fsLit "tan" + + MO_F64_Asin -> fsLit "asin" + MO_F64_Acos -> fsLit "acos" + MO_F64_Atan -> fsLit "atan" + + MO_F64_Sinh -> fsLit "sinh" + MO_F64_Cosh -> fsLit "cosh" + MO_F64_Tanh -> fsLit "tanh" + + other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op " + (pprCallishMachOp mop) + + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock +genSwitch expr ids + | opt_PIC + = error "MachCodeGen: sparc genSwitch PIC not finished\n" + + | otherwise + = do (e_reg, e_code) <- getSomeReg expr + + base_reg <- getNewRegNat II32 + offset_reg <- getNewRegNat II32 + dst <- getNewRegNat II32 + + label <- getNewLabelNat + let jumpTable = map jumpTableEntry ids + + return $ e_code `appOL` + toOL + -- the jump table + [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable) + + -- load base of jump table + , SETHI (HI (ImmCLbl label)) base_reg + , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg + + -- the addrs in the table are 32 bits wide.. + , SLL e_reg (RIImm $ ImmInt 2) offset_reg + + -- load and jump to the destination + , LD II32 (AddrRegReg base_reg offset_reg) dst + , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids] + , NOP ] + + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- Turn those condition codes into integers now (when they appear on +-- the right hand side of an assignment). +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any II32 code__2) + +condIntReg EQQ x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB False True g0 (RIReg dst) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any II32 code__2) + +condIntReg NE x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any II32 code__2) + +condIntReg NE x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB False True g0 (RIReg dst) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any II32 code__2) + +condIntReg cond x y = do + bid1@(BlockId lbl1) <- getBlockIdNat + bid2@(BlockId lbl2) <- getBlockIdNat + CondCode _ cond cond_code <- condIntCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + BI cond False bid1, NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False bid2, NOP, + NEWBLOCK bid1, + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK bid2] + return (Any II32 code__2) + +condFltReg cond x y = do + bid1@(BlockId lbl1) <- getBlockIdNat + bid2@(BlockId lbl2) <- getBlockIdNat + CondCode _ cond cond_code <- condFltCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + NOP, + BF cond False bid1, NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False bid2, NOP, + NEWBLOCK bid1, + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK bid2] + return (Any II32 code__2) + + + +-- ----------------------------------------------------------------------------- +-- 'trivial*Code': deal with trivial instructions + +-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', +-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. +-- Only look for constants on the right hand side, because that's +-- where the generic optimizer will have put them. + +-- Similarly, for unary instructions, we don't have to worry about +-- matching an StInt as the argument, because genericOpt will already +-- have handled the constant-folding. + +trivialCode pk instr x (CmmLit (CmmInt y d)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + src2 = ImmInt (fromInteger y) + code__2 dst = code `snocOL` instr src1 (RIImm src2) dst + return (Any II32 code__2) + +trivialCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `snocOL` + instr src1 (RIReg src2) dst + return (Any II32 code__2) + +------------ +trivialFCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x) + tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y) + tmp <- getNewRegNat FF64 + let + promote x = FxTOy FF32 FF64 x tmp + + pk1 = cmmExprType x + pk2 = cmmExprType y + + code__2 dst = + if pk1 `cmmEqType` pk2 then + code1 `appOL` code2 `snocOL` + instr (floatSize pk) src1 src2 dst + else if typeWidth pk1 == W32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + instr FF64 tmp src2 dst + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + instr FF64 src1 tmp dst + return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) + code__2) + +------------ +trivialUCode size instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat size + let + code__2 dst = code `snocOL` instr (RIReg src) dst + return (Any size code__2) + +------------- +trivialUFCode pk instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat pk + let + code__2 dst = code `snocOL` instr src dst + return (Any pk code__2) + + + +coerceDbl2Flt :: CmmExpr -> NatM Register +coerceFlt2Dbl :: CmmExpr -> NatM Register + + +coerceInt2FP width1 width2 x = do + (src, code) <- getSomeReg x + let + code__2 dst = code `appOL` toOL [ + ST (intSize width1) src (spRel (-2)), + LD (intSize width1) (spRel (-2)) dst, + FxTOy (intSize width1) (floatSize width2) dst dst] + return (Any (floatSize $ width2) code__2) + + +-- | Coerce a floating point value to integer +-- +-- NOTE: On sparc v9 there are no instructions to move a value from an +-- FP register directly to an int register, so we have to use a load/store. +-- +coerceFP2Int width1 width2 x + = do let fsize1 = floatSize width1 + fsize2 = floatSize width2 + + isize2 = intSize width2 + + (fsrc, code) <- getSomeReg x + fdst <- getNewRegNat fsize2 + + let code2 dst + = code + `appOL` toOL + -- convert float to int format, leaving it in a float reg. + [ FxTOy fsize1 isize2 fsrc fdst + + -- store the int into mem, then load it back to move + -- it into an actual int reg. + , ST fsize2 fdst (spRel (-2)) + , LD isize2 (spRel (-2)) dst] + + return (Any isize2 code2) + +------------ +coerceDbl2Flt x = do + (src, code) <- getSomeReg x + return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) + +------------ +coerceFlt2Dbl x = do + (src, code) <- getSomeReg x + return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) + + + +-- eXTRA_STK_ARGS_HERE + +-- We (allegedly) put the first six C-call arguments in registers; +-- where do we start putting the rest of them? + +-- Moved from Instrs (SDM): + +eXTRA_STK_ARGS_HERE :: Int +eXTRA_STK_ARGS_HERE + = 23 diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs new file mode 100644 index 0000000000..d0f12efcf5 --- /dev/null +++ b/compiler/nativeGen/SPARC/Cond.hs @@ -0,0 +1,53 @@ + +module SPARC.Cond ( + Cond(..), + condUnsigned, + condToSigned, + condToUnsigned +) + +where + +-- | Branch condition codes. +data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + | NEG + | NEVER + | POS + | VC + | VS + deriving Eq + + +condUnsigned :: Cond -> Bool +condUnsigned GU = True +condUnsigned LU = True +condUnsigned GEU = True +condUnsigned LEU = True +condUnsigned _ = False + + +condToSigned :: Cond -> Cond +condToSigned GU = GTT +condToSigned LU = LTT +condToSigned GEU = GE +condToSigned LEU = LE +condToSigned x = x + + +condToUnsigned :: Cond -> Cond +condToUnsigned GTT = GU +condToUnsigned LTT = LU +condToUnsigned GE = GEU +condToUnsigned LE = LEU +condToUnsigned x = x diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 9c332317ea..6dc6477f9c 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -10,56 +10,55 @@ #include "nativeGen/NCG.h" module SPARC.Instr ( - Cond(..), RI(..), Instr(..), - riZero, - fpRelEA, - moveSp, - fPair, + maxSpillSlots ) where -import BlockId -import RegsBase import SPARC.Regs +import SPARC.Cond +import Instruction +import RegClass +import Reg +import Size + +import BlockId import Cmm import Outputable -import Constants ( wORD_SIZE ) +import Constants (rESERVED_C_STACK_BYTES ) import FastString +import FastBool import GHC.Exts --- | Branch condition codes. -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | NEVER - | POS - | VC - | VS - deriving Eq - - -- | Register or immediate data RI = RIReg Reg | RIImm Imm --- | SPARC isntruction set. +-- | instance for sparc instruction set +instance Instruction Instr where + regUsageOfInstr = sparc_regUsageOfInstr + patchRegsOfInstr = sparc_patchRegsOfInstr + isJumpishInstr = sparc_isJumpishInstr + jumpDestsOfInstr = sparc_jumpDestsOfInstr + patchJumpInstr = sparc_patchJumpInstr + mkSpillInstr = sparc_mkSpillInstr + mkLoadInstr = sparc_mkLoadInstr + takeDeltaInstr = sparc_takeDeltaInstr + isMetaInstr = sparc_isMetaInstr + mkRegRegMoveInstr = sparc_mkRegRegMoveInstr + takeRegRegMoveInstr = sparc_takeRegRegMoveInstr + mkJumpInstr = sparc_mkJumpInstr + + +-- | SPARC instruction set. +-- Not complete. This is only the ones we need. +-- data Instr -- meta ops -------------------------------------------------- @@ -78,12 +77,6 @@ data Instr -- specify current stack offset for benefit of subsequent passes. | DELTA Int - -- | spill this reg to a stack slot - | SPILL Reg Int - - -- | reload this reg from a stack slot - | RELOAD Int Reg - -- real instrs ----------------------------------------------- -- Loads and stores. | LD Size AddrMode Reg -- size, src, dst @@ -157,39 +150,290 @@ data Instr | CALL (Either Imm Reg) Int Bool -- target, args, terminal --- | Check if a RI represents a zero value. --- - a literal zero --- - register %g0, which is always zero. +-- | regUsage returns the sets of src and destination registers used +-- by a particular instruction. Machine registers that are +-- pre-allocated to stgRegs are filtered out, because they are +-- uninteresting from a register allocation standpoint. (We wouldn't +-- want them to end up on the free list!) As far as we are concerned, +-- the fixed registers simply don't exist (for allocation purposes, +-- anyway). + +-- regUsage doesn't need to do any trickery for jumps and such. Just +-- state precisely the regs read and written by that insn. The +-- consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. +-- +sparc_regUsageOfInstr :: Instr -> RegUsage +sparc_regUsageOfInstr instr + = case instr of + LD _ addr reg -> usage (regAddr addr, [reg]) + ST _ reg addr -> usage (reg : regAddr addr, []) + ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + RDY rd -> usage ([], [rd]) + WRY r1 r2 -> usage ([r1, r2], []) + AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SLL r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SRL r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SRA r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SETHI _ reg -> usage ([], [reg]) + FABS _ r1 r2 -> usage ([r1], [r2]) + FADD _ r1 r2 r3 -> usage ([r1, r2], [r3]) + FCMP _ _ r1 r2 -> usage ([r1, r2], []) + FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3]) + FMOV _ r1 r2 -> usage ([r1], [r2]) + FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3]) + FNEG _ r1 r2 -> usage ([r1], [r2]) + FSQRT _ r1 r2 -> usage ([r1], [r2]) + FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3]) + FxTOy _ _ r1 r2 -> usage ([r1], [r2]) + + JMP addr -> usage (regAddr addr, []) + JMP_TBL addr _ -> usage (regAddr addr, []) + + CALL (Left _ ) _ True -> noUsage + CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs) + CALL (Right reg) _ True -> usage ([reg], []) + CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs) + _ -> noUsage + + where + usage (src, dst) + = RU (filter interesting src) (filter interesting dst) + + regAddr (AddrRegReg r1 r2) = [r1, r2] + regAddr (AddrRegImm r1 _) = [r1] + + regRI (RIReg r) = [r] + regRI _ = [] + + +-- | Interesting regs are virtuals, or ones that are allocatable +-- by the register allocator. +interesting :: Reg -> Bool +interesting reg + = case reg of + VirtualRegI _ -> True + VirtualRegHi _ -> True + VirtualRegF _ -> True + VirtualRegD _ -> True + RealReg i -> isFastTrue (freeReg i) + + + +-- | Apply a given mapping to tall the register references in this instruction. +sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +sparc_patchRegsOfInstr instr env = case instr of + LD sz addr reg -> LD sz (fixAddr addr) (env reg) + ST sz reg addr -> ST sz (env reg) (fixAddr addr) + + ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2) + SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) + UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) + SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) + UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) + SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) + RDY rd -> RDY (env rd) + WRY r1 r2 -> WRY (env r1) (env r2) + AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) + ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) + OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) + ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2) + XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2) + XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2) + SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2) + SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2) + SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2) + + SETHI imm reg -> SETHI imm (env reg) + + FABS s r1 r2 -> FABS s (env r1) (env r2) + FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3) + FCMP e s r1 r2 -> FCMP e s (env r1) (env r2) + FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3) + FMOV s r1 r2 -> FMOV s (env r1) (env r2) + FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3) + FNEG s r1 r2 -> FNEG s (env r1) (env r2) + FSQRT s r1 r2 -> FSQRT s (env r1) (env r2) + FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3) + FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2) + + JMP addr -> JMP (fixAddr addr) + JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids + + CALL (Left i) n t -> CALL (Left i) n t + CALL (Right r) n t -> CALL (Right (env r)) n t + _ -> instr + + where + fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) + fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i + + fixRI (RIReg r) = RIReg (env r) + fixRI other = other + + +-------------------------------------------------------------------------------- +sparc_isJumpishInstr :: Instr -> Bool +sparc_isJumpishInstr instr + = case instr of + BI{} -> True + BF{} -> True + JMP{} -> True + JMP_TBL{} -> True + CALL{} -> True + _ -> False + +sparc_jumpDestsOfInstr :: Instr -> [BlockId] +sparc_jumpDestsOfInstr insn + = case insn of + BI _ _ id -> [id] + BF _ _ id -> [id] + JMP_TBL _ ids -> ids + _ -> [] + + +sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr +sparc_patchJumpInstr insn patchF + = case insn of + BI cc annul id -> BI cc annul (patchF id) + BF cc annul id -> BF cc annul (patchF id) + _ -> insn + + +-------------------------------------------------------------------------------- +-- | Make a spill instruction. +-- On SPARC we spill below frame pointer leaving 2 words/spill +sparc_mkSpillInstr + :: Reg -- ^ register to spill + -> Int -- ^ current stack delta + -> Int -- ^ spill slot to use + -> Instr + +sparc_mkSpillInstr reg _ slot + = let off = spillSlotToOffset slot + off_w = 1 + (off `div` 4) + sz = case regClass reg of + RcInteger -> II32 + RcFloat -> FF32 + RcDouble -> FF64 + + in ST sz reg (fpRel (negate off_w)) + + +-- | Make a spill reload instruction. +sparc_mkLoadInstr + :: Reg -- ^ register to load + -> Int -- ^ current stack delta + -> Int -- ^ spill slot to use + -> Instr + +sparc_mkLoadInstr reg _ slot + = let off = spillSlotToOffset slot + off_w = 1 + (off `div` 4) + sz = case regClass reg of + RcInteger -> II32 + RcFloat -> FF32 + RcDouble -> FF64 + + in LD sz (fpRel (- off_w)) reg + +-- | Convert a spill slot number to a *byte* offset, with no sign. -- -riZero :: RI -> Bool -riZero (RIImm (ImmInt 0)) = True -riZero (RIImm (ImmInteger 0)) = True -riZero (RIReg (RealReg 0)) = True -riZero _ = False +spillSlotToOffset :: Int -> Int +spillSlotToOffset slot + | slot >= 0 && slot < maxSpillSlots + = 64 + spillSlotSize * slot + + | otherwise + = pprPanic "spillSlotToOffset:" + ( text "invalid spill location: " <> int slot + $$ text "maxSpillSlots: " <> int maxSpillSlots) + + +-- | We need 8 bytes because our largest registers are 64 bit. +spillSlotSize :: Int +spillSlotSize = 8 --- | Calculate the effective address which would be used by the --- corresponding fpRel sequence. fpRel is in MachRegs.lhs, --- alas -- can't have fpRelEA here because of module dependencies. -fpRelEA :: Int -> Reg -> Instr -fpRelEA n dst - = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst +-- | The maximum number of spill slots available on the C stack. +-- If we use up all of the slots, then we're screwed. +maxSpillSlots :: Int +maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 --- | Code to shift the stack pointer by n words. -moveSp :: Int -> Instr -moveSp n - = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp +-------------------------------------------------------------------------------- +-- | See if this instruction is telling us the current C stack delta +sparc_takeDeltaInstr + :: Instr + -> Maybe Int + +sparc_takeDeltaInstr instr + = case instr of + DELTA i -> Just i + _ -> Nothing + + +sparc_isMetaInstr + :: Instr + -> Bool + +sparc_isMetaInstr instr + = case instr of + COMMENT{} -> True + LDATA{} -> True + NEWBLOCK{} -> True + DELTA{} -> True + _ -> False + + +-- | Make a reg-reg move instruction. +-- On SPARC v8 there are no instructions to move directly between +-- floating point and integer regs. If we need to do that then we +-- have to go via memory. +-- +sparc_mkRegRegMoveInstr + :: Reg + -> Reg + -> Instr + +sparc_mkRegRegMoveInstr src dst + = case regClass src of + RcInteger -> ADD False False src (RIReg g0) dst + RcDouble -> FMOV FF64 src dst + RcFloat -> FMOV FF32 src dst + + +-- | Check whether an instruction represents a reg-reg move. +-- The register allocator attempts to eliminate reg->reg moves whenever it can, +-- by assigning the src and dest temporaries to the same real register. +-- +sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) +sparc_takeRegRegMoveInstr instr + = case instr of + ADD False False src (RIReg src2) dst + | g0 == src2 -> Just (src, dst) + + FMOV FF64 src dst -> Just (src, dst) + FMOV FF32 src dst -> Just (src, dst) + _ -> Nothing --- | Produce the second-half-of-a-double register given the first half. -fPair :: Reg -> Maybe Reg -fPair (RealReg n) - | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) +-- | Make an unconditional branch instruction. +sparc_mkJumpInstr + :: BlockId + -> [Instr] -fPair (VirtualRegD u) - = Just (VirtualRegHi u) +sparc_mkJumpInstr id + = [BI ALWAYS False id + , NOP] -- fill the branch delay slot. -fPair _ - = trace ("MachInstrs.fPair: can't get high half of supposed double reg ") - Nothing diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index 7d64df1b15..a0d5fffce1 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -7,12 +7,15 @@ ----------------------------------------------------------------------------- module SPARC.Ppr ( + pprNatCmmTop, + pprBasicBlock, + pprSectionHeader, + pprData, + pprInstr, pprUserReg, pprSize, pprImm, - pprSectionHeader, - pprDataItem, - pprInstr + pprDataItem ) where @@ -20,20 +23,119 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" -import PprBase -import RegsBase import SPARC.Regs +import SPARC.RegInfo import SPARC.Instr +import SPARC.Cond +import Instruction +import Reg +import Size +import PprBase import BlockId import Cmm - import CLabel -import Panic ( panic ) import Unique ( pprUnique ) +import qualified Outputable +import Outputable (Outputable, panic) import Pretty import FastString +import Data.Word + +-- ----------------------------------------------------------------------------- +-- Printing this stuff out + +pprNatCmmTop :: NatCmmTop Instr -> Doc +pprNatCmmTop (CmmData section dats) = + pprSectionHeader section $$ vcat (map pprData dats) + + -- special case for split markers: +pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl + +pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = + pprSectionHeader Text $$ + (if null info then -- blocks guaranteed not null, so label needed + pprLabel lbl + else +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + <> char ':' $$ +#endif + vcat (map pprData info) $$ + pprLabel (entryLblToInfoLbl lbl) + ) $$ + vcat (map pprBasicBlock blocks) + -- above: Even the first block gets a label, because with branch-chain + -- elimination, it might be the target of a goto. +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- If we are using the .subsections_via_symbols directive + -- (available on recent versions of Darwin), + -- we have to make sure that there is some kind of reference + -- from the entry code to a label on the _top_ of of the info table, + -- so that the linker will not think it is unreferenced and dead-strip + -- it. That's why the label is called a DeadStripPreventer (_dsp). + $$ if not (null info) + then text "\t.long " + <+> pprCLabel_asm (entryLblToInfoLbl lbl) + <+> char '-' + <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + else empty +#endif + + +pprBasicBlock :: NatBasicBlock Instr -> Doc +pprBasicBlock (BasicBlock (BlockId id) instrs) = + pprLabel (mkAsmTempLabel id) $$ + vcat (map pprInstr instrs) + + +pprData :: CmmStatic -> Doc +pprData (CmmAlign bytes) = pprAlign bytes +pprData (CmmDataLabel lbl) = pprLabel lbl +pprData (CmmString str) = pprASCII str +pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes +pprData (CmmStaticLit lit) = pprDataItem lit + +pprGloblDecl :: CLabel -> Doc +pprGloblDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext IF_ARCH_sparc((sLit ".global "), + (sLit ".globl ")) <> + pprCLabel_asm lbl + +pprTypeAndSizeDecl :: CLabel -> Doc +#if linux_TARGET_OS +pprTypeAndSizeDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext (sLit ".type ") <> + pprCLabel_asm lbl <> ptext (sLit ", @object") +#else +pprTypeAndSizeDecl _ + = empty +#endif + +pprLabel :: CLabel -> Doc +pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') + + +pprASCII :: [Word8] -> Doc +pprASCII str + = vcat (map do1 str) $$ do1 0 + where + do1 :: Word8 -> Doc + do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) + +pprAlign :: Int -> Doc +pprAlign bytes = + ptext (sLit ".align ") <> int bytes + + +-- ----------------------------------------------------------------------------- +-- pprInstr: print an 'Instr' + +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr -- | Pretty print a register. @@ -101,12 +203,13 @@ pprSize :: Size -> Doc pprSize x = ptext (case x of - II8 -> sLit "ub" - II16 -> sLit "uh" - II32 -> sLit "" - II64 -> sLit "d" - FF32 -> sLit "" - FF64 -> sLit "d") + II8 -> sLit "ub" + II16 -> sLit "uh" + II32 -> sLit "" + II64 -> sLit "d" + FF32 -> sLit "" + FF64 -> sLit "d" + _ -> panic "SPARC.Ppr.pprSize: no match") -- | Pretty print a size for an instruction suffix. @@ -120,7 +223,8 @@ pprStSize x II32 -> sLit "" II64 -> sLit "x" FF32 -> sLit "" - FF64 -> sLit "d") + FF64 -> sLit "d" + _ -> panic "SPARC.Ppr.pprSize: no match") -- | Pretty print a condition code. @@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _) pprInstr (LDATA _ _) = panic "PprMach.pprInstr: LDATA" +{- pprInstr (SPILL reg slot) = hcat [ ptext (sLit "\tSPILL"), @@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg) ptext (sLit "SLOT") <> parens (int slot), comma, pprReg reg] - +-} -- a clumsy hack for now, to handle possible double alignment problems -- even clumsier, to allow for RegReg regs that show when doing indexed diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs index 8f8a977ac7..025e302556 100644 --- a/compiler/nativeGen/SPARC/RegInfo.hs +++ b/compiler/nativeGen/SPARC/RegInfo.hs @@ -8,241 +8,115 @@ ----------------------------------------------------------------------------- module SPARC.RegInfo ( - -- machine specific - RegUsage(..), - noUsage, - regUsage, - patchRegs, - jumpDests, - isJumpish, - patchJump, - isRegRegMove, + mkVReg, + + riZero, + fpRelEA, + moveSp, + fPair, + + shortcutStatic, + regDotColor, JumpDest(..), canShortcut, - shortcutJump, - - mkSpillInstr, - mkLoadInstr, - mkRegRegMoveInstr, - mkBranchInstr, - - spillSlotSize, - maxSpillSlots, - spillSlotToOffset + shortcutJump, ) where -#include "nativeGen/NCG.h" -#include "HsVersions.h" - import SPARC.Instr import SPARC.Regs -import RegsBase +import RegClass +import Reg +import Size +import Constants (wORD_SIZE) +import Cmm +import CLabel import BlockId import Outputable -import Constants ( rESERVED_C_STACK_BYTES ) -import FastBool - - --- | Represents what regs are read and written to in an instruction. --- -data RegUsage - = RU [Reg] -- regs read from - [Reg] -- regs written to - - --- | No regs read or written to. -noUsage :: RegUsage -noUsage = RU [] [] - - --- | regUsage returns the sets of src and destination registers used --- by a particular instruction. Machine registers that are --- pre-allocated to stgRegs are filtered out, because they are --- uninteresting from a register allocation standpoint. (We wouldn't --- want them to end up on the free list!) As far as we are concerned, --- the fixed registers simply don't exist (for allocation purposes, --- anyway). - --- regUsage doesn't need to do any trickery for jumps and such. Just --- state precisely the regs read and written by that insn. The --- consequences of control flow transfers, as far as register --- allocation goes, are taken care of by the register allocator. --- -regUsage :: Instr -> RegUsage -regUsage instr - = case instr of - SPILL reg _ -> usage ([reg], []) - RELOAD _ reg -> usage ([], [reg]) - - LD _ addr reg -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - RDY rd -> usage ([], [rd]) - WRY r1 r2 -> usage ([r1, r2], []) - AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SLL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRA r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SETHI _ reg -> usage ([], [reg]) - FABS _ r1 r2 -> usage ([r1], [r2]) - FADD _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FCMP _ _ r1 r2 -> usage ([r1, r2], []) - FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FMOV _ r1 r2 -> usage ([r1], [r2]) - FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FNEG _ r1 r2 -> usage ([r1], [r2]) - FSQRT _ r1 r2 -> usage ([r1], [r2]) - FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FxTOy _ _ r1 r2 -> usage ([r1], [r2]) - - JMP addr -> usage (regAddr addr, []) - JMP_TBL addr _ -> usage (regAddr addr, []) - - CALL (Left _ ) _ True -> noUsage - CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs) - CALL (Right reg) _ True -> usage ([reg], []) - CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs) - _ -> noUsage - - where - usage (src, dst) - = RU (filter interesting src) (filter interesting dst) - - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] +import Unique - regRI (RIReg r) = [r] - regRI _ = [] +-- | Make a virtual reg with this size. +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) + = VirtualRegI u --- | Interesting regs are virtuals, or ones that are allocatable --- by the register allocator. -interesting :: Reg -> Bool -interesting reg - = case reg of - VirtualRegI _ -> True - VirtualRegHi _ -> True - VirtualRegF _ -> True - VirtualRegD _ -> True - RealReg i -> isFastTrue (freeReg i) - - - --- | Apply a given mapping to tall the register references in this instruction. - -patchRegs :: Instr -> (Reg -> Reg) -> Instr -patchRegs instr env = case instr of - SPILL reg slot -> SPILL (env reg) slot - RELOAD slot reg -> RELOAD slot (env reg) - - LD sz addr reg -> LD sz (fixAddr addr) (env reg) - ST sz reg addr -> ST sz (env reg) (fixAddr addr) - - ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2) - SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) - UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) - SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) - UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) - SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) - RDY rd -> RDY (env rd) - WRY r1 r2 -> WRY (env r1) (env r2) - AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) - ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) - OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) - ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2) - XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2) - XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2) - SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2) - SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2) - SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2) - - SETHI imm reg -> SETHI imm (env reg) - - FABS s r1 r2 -> FABS s (env r1) (env r2) - FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3) - FCMP e s r1 r2 -> FCMP e s (env r1) (env r2) - FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3) - FMOV s r1 r2 -> FMOV s (env r1) (env r2) - FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3) - FNEG s r1 r2 -> FNEG s (env r1) (env r2) - FSQRT s r1 r2 -> FSQRT s (env r1) (env r2) - FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3) - FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2) - - JMP addr -> JMP (fixAddr addr) - JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids - - CALL (Left i) n t -> CALL (Left i) n t - CALL (Right r) n t -> CALL (Right (env r)) n t - _ -> instr - - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - - --- ----------------------------------------------------------------------------- --- Determine the possible destinations from the current instruction. - --- (we always assume that the next instruction is also a valid destination; --- if this isn't the case then the jump should be at the end of the basic --- block). - -jumpDests :: Instr -> [BlockId] -> [BlockId] -jumpDests insn acc - = case insn of - BI _ _ id -> id : acc - BF _ _ id -> id : acc - JMP_TBL _ ids -> ids ++ acc - _other -> acc + | otherwise + = case size of + FF32 -> VirtualRegF u + FF64 -> VirtualRegD u + _ -> panic "mkVReg" --- | Check whether a particular instruction is a jump, branch or call instruction (jumpish) --- We can't just use jumpDests above because the jump might take its arg, --- so the instr won't contain a blockid. +-- | Check if a RI represents a zero value. +-- - a literal zero +-- - register %g0, which is always zero. -- -isJumpish :: Instr -> Bool -isJumpish instr - = case instr of - BI{} -> True - BF{} -> True - JMP{} -> True - JMP_TBL{} -> True - CALL{} -> True - _ -> False - - --- | Change the destination of this jump instruction --- Used in joinToTargets in the linear allocator, when emitting fixup code --- for join points. -patchJump :: Instr -> BlockId -> BlockId -> Instr -patchJump insn old new - = case insn of - BI cc annul id - | id == old -> BI cc annul new - - BF cc annul id - | id == old -> BF cc annul new - - _other -> insn - +riZero :: RI -> Bool +riZero (RIImm (ImmInt 0)) = True +riZero (RIImm (ImmInteger 0)) = True +riZero (RIReg (RealReg 0)) = True +riZero _ = False + + +-- | Calculate the effective address which would be used by the +-- corresponding fpRel sequence. fpRel is in MachRegs.lhs, +-- alas -- can't have fpRelEA here because of module dependencies. +fpRelEA :: Int -> Reg -> Instr +fpRelEA n dst + = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst + + +-- | Code to shift the stack pointer by n words. +moveSp :: Int -> Instr +moveSp n + = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp + + +-- | Produce the second-half-of-a-double register given the first half. +fPair :: Reg -> Maybe Reg +fPair (RealReg n) + | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) + +fPair (VirtualRegD u) + = Just (VirtualRegHi u) + +fPair _ + = trace ("MachInstrs.fPair: can't get high half of supposed double reg ") + Nothing + +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. +shortcutStatic _ other_static + = other_static + +shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" + + +regDotColor :: Reg -> SDoc +regDotColor reg + = case regClass reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" @@ -253,108 +127,3 @@ canShortcut _ = Nothing shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump _ other = other - - - --- | Make a spill instruction. --- On SPARC we spill below frame pointer leaving 2 words/spill -mkSpillInstr - :: Reg -- ^ register to spill - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -mkSpillInstr reg _ slot - = let off = spillSlotToOffset slot - off_w = 1 + (off `div` 4) - sz = case regClass reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in ST sz reg (fpRel (negate off_w)) - - --- | Make a spill reload instruction. -mkLoadInstr - :: Reg -- ^ register to load - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -mkLoadInstr reg _ slot - = let off = spillSlotToOffset slot - off_w = 1 + (off `div` 4) - sz = case regClass reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in LD sz (fpRel (- off_w)) reg - - --- | Make a reg-reg move instruction. --- On SPARC v8 there are no instructions to move directly between --- floating point and integer regs. If we need to do that then we --- have to go via memory. --- -mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr - -mkRegRegMoveInstr src dst - = case regClass src of - RcInteger -> ADD False False src (RIReg g0) dst - RcDouble -> FMOV FF64 src dst - RcFloat -> FMOV FF32 src dst - - --- | Check whether an instruction represents a reg-reg move. --- The register allocator attempts to eliminate reg->reg moves whenever it can, --- by assigning the src and dest temporaries to the same real register. --- -isRegRegMove :: Instr -> Maybe (Reg,Reg) -isRegRegMove instr - = case instr of - ADD False False src (RIReg src2) dst - | g0 == src2 -> Just (src, dst) - - FMOV FF64 src dst -> Just (src, dst) - FMOV FF32 src dst -> Just (src, dst) - _ -> Nothing - - --- | Make an unconditional branch instruction. -mkBranchInstr - :: BlockId - -> [Instr] - -mkBranchInstr id - = [BI ALWAYS False id - , NOP] -- fill the branch delay slot. - - --- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02 -spillSlotSize :: Int -spillSlotSize = 8 - - --- | The maximum number of spill slots available on the C stack. --- If we use up all of the slots, then we're screwed. -maxSpillSlots :: Int -maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 - - --- | Convert a spill slot number to a *byte* offset, with no sign. --- -spillSlotToOffset :: Int -> Int -spillSlotToOffset slot - | slot >= 0 && slot < maxSpillSlots - = 64 + spillSlotSize * slot - - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int maxSpillSlots) - diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index 987fc2da14..1fb6a01b87 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -5,17 +5,6 @@ -- ----------------------------------------------------------------------------- module SPARC.Regs ( - - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - -- immediate values Imm(..), strImmLit, @@ -39,113 +28,33 @@ module SPARC.Regs ( fits13Bits, largeOffsetError, gReg, iReg, lReg, oReg, fReg, - fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27, + fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27, nCG_FirstFloatReg, - -- horror show + -- allocatable freeReg, - globalRegMaybe + allocatableRegs, + globalRegMaybe, + + get_GlobalReg_reg_or_addr ) where -#include "nativeGen/NCG.h" -#include "HsVersions.h" -#include "../includes/MachRegs.h" -import RegsBase +import Reg +import RegClass +import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) import Pretty -import Outputable ( Outputable(..), pprPanic, panic ) +import Outputable ( panic ) import qualified Outputable -import Unique import Constants import FastBool --- sizes ----------------------------------------------------------------------- - --- | A 'Size' also includes format information, such as whether --- the word is signed or unsigned. --- -data Size - = II8 -- byte (signed) - | II16 -- halfword (signed, 2 bytes) - | II32 -- word (4 bytes) - | II64 -- word (8 bytes) - | FF32 -- IEEE single-precision floating pt - | FF64 -- IEEE single-precision floating pt - deriving Eq - - --- | Get the integer size of this width. -intSize :: Width -> Size -intSize width - = case width of - W8 -> II8 - W16 -> II16 - W32 -> II32 - W64 -> II64 - other -> pprPanic "SPARC.Regs.intSize" (ppr other) - - --- | Get the float size of this width. -floatSize :: Width -> Size -floatSize width - = case width of - W32 -> FF32 - W64 -> FF64 - other -> pprPanic "SPARC.Regs.intSize" (ppr other) - - --- | Check if a size represents a floating point value. -isFloatSize :: Size -> Bool -isFloatSize size - = case size of - FF32 -> True - FF64 -> True - _ -> False - - --- | Size of a machine word. --- This is big enough to hold a pointer. -wordSize :: Size -wordSize = intSize wordWidth - - --- | Convert a Cmm type to a Size. -cmmTypeSize :: CmmType -> Size -cmmTypeSize ty - | isFloatType ty = floatSize (typeWidth ty) - | otherwise = intSize (typeWidth ty) - - --- | Get the Width of a Size. -sizeToWidth :: Size -> Width -sizeToWidth size - = case size of - II8 -> W8 - II16 -> W16 - II32 -> W32 - II64 -> W64 - FF32 -> W32 - FF64 -> W64 - - --- | Make a virtual reg with this size. -mkVReg :: Unique -> Size -> Reg -mkVReg u size - | not (isFloatSize size) - = VirtualRegI u - - | otherwise - = case size of - FF32 -> VirtualRegF u - FF64 -> VirtualRegD u - _ -> panic "mkVReg" - -- immediates ------------------------------------------------------------------ @@ -390,48 +299,13 @@ o1 = RealReg (oReg 1) f0 = RealReg (fReg 0) +-- | We use he first few float regs as double precision. +-- This is the RegNo of the first float regs we use as single precision. +-- nCG_FirstFloatReg :: RegNo -nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg -#else -nCG_FirstFloatReg :: RegNo -nCG_FirstFloatReg = unRealReg f22 -#endif - - --- horror show ----------------------------------------------------------------- -#if sparc_TARGET_ARCH -#define g0 0 -#define g1 1 -#define g2 2 -#define g3 3 -#define g4 4 -#define g5 5 -#define g6 6 -#define g7 7 -#define o0 8 -#define o1 9 -#define o2 10 -#define o3 11 -#define o4 12 -#define o5 13 -#define o6 14 -#define o7 15 -#define l0 16 -#define l1 17 -#define l2 18 -#define l3 19 -#define l4 20 -#define l5 21 -#define l6 22 -#define l7 23 -#define i0 24 -#define i1 25 -#define i2 26 -#define i3 27 -#define i4 28 -#define i5 29 -#define i6 30 -#define i7 31 +nCG_FirstFloatReg = 54 + + -- | Check whether a machine register is free for allocation. -- This needs to match the info in includes/MachRegs.h otherwise modules @@ -445,7 +319,11 @@ freeReg regno -- %g1(r1) - %g4(r4) are allocable ----------------- -freeReg :: RegNo -> FastBool + -- %g5(r5) - %g7(r7) + -- are reserved for the OS + 5 -> fastBool False + 6 -> fastBool False + 7 -> fastBool False -- %o0(r8) - %o5(r13) are allocable ---------------- @@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool -- regs not matched above are allocable. _ -> fastBool True - + + +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = isFastTrue (freeReg i) + in filter isFree allMachRegNos -- | Returns Just the real register that a global register is stored in. @@ -539,15 +425,20 @@ globalRegMaybe gg Hp -> Just (RealReg 27) -- %i3 HpLim -> Just (RealReg 28) -- %i4 -globalRegMaybe :: GlobalReg -> Maybe Reg - - - -#else - -freeReg _ = 0# -globalRegMaybe = panic "SPARC.Regs.globalRegMaybe: not defined" + BaseReg -> Just (RealReg 25) -- %i1 + + _ -> Nothing -#endif +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real +-- register it is in, on this platform, or a CmmExpr denoting the +-- address in the register table holding it. +-- (See also get_GlobalReg_addr in CgUtils.) +get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr +get_GlobalReg_reg_or_addr mid + = case globalRegMaybe mid of + Just rr -> Left rr + Nothing -> Right (get_GlobalReg_addr mid) |