diff options
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/Reg.hs | 28 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 31 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 20 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 9 |
4 files changed, 71 insertions, 17 deletions
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs index 7f69ea01a4..e0c0176a75 100644 --- a/compiler/nativeGen/Reg.hs +++ b/compiler/nativeGen/Reg.hs @@ -52,11 +52,11 @@ type RegNo -- Virtual regs can be of either class, so that info is attached. -- data VirtualReg - = VirtualRegI {-# UNPACK #-} !Unique - | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register - | VirtualRegF {-# UNPACK #-} !Unique - | VirtualRegD {-# UNPACK #-} !Unique - + = VirtualRegI {-# UNPACK #-} !Unique + | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register + | VirtualRegF {-# UNPACK #-} !Unique + | VirtualRegD {-# UNPACK #-} !Unique + | VirtualRegSSE {-# UNPACK #-} !Unique deriving (Eq, Show) -- This is laborious, but necessary. We can't derive Ord because @@ -69,14 +69,15 @@ instance Ord VirtualReg where compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b - + compare (VirtualRegSSE a) (VirtualRegSSE b) = nonDetCmpUnique a b compare VirtualRegI{} _ = LT compare _ VirtualRegI{} = GT compare VirtualRegHi{} _ = LT compare _ VirtualRegHi{} = GT compare VirtualRegF{} _ = LT compare _ VirtualRegF{} = GT - + compare VirtualRegD{} _ = LT + compare _ VirtualRegD{} = GT instance Uniquable VirtualReg where @@ -86,18 +87,16 @@ instance Uniquable VirtualReg where VirtualRegHi u -> u VirtualRegF u -> u VirtualRegD u -> u + VirtualRegSSE u -> u instance Outputable VirtualReg where ppr reg = case reg of VirtualRegI u -> text "%vI_" <> pprUniqueAlways u VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u - -- this code is kinda wrong on x86 - -- because float and double occupy the same register set - -- namely SSE2 register xmm0 .. xmm15 - VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u - VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u - + VirtualRegF u -> text "%vF_" <> pprUniqueAlways u + VirtualRegD u -> text "%vD_" <> pprUniqueAlways u + VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u renameVirtualReg :: Unique -> VirtualReg -> VirtualReg @@ -107,6 +106,7 @@ renameVirtualReg u r VirtualRegHi _ -> VirtualRegHi u VirtualRegF _ -> VirtualRegF u VirtualRegD _ -> VirtualRegD u + VirtualRegSSE _ -> VirtualRegSSE u classOfVirtualReg :: VirtualReg -> RegClass @@ -116,7 +116,7 @@ classOfVirtualReg vr VirtualRegHi{} -> RcInteger VirtualRegF{} -> RcFloat VirtualRegD{} -> RcDouble - + VirtualRegSSE{} -> RcDoubleSSE -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 73cfb28d46..fe6d4d0499 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -284,11 +284,13 @@ data ChildCode64 data Register = Fixed Format Reg InstrBlock | Any Format (Reg -> InstrBlock) + | AnyV (Reg -> InstrBlock) swizzleRegisterRep :: Register -> Format -> Register swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code swizzleRegisterRep (Any _ codefn) format = Any format codefn +swizzleRegisterRep r@(AnyV{}) _ = r -- | Grab the Reg for a CmmReg @@ -368,6 +370,9 @@ getSomeReg expr = do Any rep code -> do tmp <- getNewRegNat rep return (tmp, code tmp) + AnyV code -> do + tmp <- getVectorReg + return (tmp, code tmp) Fixed _ reg code -> return (reg, code) @@ -484,6 +489,16 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do r_dst_lo ) +iselExpr64 (CmmMachOp (MO_V_Add len width) [expr1, expr2]) = do + r_dst <- getVectorReg + ChildCode64 code1 r1 <- iselExpr64 expr1 + ChildCode64 code2 r2 <- iselExpr64 expr2 + let fmt = VecFormat len width FmtInt + return $ ChildCode64 (code1 `appOL` code2 `appOL` + toOL [ V_MOV fmt (OpReg r_dst) (OpReg r1) + , V_ADD (VecFormat len width FmtInt) (OpReg r2) (OpReg r_dst) + ]) r_dst + iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do fn <- getAnyReg expr r_dst_lo <- getNewRegNat II32 @@ -501,6 +516,8 @@ iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) +getVectorReg :: NatM Reg +getVectorReg = RegVirtual . VirtualRegSSE <$> getUniqueM -------------------------------------------------------------------------------- getRegister :: CmmExpr -> NatM Register @@ -822,7 +839,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_V_Insert {} -> needLlvm MO_V_Extract {} -> needLlvm - MO_V_Add {} -> needLlvm + MO_V_Add {} -> triv_op W512 V_ADD MO_V_Sub {} -> needLlvm MO_V_Mul {} -> needLlvm MO_VS_Quot {} -> needLlvm @@ -985,6 +1002,12 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps getRegister' _ _ (CmmLoad mem pk) + | isVecType pk + = do + code <- vecLoadCode (V_MOV format) mem + return (AnyV code) + +getRegister' _ _ (CmmLoad mem pk) | isFloatType pk = do Amode addr mem_code <- getAmode mem @@ -1096,6 +1119,9 @@ getByteReg expr = do -- ToDo: could optimise slightly by checking for -- byte-addressable real registers, but that will -- happen very rarely if at all. + AnyV {} -> do + tmp <- getVectorReg + return (tmp, code tmp) else getSomeReg expr -- all regs are byte-addressable on x86_64 -- Another variant: this time we want the result in a register that cannot @@ -1116,6 +1142,9 @@ getNonClobberedReg expr = do return (tmp, code `snocOL` reg2reg rep reg tmp) | otherwise -> return (reg, code) + AnyV code -> do + tmp <- getVectorReg + return (tmp, code tmp) reg2reg :: Format -> Reg -> Reg -> Instr reg2reg format src dst = MOV format (OpReg src) (OpReg dst) diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 6e5d656beb..e59594365b 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -8,8 +8,9 @@ -- ----------------------------------------------------------------------------- -module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..), - getJumpDestBlockId, canShortcut, shortcutStatics, +module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), + VecFormat(..), ScalarFormat(..), + JumpDest(..), getJumpDestBlockId, canShortcut, shortcutStatics, shortcutJump, allocMoreStack, maxSpillSlots, archWordFormat ) where @@ -71,6 +72,19 @@ instance Instruction Instr where mkStackAllocInstr = x86_mkStackAllocInstr mkStackDeallocInstr = x86_mkStackDeallocInstr +data VecFormat = VecFormat { vecLength :: !Length + , vecWidth :: !Width + , vecScalar :: !ScalarFormat + } + +instance Outputable VecFormat where + ppr (VecFormat l w f) = ppr (l,w,f) + +data ScalarFormat = FmtFloat | FmtInt + +instance Outputable ScalarFormat where + ppr FmtFloat = text "float" + ppr FmtInt = text "int" -- ----------------------------------------------------------------------------- -- Intel x86 instructions @@ -193,6 +207,7 @@ data Instr -- Moves. | MOV Format Operand Operand + | V_MOV VecFormat Operand Operand | CMOV Cond Format Operand Reg | MOVZxL Format Operand Operand -- format is the size of operand 1 | MOVSxL Format Operand Operand -- format is the size of operand 1 @@ -205,6 +220,7 @@ data Instr -- Int Arithmetic. | ADD Format Operand Operand + | V_ADD VecFormat Operand Operand | ADC Format Operand Operand | SUB Format Operand Operand | SBB Format Operand Operand diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 095d9eba7c..7a799cfabd 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -833,6 +833,15 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst pprInstr (CMPXCHG format src dst) = pprFormatOpOp (sLit "cmpxchg") format src dst +-- SIMD +pprInstr (V_ADD format src dst) + | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "addps") format src dst + | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "addpd") format src dst + | VecFormat _ W64 FmtInt <- format = pprVecFormatOpOp (sLit "addpd") format src dst +pprInstr (V_MOV format src dst) + | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "movaps") format src dst + | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "movapd") format src dst + | VecFormat _ _ FmtInt <- format = pprVecFormatOpOp (sLit "movdqa") format src dst -------------------------- |