diff options
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/Format.hs | 55 | ||||
-rw-r--r-- | compiler/nativeGen/NCGMonad.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 12 | ||||
-rw-r--r-- | compiler/nativeGen/Reg.hs | 14 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs | 5 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Linear/Main.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/RegClass.hs | 19 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Ppr.hs | 14 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Regs.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 526 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 81 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 150 | ||||
-rw-r--r-- | compiler/nativeGen/X86/RegInfo.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Regs.hs | 1 |
16 files changed, 799 insertions, 97 deletions
diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs index 31472893e7..a0e4e99f80 100644 --- a/compiler/nativeGen/Format.hs +++ b/compiler/nativeGen/Format.hs @@ -10,9 +10,11 @@ -- module Format ( Format(..), + ScalarFormat(..), intFormat, floatFormat, isFloatFormat, + isVecFormat, cmmTypeFormat, formatToWidth, formatInBytes @@ -25,6 +27,29 @@ import GhcPrelude import Cmm import Outputable + +-- Note [GHC's data format representations] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- GHC has severals types that represent various aspects of data format. +-- These include: +-- +-- * 'CmmType.CmmType': The data classification used throughout the C-- +-- pipeline. This is a pair of a CmmCat and a Width. +-- +-- * 'CmmType.CmmCat': What the bits in a C-- value mean (e.g. a pointer, integer, or floating-point value) +-- +-- * 'CmmType.Width': The width of a C-- value. +-- +-- * 'CmmType.Length': The width (measured in number of scalars) of a vector value. +-- +-- * 'Format.Format': The data format representation used by much of the backend. +-- +-- * 'Format.ScalarFormat': The format of a 'Format.VecFormat'\'s scalar. +-- +-- * 'RegClass.RegClass': Whether a register is an integer, float-point, or vector register +-- + -- It looks very like the old MachRep, but it's now of purely local -- significance, here in the native code generator. You can change it -- without global consequences. @@ -47,8 +72,16 @@ data Format | II64 | FF32 | FF64 + | VecFormat !Length !ScalarFormat !Width deriving (Show, Eq) +data ScalarFormat = FmtInt8 + | FmtInt16 + | FmtInt32 + | FmtInt64 + | FmtFloat + | FmtDouble + deriving (Show, Eq) -- | Get the integer format of this width. intFormat :: Width -> Format @@ -81,13 +114,33 @@ isFloatFormat format FF64 -> True _ -> False +-- | Check if a format represents a vector +isVecFormat :: Format -> Bool +isVecFormat (VecFormat {}) = True +isVecFormat _ = False -- | Convert a Cmm type to a Format. cmmTypeFormat :: CmmType -> Format cmmTypeFormat ty | isFloatType ty = floatFormat (typeWidth ty) + | isVecType ty = vecFormat ty | otherwise = intFormat (typeWidth ty) +vecFormat :: CmmType -> Format +vecFormat ty = + let l = vecLength ty + elemTy = vecElemType ty + in if isFloatType elemTy + then case typeWidth elemTy of + W32 -> VecFormat l FmtFloat W32 + W64 -> VecFormat l FmtDouble W64 + _ -> pprPanic "Incorrect vector element width" (ppr elemTy) + else case typeWidth elemTy of + W8 -> VecFormat l FmtInt8 W8 + W16 -> VecFormat l FmtInt16 W16 + W32 -> VecFormat l FmtInt32 W32 + W64 -> VecFormat l FmtInt64 W64 + _ -> pprPanic "Incorrect vector element width" (ppr elemTy) -- | Get the Width of a Format. formatToWidth :: Format -> Width @@ -99,7 +152,7 @@ formatToWidth format II64 -> W64 FF32 -> W32 FF64 -> W64 - + VecFormat l _ w -> widthFromBytes (l*widthInBytes w) formatInBytes :: Format -> Int formatInBytes = widthInBytes . formatToWidth diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index 3680c1c7b0..67730aa59b 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -250,7 +250,6 @@ getNewRegNat rep dflags <- getDynFlags return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep) - getNewRegPairNat :: Format -> NatM (Reg,Reg) getNewRegPairNat rep = do u <- getUniqueNat diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index a49526c93a..7e5df6a76c 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1909,6 +1909,8 @@ genCCall' dflags gcp target dest_regs args FF32 -> (1, 1, 4, fprs) FF64 -> (2, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" + VecFormat {} + -> panic "genCCall' passArguments vector format" GCP32ELF -> case cmmTypeFormat rep of @@ -1919,6 +1921,8 @@ genCCall' dflags gcp target dest_regs args FF32 -> (0, 1, 4, fprs) FF64 -> (0, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" + VecFormat {} + -> panic "genCCall' passArguments vector format" GCP64ELF _ -> case cmmTypeFormat rep of II8 -> (1, 0, 8, gprs) @@ -1930,6 +1934,8 @@ genCCall' dflags gcp target dest_regs args -- the FPRs. FF32 -> (1, 1, 8, fprs) FF64 -> (1, 1, 8, fprs) + VecFormat {} + -> panic "genCCall' passArguments vector format" moveResult reduceToFF32 = case dest_regs of diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 4254f23122..b7316e6bc6 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -29,7 +29,7 @@ import BlockId import CLabel import PprCmmExpr () -import Unique ( pprUniqueAlways, getUnique ) +import Unique ( getUnique ) import GHC.Platform import FastString import Outputable @@ -168,10 +168,7 @@ pprReg r = case r of RegReal (RealRegSingle i) -> ppr_reg_no i RegReal (RealRegPair{}) -> panic "PPC.pprReg: no reg pairs on this arch" - RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u - RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u - RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u - RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u + RegVirtual v -> ppr v where ppr_reg_no :: Int -> SDoc @@ -190,7 +187,8 @@ pprFormat x II32 -> sLit "w" II64 -> sLit "d" FF32 -> sLit "fs" - FF64 -> sLit "fd") + FF64 -> sLit "fd" + VecFormat _ _ _ -> panic "PPC.Ppr.pprFormat: VecFormat") pprCond :: Cond -> SDoc @@ -375,6 +373,7 @@ pprInstr (LD fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" + VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', @@ -414,6 +413,7 @@ pprInstr (LA fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" + VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs index 7f69ea01a4..dff2f07bf4 100644 --- a/compiler/nativeGen/Reg.hs +++ b/compiler/nativeGen/Reg.hs @@ -56,6 +56,7 @@ data VirtualReg | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register | VirtualRegF {-# UNPACK #-} !Unique | VirtualRegD {-# UNPACK #-} !Unique + | VirtualRegVec {-# UNPACK #-} !Unique deriving (Eq, Show) @@ -69,6 +70,7 @@ instance Ord VirtualReg where compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b + compare (VirtualRegVec a) (VirtualRegVec b) = nonDetCmpUnique a b compare VirtualRegI{} _ = LT compare _ VirtualRegI{} = GT @@ -76,7 +78,8 @@ instance Ord VirtualReg where compare _ VirtualRegHi{} = GT compare VirtualRegF{} _ = LT compare _ VirtualRegF{} = GT - + compare VirtualRegVec{} _ = LT + compare _ VirtualRegVec{} = GT instance Uniquable VirtualReg where @@ -86,6 +89,7 @@ instance Uniquable VirtualReg where VirtualRegHi u -> u VirtualRegF u -> u VirtualRegD u -> u + VirtualRegVec u -> u instance Outputable VirtualReg where ppr reg @@ -95,8 +99,9 @@ instance Outputable VirtualReg where -- this code is kinda wrong on x86 -- because float and double occupy the same register set -- namely SSE2 register xmm0 .. xmm15 - VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u - VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u + VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u + VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u + VirtualRegVec u -> text "%vVec_" <> pprUniqueAlways u @@ -107,6 +112,7 @@ renameVirtualReg u r VirtualRegHi _ -> VirtualRegHi u VirtualRegF _ -> VirtualRegF u VirtualRegD _ -> VirtualRegD u + VirtualRegVec _ -> VirtualRegVec u classOfVirtualReg :: VirtualReg -> RegClass @@ -116,6 +122,8 @@ classOfVirtualReg vr VirtualRegHi{} -> RcInteger VirtualRegF{} -> RcFloat VirtualRegD{} -> RcDouble + -- Below is an awful, largely x86-specific hack + VirtualRegVec{} -> RcDouble diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 5d4fd418c3..23d7c6b421 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -195,7 +195,6 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu - -- Specification Code ---------------------------------------------------------- -- -- The trivColorable function for each particular architecture should diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs index 5a4f1c65a8..d452edfdc6 100644 --- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs @@ -2,6 +2,7 @@ module RegAlloc.Linear.FreeRegs ( FR(..), + allFreeRegs, maxSpillSlots ) @@ -69,6 +70,10 @@ instance FR SPARC.FreeRegs where frInitFreeRegs = SPARC.initFreeRegs frReleaseReg = SPARC.releaseReg +-- | For debugging output. +allFreeRegs :: FR freeRegs => Platform -> freeRegs -> [RealReg] +allFreeRegs plat fr = foldMap (\rcls -> frGetFreeRegs plat rcls fr) allRegClasses + maxSpillSlots :: DynFlags -> Int maxSpillSlots dflags = case platformArch (targetPlatform dflags) of diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index cdaf738d68..b29712e0e0 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -884,8 +884,10 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc $ vcat [ text "allocating vreg: " <> text (show r) , text "assignment: " <> ppr assig - , text "freeRegs: " <> text (show freeRegs) - , text "initFreeRegs: " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ] + , text "freeRegs: " <> text (showRegs freeRegs) + , text "initFreeRegs: " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs)) + ] + where showRegs = show . map (\reg -> (reg, targetClassOfRealReg platform reg)) . allFreeRegs platform result diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs index fbbb786817..d73a3409ac 100644 --- a/compiler/nativeGen/RegClass.hs +++ b/compiler/nativeGen/RegClass.hs @@ -1,15 +1,14 @@ -- | An architecture independent description of a register's class. module RegClass - ( RegClass (..) ) - -where + ( RegClass(..) + , allRegClasses + ) where import GhcPrelude import Outputable import Unique - -- | The class of a register. -- Used in the register allocator. -- We treat all registers in a class as being interchangable. @@ -18,7 +17,11 @@ data RegClass = RcInteger | RcFloat | RcDouble - deriving Eq + deriving (Eq, Show) + +allRegClasses :: [RegClass] +allRegClasses = + [ RcInteger, RcFloat, RcDouble ] instance Uniquable RegClass where @@ -27,6 +30,6 @@ instance Uniquable RegClass where getUnique RcDouble = mkRegClassUnique 2 instance Outputable RegClass where - ppr RcInteger = Outputable.text "I" - ppr RcFloat = Outputable.text "F" - ppr RcDouble = Outputable.text "D" + ppr RcInteger = Outputable.text "I" + ppr RcFloat = Outputable.text "F" + ppr RcDouble = Outputable.text "D" diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index fc67f77541..aa355f97cb 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -45,7 +45,6 @@ import CLabel import Hoopl.Label import Hoopl.Collections -import Unique ( pprUniqueAlways ) import Outputable import GHC.Platform import FastString @@ -148,12 +147,7 @@ pprReg :: Reg -> SDoc pprReg reg = case reg of RegVirtual vr - -> case vr of - VirtualRegI u -> text "%vI_" <> pprUniqueAlways u - VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u - VirtualRegF u -> text "%vF_" <> pprUniqueAlways u - VirtualRegD u -> text "%vD_" <> pprUniqueAlways u - + -> ppr vr RegReal rr -> case rr of @@ -221,7 +215,8 @@ pprFormat x II32 -> sLit "" II64 -> sLit "d" FF32 -> sLit "" - FF64 -> sLit "d") + FF64 -> sLit "d" + VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat") -- | Pretty print a format for an instruction suffix. @@ -235,7 +230,8 @@ pprStFormat x II32 -> sLit "" II64 -> sLit "x" FF32 -> sLit "" - FF64 -> sLit "d") + FF64 -> sLit "d" + VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat") diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index 0d7edc346a..e46dbd0d38 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -104,7 +104,6 @@ virtualRegSqueeze cls vr VirtualRegD{} -> 1 _other -> 0 - {-# INLINE realRegSqueeze #-} realRegSqueeze :: RegClass -> RealReg -> Int @@ -134,7 +133,6 @@ realRegSqueeze cls rr RealRegPair{} -> 1 - -- | All the allocatable registers in the machine, -- including register pairs. allRealRegs :: [RealReg] diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 13662f6807..ed3684e074 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -111,12 +111,25 @@ sse2Enabled = do ArchX86 -> return True _ -> panic "trying to generate x86/x86_64 on the wrong platform" +sse4_1Enabled :: NatM Bool +sse4_1Enabled = do + dflags <- getDynFlags + return (isSse4_1Enabled dflags) sse4_2Enabled :: NatM Bool sse4_2Enabled = do dflags <- getDynFlags return (isSse4_2Enabled dflags) +sseEnabled :: NatM Bool +sseEnabled = do + dflags <- getDynFlags + return (isSseEnabled dflags) + +avxEnabled :: NatM Bool +avxEnabled = do + dflags <- getDynFlags + return (isAvxEnabled dflags) cmmTopCodeGen :: RawCmmDecl @@ -215,6 +228,7 @@ stmtToInstrs bid stmt = do CmmAssign reg src | isFloatType ty -> assignReg_FltCode format reg src | is32Bit && isWord64 ty -> assignReg_I64Code reg src + | isVecType ty -> assignReg_VecCode format reg src | otherwise -> assignReg_IntCode format reg src where ty = cmmRegType dflags reg format = cmmTypeFormat ty @@ -222,6 +236,7 @@ stmtToInstrs bid stmt = do CmmStore addr src | isFloatType ty -> assignMem_FltCode format addr src | is32Bit && isWord64 ty -> assignMem_I64Code addr src + | isVecType ty -> assignMem_VecCode format addr src | otherwise -> assignMem_IntCode format addr src where ty = cmmExprType dflags src format = cmmTypeFormat ty @@ -308,6 +323,15 @@ getRegisterReg platform (CmmGlobal mid) -- platform. Hence ... +getVecRegisterReg :: Platform -> Bool -> Format -> CmmReg -> Reg +getVecRegisterReg _ use_avx format (CmmLocal (LocalReg u pk)) + | isVecType pk && use_avx = RegVirtual (mkVirtualReg u format) + | otherwise = pprPanic + (unlines ["avx flag is not enabled" , + "or this is not a vector register"]) + (ppr pk) +getVecRegisterReg platform _use_avx _format c = getRegisterReg platform c + -- | Memory addressing modes passed up the tree. data Amode = Amode AddrMode InstrBlock @@ -503,6 +527,13 @@ iselExpr64 expr -------------------------------------------------------------------------------- + +-- This is a helper data type which helps reduce the code duplication for +-- the code generation of arithmetic operations. This is not specifically +-- targetted for any particular type like Int8, Int32 etc +data VectorArithInstns = VA_Add | VA_Sub | VA_Mul | VA_Div + + getRegister :: CmmExpr -> NatM Register getRegister e = do dflags <- getDynFlags is32Bit <- is32BitPlatform @@ -520,16 +551,24 @@ getRegister' dflags is32Bit (CmmReg reg) do reg' <- getPicBaseNat (archWordFormat is32Bit) return (Fixed (archWordFormat is32Bit) reg' nilOL) _ -> - do - let - fmt = cmmTypeFormat (cmmRegType dflags reg) - format = fmt - -- - let platform = targetPlatform dflags - return (Fixed format - (getRegisterReg platform reg) - nilOL) - + do use_sse2 <- sse2Enabled + use_avx <- avxEnabled + let cmmregtype = cmmRegType dflags reg + if isVecType cmmregtype + then return (vectorRegister cmmregtype use_avx use_sse2) + else return (standardRegister cmmregtype) + where + vectorRegister :: CmmType -> Bool -> Bool -> Register + vectorRegister reg_ty use_avx use_sse2 + | use_avx || use_sse2 = + let vecfmt = cmmTypeFormat reg_ty + platform = targetPlatform dflags + in (Fixed vecfmt (getVecRegisterReg platform True vecfmt reg) nilOL) + | otherwise = panic "Please enable the -mavx or -msse2 flag" + + standardRegister crt = + let platform = targetPlatform dflags + in (Fixed (cmmTypeFormat crt) (getRegisterReg platform reg) nilOL) getRegister' dflags is32Bit (CmmRegOff r n) = getRegister' dflags is32Bit $ mangleIndexTree dflags r n @@ -631,7 +670,69 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), return $ Any II64 (\dst -> unitOL $ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) +getRegister' _ _ (CmmMachOp mop [x, y, z]) = do -- ternary MachOps + sse4_1 <- sse4_1Enabled + sse2 <- sse2Enabled + sse <- sseEnabled + case mop of + MO_VF_Insert l W32 | sse4_1 && sse -> vector_float_pack l W32 x y z + | otherwise + -> sorry "Please enable the -msse4 and -msse flag" + MO_VF_Insert l W64 | sse2 && sse -> vector_float_pack l W64 x y z + | otherwise + -> sorry "Please enable the -msse2 and -msse flag" + _other -> incorrectOperands + where + vector_float_pack :: Length + -> Width + -> CmmExpr + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_pack len W32 expr1 expr2 (CmmLit offset) + = do + fn <- getAnyReg expr1 + (r, exp) <- getSomeReg expr2 + let f = VecFormat len FmtFloat W32 + imm = litToImm offset + code dst = exp `appOL` + (fn dst) `snocOL` + (INSERTPS f (OpImm imm) (OpReg r) dst) + in return $ Any f code + vector_float_pack len W64 expr1 expr2 (CmmLit offset) + = do + Amode addr addr_code <- getAmode expr2 + (r, exp) <- getSomeReg expr1 + + -- fn <- getAnyReg expr1 + -- (r, exp) <- getSomeReg expr2 + let f = VecFormat len FmtDouble W64 + code dst + = case offset of + CmmInt 0 _ -> exp `appOL` addr_code `snocOL` + (MOVL f (OpAddr addr) (OpReg r)) `snocOL` + (MOVU f (OpReg r) (OpReg dst)) + CmmInt 16 _ -> exp `appOL` addr_code `snocOL` + (MOVH f (OpAddr addr) (OpReg r)) `snocOL` + (MOVU f (OpReg r) (OpReg dst)) + _ -> panic "Error in offset while packing" + -- code dst + -- = case offset of + -- CmmInt 0 _ -> exp `appOL` + -- (fn dst) `snocOL` + -- (MOVL f (OpReg r) (OpReg dst)) + -- CmmInt 16 _ -> exp `appOL` + -- (fn dst) `snocOL` + -- (MOVH f (OpReg r) (OpReg dst)) + -- _ -> panic "Error in offset while packing" + in return $ Any f code + vector_float_pack _ _ _ c _ + = pprPanic "Pack not supported for : " (ppr c) + getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps + sse2 <- sse2Enabled + sse <- sseEnabled + avx <- avxEnabled case mop of MO_F_Neg w -> sse2NegCode w x @@ -708,23 +809,28 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_FS_Conv from to -> coerceFP2Int from to x MO_SF_Conv from to -> coerceInt2FP from to x - MO_V_Insert {} -> needLlvm - MO_V_Extract {} -> needLlvm - MO_V_Add {} -> needLlvm - MO_V_Sub {} -> needLlvm - MO_V_Mul {} -> needLlvm - MO_VS_Quot {} -> needLlvm - MO_VS_Rem {} -> needLlvm - MO_VS_Neg {} -> needLlvm - MO_VU_Quot {} -> needLlvm - MO_VU_Rem {} -> needLlvm - MO_VF_Insert {} -> needLlvm - MO_VF_Extract {} -> needLlvm - MO_VF_Add {} -> needLlvm - MO_VF_Sub {} -> needLlvm - MO_VF_Mul {} -> needLlvm - MO_VF_Quot {} -> needLlvm - MO_VF_Neg {} -> needLlvm + MO_V_Insert {} -> needLlvm + MO_V_Extract {} -> needLlvm + MO_V_Add {} -> needLlvm + MO_V_Sub {} -> needLlvm + MO_V_Mul {} -> needLlvm + MO_VS_Quot {} -> needLlvm + MO_VS_Rem {} -> needLlvm + MO_VS_Neg {} -> needLlvm + MO_VU_Quot {} -> needLlvm + MO_VU_Rem {} -> needLlvm + MO_VF_Broadcast {} -> incorrectOperands + MO_VF_Insert {} -> incorrectOperands + MO_VF_Extract {} -> incorrectOperands + MO_VF_Add {} -> incorrectOperands + MO_VF_Sub {} -> incorrectOperands + MO_VF_Mul {} -> incorrectOperands + MO_VF_Quot {} -> incorrectOperands + + MO_VF_Neg l w | avx -> vector_float_negate_avx l w x + | sse && sse2 -> vector_float_negate_sse l w x + | otherwise + -> sorry "Please enable the -mavx or -msse, -msse2 flag" _other -> pprPanic "getRegister" (pprMachOp mop) where @@ -762,8 +868,45 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps = do e_code <- getRegister' dflags is32Bit expr return (swizzleRegisterRep e_code new_format) + vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register + vector_float_negate_avx l w expr = do + tmp <- getNewRegNat (VecFormat l FmtFloat w) + (reg, exp) <- getSomeReg expr + Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes W32) (CmmFloat 0.0 W32) + let format = case w of + W32 -> VecFormat l FmtFloat w + W64 -> VecFormat l FmtDouble w + _ -> pprPanic "Cannot negate vector of width" (ppr w) + code dst = case w of + W32 -> exp `appOL` addr_code `snocOL` + (VBROADCAST format addr tmp) `snocOL` + (VSUB format (OpReg reg) tmp dst) + W64 -> exp `appOL` addr_code `snocOL` + (MOVL format (OpAddr addr) (OpReg tmp)) `snocOL` + (MOVH format (OpAddr addr) (OpReg tmp)) `snocOL` + (VSUB format (OpReg reg) tmp dst) + _ -> pprPanic "Cannot negate vector of width" (ppr w) + return (Any format code) + + vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register + vector_float_negate_sse l w expr = do + tmp <- getNewRegNat (VecFormat l FmtFloat w) + (reg, exp) <- getSomeReg expr + let format = case w of + W32 -> VecFormat l FmtFloat w + W64 -> VecFormat l FmtDouble w + _ -> pprPanic "Cannot negate vector of width" (ppr w) + code dst = exp `snocOL` + (XOR format (OpReg tmp) (OpReg tmp)) `snocOL` + (MOVU format (OpReg tmp) (OpReg dst)) `snocOL` + (SUB format (OpReg reg) (OpReg dst)) + return (Any format code) getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps + sse4_1 <- sse4_1Enabled + sse2 <- sse2Enabled + sse <- sseEnabled + avx <- avxEnabled case mop of MO_F_Eq _ -> condFltReg is32Bit EQQ x y MO_F_Ne _ -> condFltReg is32Bit NE x y @@ -828,13 +971,49 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_VS_Quot {} -> needLlvm MO_VS_Rem {} -> needLlvm MO_VS_Neg {} -> needLlvm - MO_VF_Insert {} -> needLlvm - MO_VF_Extract {} -> needLlvm - MO_VF_Add {} -> needLlvm - MO_VF_Sub {} -> needLlvm - MO_VF_Mul {} -> needLlvm - MO_VF_Quot {} -> needLlvm - MO_VF_Neg {} -> needLlvm + + MO_VF_Broadcast l W32 | avx -> vector_float_broadcast_avx l W32 x y + | sse4_1 -> vector_float_broadcast_sse l W32 x y + | otherwise + -> sorry "Please enable the -mavx or -msse4 flag" + + MO_VF_Broadcast l W64 | sse2 -> vector_float_broadcast_avx l W64 x y + | otherwise -> sorry "Please enable the -msse2 flag" + + MO_VF_Extract l W32 | avx -> vector_float_unpack l W32 x y + | sse -> vector_float_unpack_sse l W32 x y + | otherwise + -> sorry "Please enable the -mavx or -msse flag" + + MO_VF_Extract l W64 | sse2 -> vector_float_unpack l W64 x y + | otherwise -> sorry "Please enable the -msse2 flag" + + MO_VF_Add l w | avx -> vector_float_op_avx VA_Add l w x y + | sse && w == W32 -> vector_float_op_sse VA_Add l w x y + | sse2 && w == W64 -> vector_float_op_sse VA_Add l w x y + | otherwise + -> sorry "Please enable the -mavx or -msse flag" + + MO_VF_Sub l w | avx -> vector_float_op_avx VA_Sub l w x y + | sse && w == W32 -> vector_float_op_sse VA_Sub l w x y + | sse2 && w == W64 -> vector_float_op_sse VA_Sub l w x y + | otherwise + -> sorry "Please enable the -mavx or -msse flag" + + MO_VF_Mul l w | avx -> vector_float_op_avx VA_Mul l w x y + | sse && w == W32 -> vector_float_op_sse VA_Mul l w x y + | sse2 && w == W64 -> vector_float_op_sse VA_Mul l w x y + | otherwise + -> sorry "Please enable the -mavx or -msse flag" + + MO_VF_Quot l w | avx -> vector_float_op_avx VA_Div l w x y + | sse && w == W32 -> vector_float_op_sse VA_Div l w x y + | sse2 && w == W64 -> vector_float_op_sse VA_Div l w x y + | otherwise + -> sorry "Please enable the -mavx or -msse flag" + + MO_VF_Insert {} -> incorrectOperands + MO_VF_Neg {} -> incorrectOperands _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) where @@ -930,7 +1109,171 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps -- TODO: There are other interesting patterns we want to replace -- with a LEA, e.g. `(x + offset) + (y << shift)`. + ----------------------- + -- Vector operations--- + vector_float_op_avx :: VectorArithInstns + -> Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_op_avx op l w expr1 expr2 = do + (reg1, exp1) <- getSomeReg expr1 + (reg2, exp2) <- getSomeReg expr2 + let format = case w of + W32 -> VecFormat l FmtFloat W32 + W64 -> VecFormat l FmtDouble W64 + _ -> pprPanic "Operation not supported for width " (ppr w) + code dst = case op of + VA_Add -> arithInstr VADD + VA_Sub -> arithInstr VSUB + VA_Mul -> arithInstr VMUL + VA_Div -> arithInstr VDIV + where + -- opcode src2 src1 dst <==> dst = src1 `opcode` src2 + arithInstr instr = exp1 `appOL` exp2 `snocOL` + (instr format (OpReg reg2) reg1 dst) + return (Any format code) + + vector_float_op_sse :: VectorArithInstns + -> Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_op_sse op l w expr1 expr2 = do + (reg1, exp1) <- getSomeReg expr1 + (reg2, exp2) <- getSomeReg expr2 + let format = case w of + W32 -> VecFormat l FmtFloat W32 + W64 -> VecFormat l FmtDouble W64 + _ -> pprPanic "Operation not supported for width " (ppr w) + code dst = case op of + VA_Add -> arithInstr ADD + VA_Sub -> arithInstr SUB + VA_Mul -> arithInstr MUL + VA_Div -> arithInstr FDIV + where + -- opcode src2 src1 <==> src1 = src1 `opcode` src2 + arithInstr instr + = exp1 `appOL` exp2 `snocOL` + (MOVU format (OpReg reg1) (OpReg dst)) `snocOL` + (instr format (OpReg reg2) (OpReg dst)) + return (Any format code) -------------------- + vector_float_unpack :: Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_unpack l W32 expr (CmmLit lit) + = do + (r, exp) <- getSomeReg expr + let format = VecFormat l FmtFloat W32 + imm = litToImm lit + code dst + = case lit of + CmmInt 0 _ -> exp `snocOL` (VMOVU format (OpReg r) (OpReg dst)) + CmmInt _ _ -> exp `snocOL` (VPSHUFD format (OpImm imm) (OpReg r) dst) + _ -> panic "Error in offset while unpacking" + return (Any format code) + vector_float_unpack l W64 expr (CmmLit lit) + = do + dflags <- getDynFlags + (r, exp) <- getSomeReg expr + let format = VecFormat l FmtDouble W64 + addr = spRel dflags 0 + code dst + = case lit of + CmmInt 0 _ -> exp `snocOL` + (MOVL format (OpReg r) (OpAddr addr)) `snocOL` + (MOV FF64 (OpAddr addr) (OpReg dst)) + CmmInt 1 _ -> exp `snocOL` + (MOVH format (OpReg r) (OpAddr addr)) `snocOL` + (MOV FF64 (OpAddr addr) (OpReg dst)) + _ -> panic "Error in offset while unpacking" + return (Any format code) + vector_float_unpack _ w c e + = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w) + ----------------------- + + vector_float_unpack_sse :: Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_unpack_sse l W32 expr (CmmLit lit) + = do + (r,exp) <- getSomeReg expr + let format = VecFormat l FmtFloat W32 + imm = litToImm lit + code dst + = case lit of + CmmInt 0 _ -> exp `snocOL` (MOVU format (OpReg r) (OpReg dst)) + CmmInt _ _ -> exp `snocOL` (PSHUFD format (OpImm imm) (OpReg r) dst) + _ -> panic "Error in offset while unpacking" + return (Any format code) + vector_float_unpack_sse _ w c e + = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w) + ----------------------- + vector_float_broadcast_avx :: Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_broadcast_avx len W32 expr1 expr2 + = do + dflags <- getDynFlags + fn <- getAnyReg expr1 + (r', exp) <- getSomeReg expr2 + let f = VecFormat len FmtFloat W32 + addr = spRel dflags 0 + in return $ Any f (\r -> exp `appOL` + (fn r) `snocOL` + (MOVU f (OpReg r') (OpAddr addr)) `snocOL` + (VBROADCAST f addr r)) + vector_float_broadcast_avx len W64 expr1 expr2 + = do + dflags <- getDynFlags + fn <- getAnyReg expr1 + (r', exp) <- getSomeReg expr2 + let f = VecFormat len FmtDouble W64 + addr = spRel dflags 0 + in return $ Any f (\r -> exp `appOL` + (fn r) `snocOL` + (MOVU f (OpReg r') (OpAddr addr)) `snocOL` + (MOVL f (OpAddr addr) (OpReg r)) `snocOL` + (MOVH f (OpAddr addr) (OpReg r))) + vector_float_broadcast_avx _ _ c _ + = pprPanic "Broadcast not supported for : " (ppr c) + ----------------------- + vector_float_broadcast_sse :: Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_float_broadcast_sse len W32 expr1 expr2 + = do + dflags <- getDynFlags + fn <- getAnyReg expr1 -- destination + (r, exp) <- getSomeReg expr2 -- source + let f = VecFormat len FmtFloat W32 + addr = spRel dflags 0 + code dst = exp `appOL` + (fn dst) `snocOL` + (MOVU f (OpReg r) (OpAddr addr)) `snocOL` + (insertps 0) `snocOL` + (insertps 16) `snocOL` + (insertps 32) `snocOL` + (insertps 48) + where + insertps off = + INSERTPS f (OpImm $ litToImm $ CmmInt off W32) (OpAddr addr) dst + + in return $ Any f code + vector_float_broadcast_sse _ _ c _ + = pprPanic "Broadcast not supported for : " (ppr c) + ----------------------- sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register sub_code rep x (CmmLit (CmmInt y _)) | is32BitInteger (-y) = add_int rep x (-y) @@ -983,6 +1326,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps return (Fixed format result code) +getRegister' _ _ (CmmLoad mem pk) + | isVecType pk = do + use_avx <- avxEnabled + use_sse <- sseEnabled + Amode addr mem_code <- getAmode mem + let format = cmmTypeFormat pk + code dst + | use_avx = mem_code `snocOL` + VMOVU format (OpAddr addr) (OpReg dst) + | use_sse = mem_code `snocOL` + MOVU format (OpAddr addr) (OpReg dst) + | otherwise = pprPanic (unlines ["avx or sse flag not enabled", + "for loading to "]) + (ppr pk) + return (Any format code) getRegister' _ _ (CmmLoad mem pk) | isFloatType pk @@ -1049,10 +1407,24 @@ getRegister' dflags is32Bit (CmmLit lit) -- small memory model (see gcc docs, -mcmodel=small). getRegister' dflags _ (CmmLit lit) - = do let format = cmmTypeFormat (cmmLitType dflags lit) - imm = litToImm lit - code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) - return (Any format code) + | isVecType cmmtype = vectorRegister cmmtype + | otherwise = standardRegister cmmtype + where + cmmtype = cmmLitType dflags lit + vectorRegister ctype + = do + --NOTE: + -- This operation is only used to zero a register. For loading a + -- vector literal there are pack and broadcast operations + let format = cmmTypeFormat ctype + code dst = unitOL (XOR format (OpReg dst) (OpReg dst)) + return (Any format code) + standardRegister ctype + = do + let format = cmmTypeFormat ctype + imm = litToImm lit + code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) + return (Any format code) getRegister' _ _ other | isVecExpr other = needLlvm @@ -1118,8 +1490,14 @@ getNonClobberedReg expr = do return (reg, code) reg2reg :: Format -> Reg -> Reg -> Instr -reg2reg format src dst = MOV format (OpReg src) (OpReg dst) - +reg2reg format@(VecFormat _ FmtFloat W32) src dst + = VMOVU format (OpReg src) (OpReg dst) +reg2reg format@(VecFormat _ FmtDouble W64) src dst + = VMOVU format (OpReg src) (OpReg dst) +reg2reg (VecFormat _ _ _) _ _ + = panic "MOV operation not implemented for vectors" +reg2reg format src dst + = MOV format (OpReg src) (OpReg dst) -------------------------------------------------------------------------------- getAmode :: CmmExpr -> NatM Amode @@ -1181,6 +1559,9 @@ getAmode' _ (CmmMachOp (MO_Add _) getAmode' _ (CmmMachOp (MO_Add _) [x,y]) = x86_complex_amode x y 0 0 +getAmode' _ (CmmLit lit@(CmmFloat _ w)) + = memConstant (mkAlignment $ widthInBytes w) lit + getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit = return (Amode (ImmAddr (litToImm lit) 0) nilOL) @@ -1561,7 +1942,8 @@ assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - +assignMem_VecCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_VecCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock -- integer assignment to memory -- specific case of adding/subtracting an integer to a particular address. @@ -1638,6 +2020,29 @@ assignReg_FltCode _ reg src = do let platform = targetPlatform dflags return (src_code (getRegisterReg platform reg)) +assignMem_VecCode pk addr src = do + (src_reg, src_code) <- getNonClobberedReg src + Amode addr addr_code <- getAmode addr + use_avx <- avxEnabled + use_sse <- sseEnabled + let + code | use_avx = src_code `appOL` + addr_code `snocOL` + (VMOVU pk (OpReg src_reg) (OpAddr addr)) + | use_sse = src_code `appOL` + addr_code `snocOL` + (MOVU pk (OpReg src_reg) (OpAddr addr)) + | otherwise = sorry "Please enable the -mavx or -msse flag" + return code + +assignReg_VecCode format reg src = do + use_avx <- avxEnabled + use_sse <- sseEnabled + src_code <- getAnyReg src + dflags <- getDynFlags + let platform = targetPlatform dflags + flag = use_avx || use_sse + return (src_code (getVecRegisterReg platform flag format reg)) genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock @@ -3362,6 +3767,7 @@ sse2NegCode w x = do x@II16 -> wrongFmt x x@II32 -> wrongFmt x x@II64 -> wrongFmt x + x@VecFormat {} -> wrongFmt x where wrongFmt x = panic $ "sse2NegCode: " ++ show x @@ -3376,29 +3782,33 @@ sse2NegCode w x = do return (Any fmt code) isVecExpr :: CmmExpr -> Bool -isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_V_Add {}) _) = True -isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True -isVecExpr (CmmMachOp _ [e]) = isVecExpr e -isVecExpr _ = False +isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True +isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True +isVecExpr (CmmMachOp (MO_V_Add {}) _) = True +isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True +isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Broadcast {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True +isVecExpr (CmmMachOp _ [e]) = isVecExpr e +isVecExpr _ = False needLlvm :: NatM a needLlvm = sorry $ unlines ["The native code generator does not support vector" ,"instructions. Please use -fllvm."] +incorrectOperands :: NatM a +incorrectOperands = sorry "Incorrect number of operands" + -- | This works on the invariant that all jumps in the given blocks are required. -- Starting from there we try to make a few more jumps redundant by reordering -- them. diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 6e5d656beb..47b62e62e7 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -328,6 +328,36 @@ data Instr | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit | MFENCE + -- Vector Instructions -- + -- NOTE: Instructions follow the AT&T syntax + -- Constructors and deconstructors + | VBROADCAST Format AddrMode Reg + | VEXTRACT Format Operand Reg Operand + | INSERTPS Format Operand Operand Reg + + -- move operations + | VMOVU Format Operand Operand + | MOVU Format Operand Operand + | MOVL Format Operand Operand + | MOVH Format Operand Operand + + -- logic operations + | VPXOR Format Reg Reg Reg + + -- Arithmetic + | VADD Format Operand Reg Reg + | VSUB Format Operand Reg Reg + | VMUL Format Operand Reg Reg + | VDIV Format Operand Reg Reg + + -- Shuffle + | VPSHUFD Format Operand Operand Reg + | PSHUFD Format Operand Operand Reg + + -- Shift + | PSLLDQ Format Operand Reg + | PSRLDQ Format Operand Reg + data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 @@ -430,6 +460,31 @@ x86_regUsageOfInstr platform instr CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) MFENCE -> noUsage + -- vector instructions + VBROADCAST _ src dst -> mkRU (use_EA src []) [dst] + VEXTRACT _ off src dst -> mkRU ((use_R off []) ++ [src]) (use_R dst []) + INSERTPS _ off src dst + -> mkRU ((use_R off []) ++ (use_R src []) ++ [dst]) [dst] + + VMOVU _ src dst -> mkRU (use_R src []) (use_R dst []) + MOVU _ src dst -> mkRU (use_R src []) (use_R dst []) + MOVL _ src dst -> mkRU (use_R src []) (use_R dst []) + MOVH _ src dst -> mkRU (use_R src []) (use_R dst []) + VPXOR _ s1 s2 dst -> mkRU [s1,s2] [dst] + + VADD _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] + VSUB _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] + VMUL _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] + VDIV _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] + + VPSHUFD _ off src dst + -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst] + PSHUFD _ off src dst + -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst] + + PSLLDQ _ off dst -> mkRU (use_R off []) [dst] + PSRLDQ _ off dst -> mkRU (use_R off []) [dst] + _other -> panic "regUsage: unrecognised instr" where -- # Definitions @@ -588,6 +643,32 @@ x86_patchRegsOfInstr instr env CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst MFENCE -> instr + -- vector instructions + VBROADCAST fmt src dst -> VBROADCAST fmt (lookupAddr src) (env dst) + VEXTRACT fmt off src dst + -> VEXTRACT fmt (patchOp off) (env src) (patchOp dst) + INSERTPS fmt off src dst + -> INSERTPS fmt (patchOp off) (patchOp src) (env dst) + + VMOVU fmt src dst -> VMOVU fmt (patchOp src) (patchOp dst) + MOVU fmt src dst -> MOVU fmt (patchOp src) (patchOp dst) + MOVL fmt src dst -> MOVL fmt (patchOp src) (patchOp dst) + MOVH fmt src dst -> MOVH fmt (patchOp src) (patchOp dst) + VPXOR fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst) + + VADD fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst) + VSUB fmt s1 s2 dst -> VSUB fmt (patchOp s1) (env s2) (env dst) + VMUL fmt s1 s2 dst -> VMUL fmt (patchOp s1) (env s2) (env dst) + VDIV fmt s1 s2 dst -> VDIV fmt (patchOp s1) (env s2) (env dst) + + VPSHUFD fmt off src dst + -> VPSHUFD fmt (patchOp off) (patchOp src) (env dst) + PSHUFD fmt off src dst + -> PSHUFD fmt (patchOp off) (patchOp src) (env dst) + PSLLDQ fmt off dst + -> PSLLDQ fmt (patchOp off) (env dst) + PSRLDQ fmt off dst + -> PSRLDQ fmt (patchOp off) (env dst) _other -> panic "patchRegs: unrecognised instr" where diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 095d9eba7c..a3f27ba471 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -41,7 +41,6 @@ import DynFlags import Cmm hiding (topInfoTable) import BlockId import CLabel -import Unique ( pprUniqueAlways ) import GHC.Platform import FastString import Outputable @@ -280,10 +279,7 @@ pprReg f r if target32Bit platform then ppr32_reg_no f i else ppr64_reg_no f i RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch" - RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u - RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u - RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u - RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u + RegVirtual v -> ppr v where ppr32_reg_no :: Format -> Int -> SDoc @@ -395,6 +391,11 @@ pprFormat x II64 -> sLit "q" FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2) FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2) + + VecFormat _ FmtFloat W32 -> sLit "ps" + VecFormat _ FmtDouble W64 -> sLit "pd" + -- TODO: Add Ints and remove panic + VecFormat {} -> panic "Incorrect width" ) pprFormat_x87 :: Format -> SDoc @@ -783,6 +784,41 @@ pprInstr (IDIV fmt op) = pprFormatOp (sLit "idiv") fmt op pprInstr (DIV fmt op) = pprFormatOp (sLit "div") fmt op pprInstr (IMUL2 fmt op) = pprFormatOp (sLit "imul") fmt op +-- Vector Instructions + +pprInstr (VADD format s1 s2 dst) + = pprFormatOpRegReg (sLit "vadd") format s1 s2 dst +pprInstr (VSUB format s1 s2 dst) + = pprFormatOpRegReg (sLit "vsub") format s1 s2 dst +pprInstr (VMUL format s1 s2 dst) + = pprFormatOpRegReg (sLit "vmul") format s1 s2 dst +pprInstr (VDIV format s1 s2 dst) + = pprFormatOpRegReg (sLit "vdiv") format s1 s2 dst +pprInstr (VBROADCAST format from to) + = pprBroadcast (sLit "vbroadcast") format from to +pprInstr (VMOVU format from to) + = pprFormatOpOp (sLit "vmovu") format from to +pprInstr (MOVU format from to) + = pprFormatOpOp (sLit "movu") format from to +pprInstr (MOVL format from to) + = pprFormatOpOp (sLit "movl") format from to +pprInstr (MOVH format from to) + = pprFormatOpOp (sLit "movh") format from to +pprInstr (VPXOR format s1 s2 dst) + = pprXor (sLit "vpxor") format s1 s2 dst +pprInstr (VEXTRACT format offset from to) + = pprFormatOpRegOp (sLit "vextract") format offset from to +pprInstr (INSERTPS format offset addr dst) + = pprInsert (sLit "insertps") format offset addr dst +pprInstr (VPSHUFD format offset src dst) + = pprShuf (sLit "vpshufd") format offset src dst +pprInstr (PSHUFD format offset src dst) + = pprShuf (sLit "pshufd") format offset src dst +pprInstr (PSLLDQ format offset dst) + = pprShiftLeft (sLit "pslldq") format offset dst +pprInstr (PSRLDQ format offset dst) + = pprShiftRight (sLit "psrldq") format offset dst + -- x86_64 only pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2 pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op @@ -875,6 +911,23 @@ pprMnemonic :: PtrString -> Format -> SDoc pprMnemonic name format = char '\t' <> ptext name <> pprFormat format <> space +pprGenMnemonic :: PtrString -> Format -> SDoc +pprGenMnemonic name _ = + char '\t' <> ptext name <> ptext (sLit "") <> space + +pprBroadcastMnemonic :: PtrString -> Format -> SDoc +pprBroadcastMnemonic name format = + char '\t' <> ptext name <> pprBroadcastFormat format <> space + +pprBroadcastFormat :: Format -> SDoc +pprBroadcastFormat x + = ptext (case x of + VecFormat _ FmtFloat W32 -> sLit "ss" + VecFormat _ FmtDouble W64 -> sLit "sd" + -- TODO: Add Ints and remove panic + VecFormat {} -> panic "Incorrect width" + _ -> panic "Scalar Format invading vector operation" + ) pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc pprFormatImmOp name format imm op1 @@ -921,7 +974,16 @@ pprOpOp name format op1 op2 pprOperand format op2 ] - +pprFormatOpRegOp :: PtrString -> Format -> Operand -> Reg -> Operand -> SDoc +pprFormatOpRegOp name format off reg1 op2 + = hcat [ + pprMnemonic name format, + pprOperand format off, + comma, + pprReg format reg1, + comma, + pprOperand format op2 + ] pprRegReg :: PtrString -> Reg -> Reg -> SDoc pprRegReg name reg1 reg2 @@ -944,6 +1006,17 @@ pprFormatOpReg name format op1 reg2 pprReg (archWordFormat (target32Bit platform)) reg2 ] +pprFormatOpRegReg :: PtrString -> Format -> Operand -> Reg -> Reg -> SDoc +pprFormatOpRegReg name format op1 reg2 reg3 + = hcat [ + pprMnemonic name format, + pprOperand format op1, + comma, + pprReg format reg2, + comma, + pprReg format reg3 + ] + pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc pprCondOpReg name format cond op1 reg2 = hcat [ @@ -1008,3 +1081,68 @@ pprFormatOpOpCoerce name format1 format2 op1 op2 pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc pprCondInstr name cond arg = hcat [ char '\t', ptext name, pprCond cond, space, arg] + + +-- Custom pretty printers +-- These instructions currently don't follow a uniform suffix pattern +-- in their names, so we have custom pretty printers for them. + +pprBroadcast :: PtrString -> Format -> AddrMode -> Reg -> SDoc +pprBroadcast name format op dst + = hcat [ + pprBroadcastMnemonic name format, + pprAddr op, + comma, + pprReg format dst + ] + +pprXor :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc +pprXor name format reg1 reg2 reg3 + = hcat [ + pprGenMnemonic name format, + pprReg format reg1, + comma, + pprReg format reg2, + comma, + pprReg format reg3 + ] + +pprInsert :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc +pprInsert name format off src dst + = hcat [ + pprGenMnemonic name format, + pprOperand format off, + comma, + pprOperand format src, + comma, + pprReg format dst + ] + +pprShuf :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc +pprShuf name format op1 op2 reg3 + = hcat [ + pprGenMnemonic name format, + pprOperand format op1, + comma, + pprOperand format op2, + comma, + pprReg format reg3 + ] + +pprShiftLeft :: PtrString -> Format -> Operand -> Reg -> SDoc +pprShiftLeft name format off reg + = hcat [ + pprGenMnemonic name format, + pprOperand format off, + comma, + pprReg format reg + ] + +pprShiftRight :: PtrString -> Format -> Operand -> Reg -> SDoc +pprShiftRight name format off reg + = hcat [ + pprGenMnemonic name format, + pprOperand format off, + comma, + pprReg format reg + ] diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs index 19056be4fa..a7784bacad 100644 --- a/compiler/nativeGen/X86/RegInfo.hs +++ b/compiler/nativeGen/X86/RegInfo.hs @@ -22,6 +22,8 @@ import UniqFM import X86.Regs +--TODO: +-- Add VirtualRegAVX and inspect VecFormat and allocate mkVirtualReg :: Unique -> Format -> VirtualReg mkVirtualReg u format = case format of @@ -31,6 +33,7 @@ mkVirtualReg u format -- For now we map both to being allocated as "Double" Registers -- on X86/X86_64 FF64 -> VirtualRegD u + VecFormat {} -> VirtualRegVec u _other -> VirtualRegI u regDotColor :: Platform -> RealReg -> SDoc diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 2d9fd88c8e..f0e4c7d5f6 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -84,6 +84,7 @@ virtualRegSqueeze cls vr -> case vr of VirtualRegD{} -> 1 VirtualRegF{} -> 0 + VirtualRegVec{} -> 1 _other -> 0 |