| author    | Ben Gamari <ben@smart-cactus.org> | 2019-07-09 14:49:32 -0400 |
|-----------|-----------------------------------|---------------------------|
| committer | Ben Gamari <ben@smart-cactus.org> | 2019-07-13 11:36:29 -0400 |
| commit    | 311ec702a78c9e2ba35e66c77e53748e8e751f8d (patch) | |
| tree      | 792eb726fbc1c8bdf63e1e43975869c18f85b58f /compiler | |
| parent    | de3935a6ccc26ec063e13d2739dd098c7616fde2 (diff) | |
Revert "Add support for SIMD operations in the NCG"wip/back-out-simd
Unfortunately this will require more work; register allocation is
quite broken.
This reverts commit acd795583625401c5554f8e04ec7efca18814011.
Diffstat (limited to 'compiler')
29 files changed, 209 insertions, 1017 deletions
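For orientation before the full diff: the central change being reverted is the shape of the SIMD register constructors in `GlobalReg` (compiler/cmm/CmmExpr.hs). The reverted design carried a length, element width, and element class on each vector register; after this revert the constructors carry only the register number again, with a fixed `CmmType` per register. The following is a minimal, self-contained sketch of the two shapes; the `Before`/`After` names and the stand-in `Length`/`Width`/`GlobalVecRegTy` definitions are illustrative only, not the real GHC module.

```haskell
-- Sketch of the GlobalReg change reverted in the CmmExpr.hs hunk below.
-- Length/Width/GlobalVecRegTy are simplified stand-ins for the real
-- definitions in CmmType/CmmExpr.
type Length = Int
data Width = W32 | W64 | W128 | W256 | W512 deriving (Eq, Show)
data GlobalVecRegTy = Integer | Float deriving (Eq, Ord, Show)

-- Shape before the revert: each SIMD register records how it is viewed
-- (length, element width, element class), so XMM1-as-2xI64 and
-- XMM1-as-4xF32 are distinct but overlapping global registers.
data GlobalRegBefore
  = XmmRegBefore !Int !Length !Width !GlobalVecRegTy   -- 128-bit register
  | YmmRegBefore !Int !Length !Width !GlobalVecRegTy   -- 256-bit register
  | ZmmRegBefore !Int !Length !Width !GlobalVecRegTy   -- 512-bit register
  deriving (Eq, Show)

-- Shape after the revert: only the register number remains, and the
-- register's CmmType is fixed again (e.g. XMM is treated as 4 x 32-bit).
data GlobalRegAfter
  = XmmRegAfter !Int
  | YmmRegAfter !Int
  | ZmmRegAfter !Int
  deriving (Eq, Show)
```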
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index 6df910edfa..4e6a9d293a 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -64,20 +64,13 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) assign_regs assts (r:rs) regs | isVecType ty = vec | isFloatType ty = float | otherwise = int - where vec = case regs of - (vs, fs, ds, ls, s:ss) - | passVectorInReg w dflags - -> let elt_ty = vecElemType ty - reg_ty = if isFloatType elt_ty - then Float else Integer - reg_class = case w of - W128 -> XmmReg - W256 -> YmmReg - W512 -> ZmmReg - _ -> panic "CmmCallConv.assignArgumentsPos: Invalid vector width" - in k (RegisterParam - (reg_class s (vecLength ty) (typeWidth elt_ty) reg_ty), - (vs, fs, ds, ls, ss)) + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) @@ -96,7 +89,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) -> k (RegisterParam l, (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) - k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' ty = arg_ty r w = typeWidth ty @@ -210,13 +202,11 @@ nodeOnly = ([VanillaReg 1], [], [], [], []) -- only use this functionality in hand-written C-- code in the RTS. realArgRegsCover :: DynFlags -> [GlobalReg] realArgRegsCover dflags - | passFloatArgsInXmm dflags - = map ($VGcPtr) (realVanillaRegs dflags) ++ - realLongRegs dflags ++ - map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags) - | otherwise - = map ($VGcPtr) (realVanillaRegs dflags) ++ - realFloatRegs dflags ++ - realDoubleRegs dflags ++ - realLongRegs dflags ++ - map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags) + | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) + | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 79eaf8f89c..901df5d908 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -14,7 +14,6 @@ module CmmExpr , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg , node, baseReg , VGcPtr(..) - , GlobalVecRegTy(..) , DefinerOfRegs, UserOfRegs , foldRegsDefd, foldRegsUsed @@ -42,7 +41,6 @@ import Outputable (panic) import Unique import Data.Set (Set) -import Data.Monoid ((<>)) import qualified Data.Set as Set import BasicTypes (Alignment, mkAlignment, alignmentOf) @@ -394,7 +392,6 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) ----------------------------------------------------------------------------- {- Note [Overlapping global registers] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The backend might not faithfully implement the abstraction of the STG machine with independent registers for different values of type @@ -416,26 +413,6 @@ on a particular platform. The instance Eq GlobalReg is syntactic equality of STG registers and does not take overlap into account. 
However it is still used in UserOfRegs/DefinerOfRegs and there are likely still bugs there, beware! - - -Note [SIMD registers] -~~~~~~~~~~~~~~~~~~~~~ - -GHC's treatment of SIMD registers is heavily modelled after the x86_64 -architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM) -registers. Furthermore, we treat each possible format in these registers as a -distinct register which overlaps with the others. For instance, we XMM1 as a -2xI64 register is distinct from but overlaps with (in the sense defined in Note -[Overlapping global registers]) its use as a 4xI32 register. - -This model makes it easier to fit SIMD registers into the NCG, which generally -expects that each global register has a single, known CmmType. - -In the future we could consider further refactoring this to eliminate the -XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just -having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ..., -Vec4x32A, ..., Vec4x64A). - -} data GlobalReg @@ -455,15 +432,12 @@ data GlobalReg | XmmReg -- 128-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy | YmmReg -- 256-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy | ZmmReg -- 512-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy -- STG registers | Sp -- Stack ptr; points to last occupied stack location. @@ -504,17 +478,17 @@ data GlobalReg deriving( Show ) -data GlobalVecRegTy = Integer | Float - deriving (Show, Eq, Ord) - instance Eq GlobalReg where VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes FloatReg i == FloatReg j = i==j DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j - XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' - YmmReg i l w grt == YmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' - ZmmReg i l w grt == ZmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' + -- NOTE: XMM, YMM, ZMM registers actually are the same registers + -- at least with respect to store at YMM i and then read from XMM i + -- and similarly for ZMM etc. 
+ XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j + ZmmReg i == ZmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -538,21 +512,9 @@ instance Ord GlobalReg where compare (FloatReg i) (FloatReg j) = compare i j compare (DoubleReg i) (DoubleReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j - compare (XmmReg i l w grt) - (XmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' - compare (YmmReg i l w grt) - (YmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' - compare (ZmmReg i l w grt) - (ZmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' + compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j + compare (ZmmReg i) (ZmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -576,12 +538,12 @@ instance Ord GlobalReg where compare _ (DoubleReg _) = GT compare (LongReg _) _ = LT compare _ (LongReg _) = GT - compare (XmmReg _ _ _ _) _ = LT - compare _ (XmmReg _ _ _ _) = GT - compare (YmmReg _ _ _ _) _ = LT - compare _ (YmmReg _ _ _ _) = GT - compare (ZmmReg _ _ _ _) _ = LT - compare _ (ZmmReg _ _ _ _) = GT + compare (XmmReg _) _ = LT + compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT + compare (ZmmReg _) _ = LT + compare _ (ZmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -634,15 +596,12 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 -globalRegType _ (XmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) -globalRegType _ (YmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) -globalRegType _ (ZmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) +-- TODO: improve the internal model of SIMD/vectorized registers +-- the right design SHOULd improve handling of float and double code too. 
+-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim +globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) +globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) +globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) globalRegType dflags Hp = gcWord dflags -- The initialiser for all diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 53dcd70b7b..d5c3f84443 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -148,13 +148,9 @@ lintCmmMiddle node = case node of dflags <- getDynFlags erep <- lintCmmExpr expr let reg_ty = cmmRegType dflags reg - case isVecCatType reg_ty of - True -> if ((typeWidth reg_ty) == (typeWidth erep)) - then return () - else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty - _ -> if (erep `cmmEqType_ignoring_ptrhood` reg_ty) - then return () - else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty CmmStore l r -> do _ <- lintCmmExpr l diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 38d9edb480..9740d21bef 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -136,9 +136,8 @@ data MachOp | MO_VU_Rem Length Width -- Floting point vector element insertion and extraction operations - | MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector - | MO_VF_Insert Length Width -- Insert scalar into vector - | MO_VF_Extract Length Width -- Extract scalar from vector + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector -- Floating point vector operations | MO_VF_Add Length Width @@ -431,7 +430,6 @@ machOpResultType dflags mop tys = MO_VU_Quot l w -> cmmVec l (cmmBits w) MO_VU_Rem l w -> cmmVec l (cmmBits w) - MO_VF_Broadcast l w -> cmmVec l (cmmFloat w) MO_VF_Insert l w -> cmmVec l (cmmFloat w) MO_VF_Extract _ w -> cmmFloat w @@ -524,21 +522,16 @@ machOpArgReps dflags op = MO_VU_Quot _ r -> [r,r] MO_VU_Rem _ r -> [r,r] - -- offset is always W32 as mentioned in StgCmmPrim.hs - MO_VF_Broadcast l r -> [vecwidth l r, r] - MO_VF_Insert l r -> [vecwidth l r, r, W32] - MO_VF_Extract l r -> [vecwidth l r, W32] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] - -- NOTE: The below is owing to the fact that floats use the SSE registers - MO_VF_Add l w -> [vecwidth l w, vecwidth l w] - MO_VF_Sub l w -> [vecwidth l w, vecwidth l w] - MO_VF_Mul l w -> [vecwidth l w, vecwidth l w] - MO_VF_Quot l w -> [vecwidth l w, vecwidth l w] - MO_VF_Neg l w -> [vecwidth l w] + MO_VF_Add _ r -> [r,r] + MO_VF_Sub _ r -> [r,r] + MO_VF_Mul _ r -> [r,r] + MO_VF_Quot _ r -> [r,r] + MO_VF_Neg _ r -> [r] MO_AlignmentCheck _ r -> [r] - where - vecwidth l w = widthFromBytes (l*widthInBytes w) ----------------------------------------------------------------------------- -- CallishMachOp diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 17b588720f..43d23c7ee7 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -6,7 +6,6 @@ module CmmType , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood , isFloatType, isGcPtrType, isBitsType , isWord32, isWord64, isFloat64, isFloat32 - , isVecCatType , Width(..) 
, widthInBits, widthInBytes, widthInLog, widthFromBytes @@ -134,7 +133,7 @@ cInt :: DynFlags -> CmmType cInt dflags = cmmBits (cIntWidth dflags) ------------ Predicates ---------------- -isFloatType, isGcPtrType, isBitsType, isVecCatType :: CmmType -> Bool +isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool isFloatType (CmmType FloatCat _) = True isFloatType _other = False @@ -144,9 +143,6 @@ isGcPtrType _other = False isBitsType (CmmType BitsCat _) = True isBitsType _ = False -isVecCatType (CmmType (VecCat _ _) _) = True -isVecCatType _other = False - isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) -- isFloat32 and 64 are obvious diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index a60a26229b..7227edd57e 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -713,10 +713,6 @@ pprMachOp_for_C mop = case mop of (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" ++ " should have been handled earlier!") - MO_VF_Broadcast {} -> pprTrace "offending mop:" - (text "MO_VF_Broadcast") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast" - ++ " should have been handled earlier!") MO_VF_Insert {} -> pprTrace "offending mop:" (text "MO_VF_Insert") (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 2080c1f5d8..7bf73f1ca6 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -261,9 +261,9 @@ pprGlobalReg gr FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n - XmmReg n _ _ _ -> text "XMM" <> int n - YmmReg n _ _ _ -> text "YMM" <> int n - ZmmReg n _ _ _ -> text "ZMM" <> int n + XmmReg n -> text "XMM" <> int n + YmmReg n -> text "YMM" <> int n + ZmmReg n -> text "ZMM" <> int n Sp -> text "Sp" SpLim -> text "SpLim" Hp -> text "Hp" diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 2cbcfc66a9..0ff9bd8b56 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -57,27 +57,27 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")") -baseRegOffset dflags (XmmReg 1 _ _ _) = oFFSET_StgRegTable_rXMM1 dflags -baseRegOffset dflags (XmmReg 2 _ _ _) = oFFSET_StgRegTable_rXMM2 dflags -baseRegOffset dflags (XmmReg 3 _ _ _) = oFFSET_StgRegTable_rXMM3 dflags -baseRegOffset dflags (XmmReg 4 _ _ _) = oFFSET_StgRegTable_rXMM4 dflags -baseRegOffset dflags (XmmReg 5 _ _ _) = oFFSET_StgRegTable_rXMM5 dflags -baseRegOffset dflags (XmmReg 6 _ _ _) = oFFSET_StgRegTable_rXMM6 dflags -baseRegOffset _ (XmmReg n _ _ _) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") -baseRegOffset dflags (YmmReg 1 _ _ _) = oFFSET_StgRegTable_rYMM1 dflags -baseRegOffset dflags (YmmReg 2 _ _ _) = oFFSET_StgRegTable_rYMM2 dflags -baseRegOffset dflags (YmmReg 3 _ _ _) = oFFSET_StgRegTable_rYMM3 dflags -baseRegOffset dflags (YmmReg 4 _ _ _) = oFFSET_StgRegTable_rYMM4 dflags -baseRegOffset dflags (YmmReg 5 _ _ _) = oFFSET_StgRegTable_rYMM5 dflags -baseRegOffset dflags (YmmReg 6 _ _ _) = oFFSET_StgRegTable_rYMM6 dflags -baseRegOffset _ (YmmReg n _ _ _) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") -baseRegOffset dflags (ZmmReg 1 _ _ _) = 
oFFSET_StgRegTable_rZMM1 dflags -baseRegOffset dflags (ZmmReg 2 _ _ _) = oFFSET_StgRegTable_rZMM2 dflags -baseRegOffset dflags (ZmmReg 3 _ _ _) = oFFSET_StgRegTable_rZMM3 dflags -baseRegOffset dflags (ZmmReg 4 _ _ _) = oFFSET_StgRegTable_rZMM4 dflags -baseRegOffset dflags (ZmmReg 5 _ _ _) = oFFSET_StgRegTable_rZMM5 dflags -baseRegOffset dflags (ZmmReg 6 _ _ _) = oFFSET_StgRegTable_rZMM6 dflags -baseRegOffset _ (ZmmReg n _ _ _) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") +baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags +baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags +baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags +baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags +baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags +baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags +baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") +baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags +baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags +baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags +baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags +baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags +baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags +baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") +baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags +baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags +baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags +baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags +baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags +baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags +baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 9a6cf6c2e5..c3f9d5a279 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -669,7 +669,7 @@ emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res] -- SIMD primops emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do checkVecCompatibility dflags vcat n w - doVecBroadcastOp (vecElemInjectCast dflags vcat w) ty zeros e res + doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -1765,8 +1765,9 @@ vecElemProjectCast _ _ _ = Nothing checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () checkVecCompatibility dflags vcat l w = do - when (hscTarget dflags /= HscLlvm && hscTarget dflags /= HscAsm) $ do - sorry "SIMD vector instructions not supported for the C backend or GHCi" + when (hscTarget dflags /= HscLlvm) $ do + sorry $ unlines ["SIMD vector instructions require the LLVM back-end." 
+ ,"Please use -fllvm."] check vecWidth vcat l w where check :: Width -> PrimOpVecCat -> Length -> Width -> FCode () @@ -1791,38 +1792,6 @@ checkVecCompatibility dflags vcat l w = do ------------------------------------------------------------------------------ -- Helpers for translating vector packing and unpacking. -doVecBroadcastOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Type of vector - -> CmmExpr -- Initial vector - -> CmmExpr -- Elements - -> CmmFormal -- Destination for result - -> FCode () -doVecBroadcastOp maybe_pre_write_cast ty z es res = do - dst <- newTemp ty - emitAssign (CmmLocal dst) z - vecBroadcast dst es 0 - where - vecBroadcast :: CmmFormal -> CmmExpr -> Int -> FCode () - vecBroadcast src e _ = do - dst <- newTemp ty - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid) - [CmmReg (CmmLocal src), cast e]) - --TODO : Add the MachOp MO_V_Broadcast - else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e]) - emitAssign (CmmLocal res) (CmmReg (CmmLocal dst)) - - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - - len :: Length - len = vecLength ty - - wid :: Width - wid = typeWidth (vecElemType ty) doVecPackOp :: Maybe MachOp -- Cast from element to vector component -> CmmType -- Type of vector @@ -1840,16 +1809,16 @@ doVecPackOp maybe_pre_write_cast ty z es res = do emitAssign (CmmLocal res) (CmmReg (CmmLocal src)) vecPack src (e : es) i = do - dst <- newTemp ty - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - vecPack dst es (i + 1) + dst <- newTemp ty + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) + else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) + vecPack dst es (i + 1) where -- vector indices are always 32-bits - iLit = CmmLit (CmmInt ((toInteger i) * 16) W32) + iLit = CmmLit (CmmInt (toInteger i) W32) cast :: CmmExpr -> CmmExpr cast val = case maybe_pre_write_cast of diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index a5a5683a3e..81f3b9f84c 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -152,12 +152,12 @@ llvmFunArgs dflags live = where platform = targetPlatform dflags isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live isPassed r = not (isSSE r) || isLive r - isSSE (FloatReg _) = True - isSSE (DoubleReg _) = True - isSSE (XmmReg _ _ _ _ ) = True - isSSE (YmmReg _ _ _ _ ) = True - isSSE (ZmmReg _ _ _ _ ) = True - isSSE _ = False + isSSE (FloatReg _) = True + isSSE (DoubleReg _) = True + isSSE (XmmReg _) = True + isSSE (YmmReg _) = True + isSSE (ZmmReg _) = True + isSSE _ = False -- | Llvm standard fun attributes llvmStdFunAttrs :: [LlvmFuncAttr] diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 8fea6e0b17..86a59381b2 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -1287,7 +1287,6 @@ genMachOp _ op [x] = case op of MO_VU_Quot _ _ -> panicOp MO_VU_Rem _ _ -> panicOp - MO_VF_Broadcast _ _ -> panicOp MO_VF_Insert _ _ -> 
panicOp MO_VF_Extract _ _ -> panicOp @@ -1484,7 +1483,6 @@ genMachOp_slow opt op [x, y] = case op of MO_VS_Neg {} -> panicOp - MO_VF_Broadcast {} -> panicOp MO_VF_Insert {} -> panicOp MO_VF_Extract {} -> panicOp @@ -1846,9 +1844,9 @@ funEpilogue live = do let liveRegs = alwaysLive ++ live isSSE (FloatReg _) = True isSSE (DoubleReg _) = True - isSSE (XmmReg _ _ _ _) = True - isSSE (YmmReg _ _ _ _) = True - isSSE (ZmmReg _ _ _ _) = True + isSSE (XmmReg _) = True + isSSE (YmmReg _) = True + isSSE (ZmmReg _) = True isSSE _ = False -- Set to value or "undef" depending on whether the register is diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs index 6d188d908f..8cdf3c6869 100644 --- a/compiler/llvmGen/LlvmCodeGen/Regs.hs +++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -60,24 +60,24 @@ lmGlobalReg dflags suf reg DoubleReg 4 -> doubleGlobal $ "D4" ++ suf DoubleReg 5 -> doubleGlobal $ "D5" ++ suf DoubleReg 6 -> doubleGlobal $ "D6" ++ suf - XmmReg 1 _ _ _ -> xmmGlobal $ "XMM1" ++ suf - XmmReg 2 _ _ _ -> xmmGlobal $ "XMM2" ++ suf - XmmReg 3 _ _ _ -> xmmGlobal $ "XMM3" ++ suf - XmmReg 4 _ _ _ -> xmmGlobal $ "XMM4" ++ suf - XmmReg 5 _ _ _ -> xmmGlobal $ "XMM5" ++ suf - XmmReg 6 _ _ _ -> xmmGlobal $ "XMM6" ++ suf - YmmReg 1 _ _ _ -> ymmGlobal $ "YMM1" ++ suf - YmmReg 2 _ _ _ -> ymmGlobal $ "YMM2" ++ suf - YmmReg 3 _ _ _ -> ymmGlobal $ "YMM3" ++ suf - YmmReg 4 _ _ _ -> ymmGlobal $ "YMM4" ++ suf - YmmReg 5 _ _ _ -> ymmGlobal $ "YMM5" ++ suf - YmmReg 6 _ _ _ -> ymmGlobal $ "YMM6" ++ suf - ZmmReg 1 _ _ _ -> zmmGlobal $ "ZMM1" ++ suf - ZmmReg 2 _ _ _ -> zmmGlobal $ "ZMM2" ++ suf - ZmmReg 3 _ _ _ -> zmmGlobal $ "ZMM3" ++ suf - ZmmReg 4 _ _ _ -> zmmGlobal $ "ZMM4" ++ suf - ZmmReg 5 _ _ _ -> zmmGlobal $ "ZMM5" ++ suf - ZmmReg 6 _ _ _ -> zmmGlobal $ "ZMM6" ++ suf + XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf + XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf + XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf + XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf + XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf + XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf + YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf + YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf + YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf + YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf + YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf + YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf + ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf + ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf + ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf + ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf + ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf + ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf MachSp -> wordGlobal $ "MachSp" ++ suf _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) ++ ") not supported!" 
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 7009771aa4..052e49a965 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -219,7 +219,6 @@ module DynFlags ( -- * SSE and AVX isSseEnabled, isSse2Enabled, - isSse4_1Enabled, isSse4_2Enabled, isBmiEnabled, isBmi2Enabled, @@ -5903,8 +5902,6 @@ isSse2Enabled dflags = case platformArch (targetPlatform dflags) of ArchX86 -> True _ -> False -isSse4_1Enabled :: DynFlags -> Bool -isSse4_1Enabled dflags = sseVersion dflags >= Just SSE4 isSse4_2Enabled :: DynFlags -> Bool isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42 diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs index a0e4e99f80..31472893e7 100644 --- a/compiler/nativeGen/Format.hs +++ b/compiler/nativeGen/Format.hs @@ -10,11 +10,9 @@ -- module Format ( Format(..), - ScalarFormat(..), intFormat, floatFormat, isFloatFormat, - isVecFormat, cmmTypeFormat, formatToWidth, formatInBytes @@ -27,29 +25,6 @@ import GhcPrelude import Cmm import Outputable - --- Note [GHC's data format representations] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- GHC has severals types that represent various aspects of data format. --- These include: --- --- * 'CmmType.CmmType': The data classification used throughout the C-- --- pipeline. This is a pair of a CmmCat and a Width. --- --- * 'CmmType.CmmCat': What the bits in a C-- value mean (e.g. a pointer, integer, or floating-point value) --- --- * 'CmmType.Width': The width of a C-- value. --- --- * 'CmmType.Length': The width (measured in number of scalars) of a vector value. --- --- * 'Format.Format': The data format representation used by much of the backend. --- --- * 'Format.ScalarFormat': The format of a 'Format.VecFormat'\'s scalar. --- --- * 'RegClass.RegClass': Whether a register is an integer, float-point, or vector register --- - -- It looks very like the old MachRep, but it's now of purely local -- significance, here in the native code generator. You can change it -- without global consequences. @@ -72,16 +47,8 @@ data Format | II64 | FF32 | FF64 - | VecFormat !Length !ScalarFormat !Width deriving (Show, Eq) -data ScalarFormat = FmtInt8 - | FmtInt16 - | FmtInt32 - | FmtInt64 - | FmtFloat - | FmtDouble - deriving (Show, Eq) -- | Get the integer format of this width. intFormat :: Width -> Format @@ -114,33 +81,13 @@ isFloatFormat format FF64 -> True _ -> False --- | Check if a format represents a vector -isVecFormat :: Format -> Bool -isVecFormat (VecFormat {}) = True -isVecFormat _ = False -- | Convert a Cmm type to a Format. cmmTypeFormat :: CmmType -> Format cmmTypeFormat ty | isFloatType ty = floatFormat (typeWidth ty) - | isVecType ty = vecFormat ty | otherwise = intFormat (typeWidth ty) -vecFormat :: CmmType -> Format -vecFormat ty = - let l = vecLength ty - elemTy = vecElemType ty - in if isFloatType elemTy - then case typeWidth elemTy of - W32 -> VecFormat l FmtFloat W32 - W64 -> VecFormat l FmtDouble W64 - _ -> pprPanic "Incorrect vector element width" (ppr elemTy) - else case typeWidth elemTy of - W8 -> VecFormat l FmtInt8 W8 - W16 -> VecFormat l FmtInt16 W16 - W32 -> VecFormat l FmtInt32 W32 - W64 -> VecFormat l FmtInt64 W64 - _ -> pprPanic "Incorrect vector element width" (ppr elemTy) -- | Get the Width of a Format. formatToWidth :: Format -> Width @@ -152,7 +99,7 @@ formatToWidth format II64 -> W64 FF32 -> W32 FF64 -> W64 - VecFormat l _ w -> widthFromBytes (l*widthInBytes w) + formatInBytes :: Format -> Int formatInBytes = widthInBytes . 
formatToWidth diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index 67730aa59b..3680c1c7b0 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -250,6 +250,7 @@ getNewRegNat rep dflags <- getDynFlags return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep) + getNewRegPairNat :: Format -> NatM (Reg,Reg) getNewRegPairNat rep = do u <- getUniqueNat diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 7e5df6a76c..a49526c93a 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1909,8 +1909,6 @@ genCCall' dflags gcp target dest_regs args FF32 -> (1, 1, 4, fprs) FF64 -> (2, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" - VecFormat {} - -> panic "genCCall' passArguments vector format" GCP32ELF -> case cmmTypeFormat rep of @@ -1921,8 +1919,6 @@ genCCall' dflags gcp target dest_regs args FF32 -> (0, 1, 4, fprs) FF64 -> (0, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" - VecFormat {} - -> panic "genCCall' passArguments vector format" GCP64ELF _ -> case cmmTypeFormat rep of II8 -> (1, 0, 8, gprs) @@ -1934,8 +1930,6 @@ genCCall' dflags gcp target dest_regs args -- the FPRs. FF32 -> (1, 1, 8, fprs) FF64 -> (1, 1, 8, fprs) - VecFormat {} - -> panic "genCCall' passArguments vector format" moveResult reduceToFF32 = case dest_regs of diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index b7316e6bc6..4254f23122 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -29,7 +29,7 @@ import BlockId import CLabel import PprCmmExpr () -import Unique ( getUnique ) +import Unique ( pprUniqueAlways, getUnique ) import GHC.Platform import FastString import Outputable @@ -168,7 +168,10 @@ pprReg r = case r of RegReal (RealRegSingle i) -> ppr_reg_no i RegReal (RealRegPair{}) -> panic "PPC.pprReg: no reg pairs on this arch" - RegVirtual v -> ppr v + RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u + RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u + RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u + RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u where ppr_reg_no :: Int -> SDoc @@ -187,8 +190,7 @@ pprFormat x II32 -> sLit "w" II64 -> sLit "d" FF32 -> sLit "fs" - FF64 -> sLit "fd" - VecFormat _ _ _ -> panic "PPC.Ppr.pprFormat: VecFormat") + FF64 -> sLit "fd") pprCond :: Cond -> SDoc @@ -373,7 +375,6 @@ pprInstr (LD fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" - VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', @@ -413,7 +414,6 @@ pprInstr (LA fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" - VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs index dff2f07bf4..7f69ea01a4 100644 --- a/compiler/nativeGen/Reg.hs +++ b/compiler/nativeGen/Reg.hs @@ -56,7 +56,6 @@ data VirtualReg | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register | VirtualRegF {-# UNPACK #-} !Unique | VirtualRegD {-# UNPACK #-} !Unique - | VirtualRegVec {-# UNPACK #-} !Unique deriving (Eq, Show) @@ -70,7 +69,6 @@ instance Ord VirtualReg where compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b compare (VirtualRegD a) 
(VirtualRegD b) = nonDetCmpUnique a b - compare (VirtualRegVec a) (VirtualRegVec b) = nonDetCmpUnique a b compare VirtualRegI{} _ = LT compare _ VirtualRegI{} = GT @@ -78,8 +76,7 @@ instance Ord VirtualReg where compare _ VirtualRegHi{} = GT compare VirtualRegF{} _ = LT compare _ VirtualRegF{} = GT - compare VirtualRegVec{} _ = LT - compare _ VirtualRegVec{} = GT + instance Uniquable VirtualReg where @@ -89,7 +86,6 @@ instance Uniquable VirtualReg where VirtualRegHi u -> u VirtualRegF u -> u VirtualRegD u -> u - VirtualRegVec u -> u instance Outputable VirtualReg where ppr reg @@ -99,9 +95,8 @@ instance Outputable VirtualReg where -- this code is kinda wrong on x86 -- because float and double occupy the same register set -- namely SSE2 register xmm0 .. xmm15 - VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u - VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u - VirtualRegVec u -> text "%vVec_" <> pprUniqueAlways u + VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u + VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u @@ -112,7 +107,6 @@ renameVirtualReg u r VirtualRegHi _ -> VirtualRegHi u VirtualRegF _ -> VirtualRegF u VirtualRegD _ -> VirtualRegD u - VirtualRegVec _ -> VirtualRegVec u classOfVirtualReg :: VirtualReg -> RegClass @@ -122,8 +116,6 @@ classOfVirtualReg vr VirtualRegHi{} -> RcInteger VirtualRegF{} -> RcFloat VirtualRegD{} -> RcDouble - -- Below is an awful, largely x86-specific hack - VirtualRegVec{} -> RcDouble diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 23d7c6b421..5d4fd418c3 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -195,6 +195,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu + -- Specification Code ---------------------------------------------------------- -- -- The trivColorable function for each particular architecture should diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs index d452edfdc6..5a4f1c65a8 100644 --- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs @@ -2,7 +2,6 @@ module RegAlloc.Linear.FreeRegs ( FR(..), - allFreeRegs, maxSpillSlots ) @@ -70,10 +69,6 @@ instance FR SPARC.FreeRegs where frInitFreeRegs = SPARC.initFreeRegs frReleaseReg = SPARC.releaseReg --- | For debugging output. -allFreeRegs :: FR freeRegs => Platform -> freeRegs -> [RealReg] -allFreeRegs plat fr = foldMap (\rcls -> frGetFreeRegs plat rcls fr) allRegClasses - maxSpillSlots :: DynFlags -> Int maxSpillSlots dflags = case platformArch (targetPlatform dflags) of diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index b29712e0e0..cdaf738d68 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -884,10 +884,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc $ vcat [ text "allocating vreg: " <> text (show r) , text "assignment: " <> ppr assig - , text "freeRegs: " <> text (showRegs freeRegs) - , text "initFreeRegs: " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs)) - ] - where showRegs = show . map (\reg -> (reg, targetClassOfRealReg platform reg)) . 
allFreeRegs platform + , text "freeRegs: " <> text (show freeRegs) + , text "initFreeRegs: " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ] result diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs index d73a3409ac..fbbb786817 100644 --- a/compiler/nativeGen/RegClass.hs +++ b/compiler/nativeGen/RegClass.hs @@ -1,14 +1,15 @@ -- | An architecture independent description of a register's class. module RegClass - ( RegClass(..) - , allRegClasses - ) where + ( RegClass (..) ) + +where import GhcPrelude import Outputable import Unique + -- | The class of a register. -- Used in the register allocator. -- We treat all registers in a class as being interchangable. @@ -17,11 +18,7 @@ data RegClass = RcInteger | RcFloat | RcDouble - deriving (Eq, Show) - -allRegClasses :: [RegClass] -allRegClasses = - [ RcInteger, RcFloat, RcDouble ] + deriving Eq instance Uniquable RegClass where @@ -30,6 +27,6 @@ instance Uniquable RegClass where getUnique RcDouble = mkRegClassUnique 2 instance Outputable RegClass where - ppr RcInteger = Outputable.text "I" - ppr RcFloat = Outputable.text "F" - ppr RcDouble = Outputable.text "D" + ppr RcInteger = Outputable.text "I" + ppr RcFloat = Outputable.text "F" + ppr RcDouble = Outputable.text "D" diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index aa355f97cb..fc67f77541 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -45,6 +45,7 @@ import CLabel import Hoopl.Label import Hoopl.Collections +import Unique ( pprUniqueAlways ) import Outputable import GHC.Platform import FastString @@ -147,7 +148,12 @@ pprReg :: Reg -> SDoc pprReg reg = case reg of RegVirtual vr - -> ppr vr + -> case vr of + VirtualRegI u -> text "%vI_" <> pprUniqueAlways u + VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u + VirtualRegF u -> text "%vF_" <> pprUniqueAlways u + VirtualRegD u -> text "%vD_" <> pprUniqueAlways u + RegReal rr -> case rr of @@ -215,8 +221,7 @@ pprFormat x II32 -> sLit "" II64 -> sLit "d" FF32 -> sLit "" - FF64 -> sLit "d" - VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat") + FF64 -> sLit "d") -- | Pretty print a format for an instruction suffix. @@ -230,8 +235,7 @@ pprStFormat x II32 -> sLit "" II64 -> sLit "x" FF32 -> sLit "" - FF64 -> sLit "d" - VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat") + FF64 -> sLit "d") diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index e46dbd0d38..0d7edc346a 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -104,6 +104,7 @@ virtualRegSqueeze cls vr VirtualRegD{} -> 1 _other -> 0 + {-# INLINE realRegSqueeze #-} realRegSqueeze :: RegClass -> RealReg -> Int @@ -133,6 +134,7 @@ realRegSqueeze cls rr RealRegPair{} -> 1 + -- | All the allocatable registers in the machine, -- including register pairs. 
allRealRegs :: [RealReg] diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index ed3684e074..13662f6807 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -111,25 +111,12 @@ sse2Enabled = do ArchX86 -> return True _ -> panic "trying to generate x86/x86_64 on the wrong platform" -sse4_1Enabled :: NatM Bool -sse4_1Enabled = do - dflags <- getDynFlags - return (isSse4_1Enabled dflags) sse4_2Enabled :: NatM Bool sse4_2Enabled = do dflags <- getDynFlags return (isSse4_2Enabled dflags) -sseEnabled :: NatM Bool -sseEnabled = do - dflags <- getDynFlags - return (isSseEnabled dflags) - -avxEnabled :: NatM Bool -avxEnabled = do - dflags <- getDynFlags - return (isAvxEnabled dflags) cmmTopCodeGen :: RawCmmDecl @@ -228,7 +215,6 @@ stmtToInstrs bid stmt = do CmmAssign reg src | isFloatType ty -> assignReg_FltCode format reg src | is32Bit && isWord64 ty -> assignReg_I64Code reg src - | isVecType ty -> assignReg_VecCode format reg src | otherwise -> assignReg_IntCode format reg src where ty = cmmRegType dflags reg format = cmmTypeFormat ty @@ -236,7 +222,6 @@ stmtToInstrs bid stmt = do CmmStore addr src | isFloatType ty -> assignMem_FltCode format addr src | is32Bit && isWord64 ty -> assignMem_I64Code addr src - | isVecType ty -> assignMem_VecCode format addr src | otherwise -> assignMem_IntCode format addr src where ty = cmmExprType dflags src format = cmmTypeFormat ty @@ -323,15 +308,6 @@ getRegisterReg platform (CmmGlobal mid) -- platform. Hence ... -getVecRegisterReg :: Platform -> Bool -> Format -> CmmReg -> Reg -getVecRegisterReg _ use_avx format (CmmLocal (LocalReg u pk)) - | isVecType pk && use_avx = RegVirtual (mkVirtualReg u format) - | otherwise = pprPanic - (unlines ["avx flag is not enabled" , - "or this is not a vector register"]) - (ppr pk) -getVecRegisterReg platform _use_avx _format c = getRegisterReg platform c - -- | Memory addressing modes passed up the tree. data Amode = Amode AddrMode InstrBlock @@ -527,13 +503,6 @@ iselExpr64 expr -------------------------------------------------------------------------------- - --- This is a helper data type which helps reduce the code duplication for --- the code generation of arithmetic operations. 
This is not specifically --- targetted for any particular type like Int8, Int32 etc -data VectorArithInstns = VA_Add | VA_Sub | VA_Mul | VA_Div - - getRegister :: CmmExpr -> NatM Register getRegister e = do dflags <- getDynFlags is32Bit <- is32BitPlatform @@ -551,24 +520,16 @@ getRegister' dflags is32Bit (CmmReg reg) do reg' <- getPicBaseNat (archWordFormat is32Bit) return (Fixed (archWordFormat is32Bit) reg' nilOL) _ -> - do use_sse2 <- sse2Enabled - use_avx <- avxEnabled - let cmmregtype = cmmRegType dflags reg - if isVecType cmmregtype - then return (vectorRegister cmmregtype use_avx use_sse2) - else return (standardRegister cmmregtype) - where - vectorRegister :: CmmType -> Bool -> Bool -> Register - vectorRegister reg_ty use_avx use_sse2 - | use_avx || use_sse2 = - let vecfmt = cmmTypeFormat reg_ty - platform = targetPlatform dflags - in (Fixed vecfmt (getVecRegisterReg platform True vecfmt reg) nilOL) - | otherwise = panic "Please enable the -mavx or -msse2 flag" - - standardRegister crt = - let platform = targetPlatform dflags - in (Fixed (cmmTypeFormat crt) (getRegisterReg platform reg) nilOL) + do + let + fmt = cmmTypeFormat (cmmRegType dflags reg) + format = fmt + -- + let platform = targetPlatform dflags + return (Fixed format + (getRegisterReg platform reg) + nilOL) + getRegister' dflags is32Bit (CmmRegOff r n) = getRegister' dflags is32Bit $ mangleIndexTree dflags r n @@ -670,69 +631,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), return $ Any II64 (\dst -> unitOL $ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -getRegister' _ _ (CmmMachOp mop [x, y, z]) = do -- ternary MachOps - sse4_1 <- sse4_1Enabled - sse2 <- sse2Enabled - sse <- sseEnabled - case mop of - MO_VF_Insert l W32 | sse4_1 && sse -> vector_float_pack l W32 x y z - | otherwise - -> sorry "Please enable the -msse4 and -msse flag" - MO_VF_Insert l W64 | sse2 && sse -> vector_float_pack l W64 x y z - | otherwise - -> sorry "Please enable the -msse2 and -msse flag" - _other -> incorrectOperands - where - vector_float_pack :: Length - -> Width - -> CmmExpr - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_pack len W32 expr1 expr2 (CmmLit offset) - = do - fn <- getAnyReg expr1 - (r, exp) <- getSomeReg expr2 - let f = VecFormat len FmtFloat W32 - imm = litToImm offset - code dst = exp `appOL` - (fn dst) `snocOL` - (INSERTPS f (OpImm imm) (OpReg r) dst) - in return $ Any f code - vector_float_pack len W64 expr1 expr2 (CmmLit offset) - = do - Amode addr addr_code <- getAmode expr2 - (r, exp) <- getSomeReg expr1 - - -- fn <- getAnyReg expr1 - -- (r, exp) <- getSomeReg expr2 - let f = VecFormat len FmtDouble W64 - code dst - = case offset of - CmmInt 0 _ -> exp `appOL` addr_code `snocOL` - (MOVL f (OpAddr addr) (OpReg r)) `snocOL` - (MOVU f (OpReg r) (OpReg dst)) - CmmInt 16 _ -> exp `appOL` addr_code `snocOL` - (MOVH f (OpAddr addr) (OpReg r)) `snocOL` - (MOVU f (OpReg r) (OpReg dst)) - _ -> panic "Error in offset while packing" - -- code dst - -- = case offset of - -- CmmInt 0 _ -> exp `appOL` - -- (fn dst) `snocOL` - -- (MOVL f (OpReg r) (OpReg dst)) - -- CmmInt 16 _ -> exp `appOL` - -- (fn dst) `snocOL` - -- (MOVH f (OpReg r) (OpReg dst)) - -- _ -> panic "Error in offset while packing" - in return $ Any f code - vector_float_pack _ _ _ c _ - = pprPanic "Pack not supported for : " (ppr c) - getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps - sse2 <- sse2Enabled - sse <- sseEnabled - avx <- avxEnabled case mop of MO_F_Neg w -> sse2NegCode 
w x @@ -809,28 +708,23 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_FS_Conv from to -> coerceFP2Int from to x MO_SF_Conv from to -> coerceInt2FP from to x - MO_V_Insert {} -> needLlvm - MO_V_Extract {} -> needLlvm - MO_V_Add {} -> needLlvm - MO_V_Sub {} -> needLlvm - MO_V_Mul {} -> needLlvm - MO_VS_Quot {} -> needLlvm - MO_VS_Rem {} -> needLlvm - MO_VS_Neg {} -> needLlvm - MO_VU_Quot {} -> needLlvm - MO_VU_Rem {} -> needLlvm - MO_VF_Broadcast {} -> incorrectOperands - MO_VF_Insert {} -> incorrectOperands - MO_VF_Extract {} -> incorrectOperands - MO_VF_Add {} -> incorrectOperands - MO_VF_Sub {} -> incorrectOperands - MO_VF_Mul {} -> incorrectOperands - MO_VF_Quot {} -> incorrectOperands - - MO_VF_Neg l w | avx -> vector_float_negate_avx l w x - | sse && sse2 -> vector_float_negate_sse l w x - | otherwise - -> sorry "Please enable the -mavx or -msse, -msse2 flag" + MO_V_Insert {} -> needLlvm + MO_V_Extract {} -> needLlvm + MO_V_Add {} -> needLlvm + MO_V_Sub {} -> needLlvm + MO_V_Mul {} -> needLlvm + MO_VS_Quot {} -> needLlvm + MO_VS_Rem {} -> needLlvm + MO_VS_Neg {} -> needLlvm + MO_VU_Quot {} -> needLlvm + MO_VU_Rem {} -> needLlvm + MO_VF_Insert {} -> needLlvm + MO_VF_Extract {} -> needLlvm + MO_VF_Add {} -> needLlvm + MO_VF_Sub {} -> needLlvm + MO_VF_Mul {} -> needLlvm + MO_VF_Quot {} -> needLlvm + MO_VF_Neg {} -> needLlvm _other -> pprPanic "getRegister" (pprMachOp mop) where @@ -868,45 +762,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps = do e_code <- getRegister' dflags is32Bit expr return (swizzleRegisterRep e_code new_format) - vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register - vector_float_negate_avx l w expr = do - tmp <- getNewRegNat (VecFormat l FmtFloat w) - (reg, exp) <- getSomeReg expr - Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes W32) (CmmFloat 0.0 W32) - let format = case w of - W32 -> VecFormat l FmtFloat w - W64 -> VecFormat l FmtDouble w - _ -> pprPanic "Cannot negate vector of width" (ppr w) - code dst = case w of - W32 -> exp `appOL` addr_code `snocOL` - (VBROADCAST format addr tmp) `snocOL` - (VSUB format (OpReg reg) tmp dst) - W64 -> exp `appOL` addr_code `snocOL` - (MOVL format (OpAddr addr) (OpReg tmp)) `snocOL` - (MOVH format (OpAddr addr) (OpReg tmp)) `snocOL` - (VSUB format (OpReg reg) tmp dst) - _ -> pprPanic "Cannot negate vector of width" (ppr w) - return (Any format code) - - vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register - vector_float_negate_sse l w expr = do - tmp <- getNewRegNat (VecFormat l FmtFloat w) - (reg, exp) <- getSomeReg expr - let format = case w of - W32 -> VecFormat l FmtFloat w - W64 -> VecFormat l FmtDouble w - _ -> pprPanic "Cannot negate vector of width" (ppr w) - code dst = exp `snocOL` - (XOR format (OpReg tmp) (OpReg tmp)) `snocOL` - (MOVU format (OpReg tmp) (OpReg dst)) `snocOL` - (SUB format (OpReg reg) (OpReg dst)) - return (Any format code) getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps - sse4_1 <- sse4_1Enabled - sse2 <- sse2Enabled - sse <- sseEnabled - avx <- avxEnabled case mop of MO_F_Eq _ -> condFltReg is32Bit EQQ x y MO_F_Ne _ -> condFltReg is32Bit NE x y @@ -971,49 +828,13 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_VS_Quot {} -> needLlvm MO_VS_Rem {} -> needLlvm MO_VS_Neg {} -> needLlvm - - MO_VF_Broadcast l W32 | avx -> vector_float_broadcast_avx l W32 x y - | sse4_1 -> vector_float_broadcast_sse l W32 x y - | otherwise - -> sorry "Please enable the 
-mavx or -msse4 flag" - - MO_VF_Broadcast l W64 | sse2 -> vector_float_broadcast_avx l W64 x y - | otherwise -> sorry "Please enable the -msse2 flag" - - MO_VF_Extract l W32 | avx -> vector_float_unpack l W32 x y - | sse -> vector_float_unpack_sse l W32 x y - | otherwise - -> sorry "Please enable the -mavx or -msse flag" - - MO_VF_Extract l W64 | sse2 -> vector_float_unpack l W64 x y - | otherwise -> sorry "Please enable the -msse2 flag" - - MO_VF_Add l w | avx -> vector_float_op_avx VA_Add l w x y - | sse && w == W32 -> vector_float_op_sse VA_Add l w x y - | sse2 && w == W64 -> vector_float_op_sse VA_Add l w x y - | otherwise - -> sorry "Please enable the -mavx or -msse flag" - - MO_VF_Sub l w | avx -> vector_float_op_avx VA_Sub l w x y - | sse && w == W32 -> vector_float_op_sse VA_Sub l w x y - | sse2 && w == W64 -> vector_float_op_sse VA_Sub l w x y - | otherwise - -> sorry "Please enable the -mavx or -msse flag" - - MO_VF_Mul l w | avx -> vector_float_op_avx VA_Mul l w x y - | sse && w == W32 -> vector_float_op_sse VA_Mul l w x y - | sse2 && w == W64 -> vector_float_op_sse VA_Mul l w x y - | otherwise - -> sorry "Please enable the -mavx or -msse flag" - - MO_VF_Quot l w | avx -> vector_float_op_avx VA_Div l w x y - | sse && w == W32 -> vector_float_op_sse VA_Div l w x y - | sse2 && w == W64 -> vector_float_op_sse VA_Div l w x y - | otherwise - -> sorry "Please enable the -mavx or -msse flag" - - MO_VF_Insert {} -> incorrectOperands - MO_VF_Neg {} -> incorrectOperands + MO_VF_Insert {} -> needLlvm + MO_VF_Extract {} -> needLlvm + MO_VF_Add {} -> needLlvm + MO_VF_Sub {} -> needLlvm + MO_VF_Mul {} -> needLlvm + MO_VF_Quot {} -> needLlvm + MO_VF_Neg {} -> needLlvm _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) where @@ -1109,171 +930,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps -- TODO: There are other interesting patterns we want to replace -- with a LEA, e.g. `(x + offset) + (y << shift)`. 
- ----------------------- - -- Vector operations--- - vector_float_op_avx :: VectorArithInstns - -> Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_op_avx op l w expr1 expr2 = do - (reg1, exp1) <- getSomeReg expr1 - (reg2, exp2) <- getSomeReg expr2 - let format = case w of - W32 -> VecFormat l FmtFloat W32 - W64 -> VecFormat l FmtDouble W64 - _ -> pprPanic "Operation not supported for width " (ppr w) - code dst = case op of - VA_Add -> arithInstr VADD - VA_Sub -> arithInstr VSUB - VA_Mul -> arithInstr VMUL - VA_Div -> arithInstr VDIV - where - -- opcode src2 src1 dst <==> dst = src1 `opcode` src2 - arithInstr instr = exp1 `appOL` exp2 `snocOL` - (instr format (OpReg reg2) reg1 dst) - return (Any format code) - - vector_float_op_sse :: VectorArithInstns - -> Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_op_sse op l w expr1 expr2 = do - (reg1, exp1) <- getSomeReg expr1 - (reg2, exp2) <- getSomeReg expr2 - let format = case w of - W32 -> VecFormat l FmtFloat W32 - W64 -> VecFormat l FmtDouble W64 - _ -> pprPanic "Operation not supported for width " (ppr w) - code dst = case op of - VA_Add -> arithInstr ADD - VA_Sub -> arithInstr SUB - VA_Mul -> arithInstr MUL - VA_Div -> arithInstr FDIV - where - -- opcode src2 src1 <==> src1 = src1 `opcode` src2 - arithInstr instr - = exp1 `appOL` exp2 `snocOL` - (MOVU format (OpReg reg1) (OpReg dst)) `snocOL` - (instr format (OpReg reg2) (OpReg dst)) - return (Any format code) -------------------- - vector_float_unpack :: Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_unpack l W32 expr (CmmLit lit) - = do - (r, exp) <- getSomeReg expr - let format = VecFormat l FmtFloat W32 - imm = litToImm lit - code dst - = case lit of - CmmInt 0 _ -> exp `snocOL` (VMOVU format (OpReg r) (OpReg dst)) - CmmInt _ _ -> exp `snocOL` (VPSHUFD format (OpImm imm) (OpReg r) dst) - _ -> panic "Error in offset while unpacking" - return (Any format code) - vector_float_unpack l W64 expr (CmmLit lit) - = do - dflags <- getDynFlags - (r, exp) <- getSomeReg expr - let format = VecFormat l FmtDouble W64 - addr = spRel dflags 0 - code dst - = case lit of - CmmInt 0 _ -> exp `snocOL` - (MOVL format (OpReg r) (OpAddr addr)) `snocOL` - (MOV FF64 (OpAddr addr) (OpReg dst)) - CmmInt 1 _ -> exp `snocOL` - (MOVH format (OpReg r) (OpAddr addr)) `snocOL` - (MOV FF64 (OpAddr addr) (OpReg dst)) - _ -> panic "Error in offset while unpacking" - return (Any format code) - vector_float_unpack _ w c e - = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w) - ----------------------- - - vector_float_unpack_sse :: Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_unpack_sse l W32 expr (CmmLit lit) - = do - (r,exp) <- getSomeReg expr - let format = VecFormat l FmtFloat W32 - imm = litToImm lit - code dst - = case lit of - CmmInt 0 _ -> exp `snocOL` (MOVU format (OpReg r) (OpReg dst)) - CmmInt _ _ -> exp `snocOL` (PSHUFD format (OpImm imm) (OpReg r) dst) - _ -> panic "Error in offset while unpacking" - return (Any format code) - vector_float_unpack_sse _ w c e - = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w) - ----------------------- - vector_float_broadcast_avx :: Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_broadcast_avx len W32 expr1 expr2 - = do - dflags <- getDynFlags - fn <- getAnyReg expr1 - (r', exp) <- getSomeReg expr2 - let f = VecFormat len FmtFloat W32 - addr = spRel dflags 0 - in return $ Any f (\r -> exp 
`appOL` - (fn r) `snocOL` - (MOVU f (OpReg r') (OpAddr addr)) `snocOL` - (VBROADCAST f addr r)) - vector_float_broadcast_avx len W64 expr1 expr2 - = do - dflags <- getDynFlags - fn <- getAnyReg expr1 - (r', exp) <- getSomeReg expr2 - let f = VecFormat len FmtDouble W64 - addr = spRel dflags 0 - in return $ Any f (\r -> exp `appOL` - (fn r) `snocOL` - (MOVU f (OpReg r') (OpAddr addr)) `snocOL` - (MOVL f (OpAddr addr) (OpReg r)) `snocOL` - (MOVH f (OpAddr addr) (OpReg r))) - vector_float_broadcast_avx _ _ c _ - = pprPanic "Broadcast not supported for : " (ppr c) - ----------------------- - vector_float_broadcast_sse :: Length - -> Width - -> CmmExpr - -> CmmExpr - -> NatM Register - vector_float_broadcast_sse len W32 expr1 expr2 - = do - dflags <- getDynFlags - fn <- getAnyReg expr1 -- destination - (r, exp) <- getSomeReg expr2 -- source - let f = VecFormat len FmtFloat W32 - addr = spRel dflags 0 - code dst = exp `appOL` - (fn dst) `snocOL` - (MOVU f (OpReg r) (OpAddr addr)) `snocOL` - (insertps 0) `snocOL` - (insertps 16) `snocOL` - (insertps 32) `snocOL` - (insertps 48) - where - insertps off = - INSERTPS f (OpImm $ litToImm $ CmmInt off W32) (OpAddr addr) dst - - in return $ Any f code - vector_float_broadcast_sse _ _ c _ - = pprPanic "Broadcast not supported for : " (ppr c) - ----------------------- sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register sub_code rep x (CmmLit (CmmInt y _)) | is32BitInteger (-y) = add_int rep x (-y) @@ -1326,21 +983,6 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps return (Fixed format result code) -getRegister' _ _ (CmmLoad mem pk) - | isVecType pk = do - use_avx <- avxEnabled - use_sse <- sseEnabled - Amode addr mem_code <- getAmode mem - let format = cmmTypeFormat pk - code dst - | use_avx = mem_code `snocOL` - VMOVU format (OpAddr addr) (OpReg dst) - | use_sse = mem_code `snocOL` - MOVU format (OpAddr addr) (OpReg dst) - | otherwise = pprPanic (unlines ["avx or sse flag not enabled", - "for loading to "]) - (ppr pk) - return (Any format code) getRegister' _ _ (CmmLoad mem pk) | isFloatType pk @@ -1407,24 +1049,10 @@ getRegister' dflags is32Bit (CmmLit lit) -- small memory model (see gcc docs, -mcmodel=small). getRegister' dflags _ (CmmLit lit) - | isVecType cmmtype = vectorRegister cmmtype - | otherwise = standardRegister cmmtype - where - cmmtype = cmmLitType dflags lit - vectorRegister ctype - = do - --NOTE: - -- This operation is only used to zero a register. 
For loading a - -- vector literal there are pack and broadcast operations - let format = cmmTypeFormat ctype - code dst = unitOL (XOR format (OpReg dst) (OpReg dst)) - return (Any format code) - standardRegister ctype - = do - let format = cmmTypeFormat ctype - imm = litToImm lit - code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) - return (Any format code) + = do let format = cmmTypeFormat (cmmLitType dflags lit) + imm = litToImm lit + code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) + return (Any format code) getRegister' _ _ other | isVecExpr other = needLlvm @@ -1490,14 +1118,8 @@ getNonClobberedReg expr = do return (reg, code) reg2reg :: Format -> Reg -> Reg -> Instr -reg2reg format@(VecFormat _ FmtFloat W32) src dst - = VMOVU format (OpReg src) (OpReg dst) -reg2reg format@(VecFormat _ FmtDouble W64) src dst - = VMOVU format (OpReg src) (OpReg dst) -reg2reg (VecFormat _ _ _) _ _ - = panic "MOV operation not implemented for vectors" -reg2reg format src dst - = MOV format (OpReg src) (OpReg dst) +reg2reg format src dst = MOV format (OpReg src) (OpReg dst) + -------------------------------------------------------------------------------- getAmode :: CmmExpr -> NatM Amode @@ -1559,9 +1181,6 @@ getAmode' _ (CmmMachOp (MO_Add _) getAmode' _ (CmmMachOp (MO_Add _) [x,y]) = x86_complex_amode x y 0 0 -getAmode' _ (CmmLit lit@(CmmFloat _ w)) - = memConstant (mkAlignment $ widthInBytes w) lit - getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit = return (Amode (ImmAddr (litToImm lit) 0) nilOL) @@ -1942,8 +1561,7 @@ assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock -assignMem_VecCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_VecCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock + -- integer assignment to memory -- specific case of adding/subtracting an integer to a particular address. 
@@ -2020,29 +1638,6 @@ assignReg_FltCode _ reg src = do let platform = targetPlatform dflags return (src_code (getRegisterReg platform reg)) -assignMem_VecCode pk addr src = do - (src_reg, src_code) <- getNonClobberedReg src - Amode addr addr_code <- getAmode addr - use_avx <- avxEnabled - use_sse <- sseEnabled - let - code | use_avx = src_code `appOL` - addr_code `snocOL` - (VMOVU pk (OpReg src_reg) (OpAddr addr)) - | use_sse = src_code `appOL` - addr_code `snocOL` - (MOVU pk (OpReg src_reg) (OpAddr addr)) - | otherwise = sorry "Please enable the -mavx or -msse flag" - return code - -assignReg_VecCode format reg src = do - use_avx <- avxEnabled - use_sse <- sseEnabled - src_code <- getAnyReg src - dflags <- getDynFlags - let platform = targetPlatform dflags - flag = use_avx || use_sse - return (src_code (getVecRegisterReg platform flag format reg)) genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock @@ -3767,7 +3362,6 @@ sse2NegCode w x = do x@II16 -> wrongFmt x x@II32 -> wrongFmt x x@II64 -> wrongFmt x - x@VecFormat {} -> wrongFmt x where wrongFmt x = panic $ "sse2NegCode: " ++ show x @@ -3782,33 +3376,29 @@ sse2NegCode w x = do return (Any fmt code) isVecExpr :: CmmExpr -> Bool -isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_V_Add {}) _) = True -isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Broadcast {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True -isVecExpr (CmmMachOp _ [e]) = isVecExpr e -isVecExpr _ = False +isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True +isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True +isVecExpr (CmmMachOp (MO_V_Add {}) _) = True +isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True +isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True +isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True +isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True +isVecExpr (CmmMachOp _ [e]) = isVecExpr e +isVecExpr _ = False needLlvm :: NatM a needLlvm = sorry $ unlines ["The native code generator does not support vector" ,"instructions. Please use -fllvm."] -incorrectOperands :: NatM a -incorrectOperands = sorry "Incorrect number of operands" - -- | This works on the invariant that all jumps in the given blocks are required. -- Starting from there we try to make a few more jumps redundant by reordering -- them. 
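The isVecExpr/needLlvm fallback retained above means that, once this revert is applied, any vector operation reaching the X86 native code generator is rejected with a request to switch to the LLVM backend. As a rough illustration only (not part of this patch), a module using the FloatX4# primops exported via GHC.Exts would then be expected to require -fllvm; built with -fasm it would hit the "sorry" message shown in the hunk above. A minimal sketch, with a hypothetical module name:

{-# LANGUAGE MagicHash, UnboxedTuples #-}
-- Hypothetical example module, not part of this commit.  It exercises the
-- FloatX4# primops whose NCG lowering this patch removes, so it is expected
-- to need:  ghc -O -fllvm SimdExample.hs
module SimdExample (addLanes) where

import GHC.Exts

-- Pack four Float# lanes, add a broadcast vector, and unpack the sums.
addLanes :: (Float, Float, Float, Float)
addLanes =
  case unpackFloatX4#
         (plusFloatX4# (packFloatX4# (# 1.0#, 2.0#, 3.0#, 4.0# #))
                       (broadcastFloatX4# 0.5#)) of
    (# a, b, c, d #) -> (F# a, F# b, F# c, F# d)

The primop names (packFloatX4#, broadcastFloatX4#, plusFloatX4#, unpackFloatX4#) are the ones GHC.Exts re-exports from GHC.Prim; the sketch makes no claim about this patch beyond the backend requirement stated in needLlvm.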
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 47b62e62e7..6e5d656beb 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -328,36 +328,6 @@ data Instr | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit | MFENCE - -- Vector Instructions -- - -- NOTE: Instructions follow the AT&T syntax - -- Constructors and deconstructors - | VBROADCAST Format AddrMode Reg - | VEXTRACT Format Operand Reg Operand - | INSERTPS Format Operand Operand Reg - - -- move operations - | VMOVU Format Operand Operand - | MOVU Format Operand Operand - | MOVL Format Operand Operand - | MOVH Format Operand Operand - - -- logic operations - | VPXOR Format Reg Reg Reg - - -- Arithmetic - | VADD Format Operand Reg Reg - | VSUB Format Operand Reg Reg - | VMUL Format Operand Reg Reg - | VDIV Format Operand Reg Reg - - -- Shuffle - | VPSHUFD Format Operand Operand Reg - | PSHUFD Format Operand Operand Reg - - -- Shift - | PSLLDQ Format Operand Reg - | PSRLDQ Format Operand Reg - data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 @@ -460,31 +430,6 @@ x86_regUsageOfInstr platform instr CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) MFENCE -> noUsage - -- vector instructions - VBROADCAST _ src dst -> mkRU (use_EA src []) [dst] - VEXTRACT _ off src dst -> mkRU ((use_R off []) ++ [src]) (use_R dst []) - INSERTPS _ off src dst - -> mkRU ((use_R off []) ++ (use_R src []) ++ [dst]) [dst] - - VMOVU _ src dst -> mkRU (use_R src []) (use_R dst []) - MOVU _ src dst -> mkRU (use_R src []) (use_R dst []) - MOVL _ src dst -> mkRU (use_R src []) (use_R dst []) - MOVH _ src dst -> mkRU (use_R src []) (use_R dst []) - VPXOR _ s1 s2 dst -> mkRU [s1,s2] [dst] - - VADD _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] - VSUB _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] - VMUL _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] - VDIV _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst] - - VPSHUFD _ off src dst - -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst] - PSHUFD _ off src dst - -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst] - - PSLLDQ _ off dst -> mkRU (use_R off []) [dst] - PSRLDQ _ off dst -> mkRU (use_R off []) [dst] - _other -> panic "regUsage: unrecognised instr" where -- # Definitions @@ -643,32 +588,6 @@ x86_patchRegsOfInstr instr env CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst MFENCE -> instr - -- vector instructions - VBROADCAST fmt src dst -> VBROADCAST fmt (lookupAddr src) (env dst) - VEXTRACT fmt off src dst - -> VEXTRACT fmt (patchOp off) (env src) (patchOp dst) - INSERTPS fmt off src dst - -> INSERTPS fmt (patchOp off) (patchOp src) (env dst) - - VMOVU fmt src dst -> VMOVU fmt (patchOp src) (patchOp dst) - MOVU fmt src dst -> MOVU fmt (patchOp src) (patchOp dst) - MOVL fmt src dst -> MOVL fmt (patchOp src) (patchOp dst) - MOVH fmt src dst -> MOVH fmt (patchOp src) (patchOp dst) - VPXOR fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst) - - VADD fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst) - VSUB fmt s1 s2 dst -> VSUB fmt (patchOp s1) (env s2) (env dst) - VMUL fmt s1 s2 dst -> VMUL fmt (patchOp s1) (env s2) (env dst) - VDIV fmt s1 s2 dst -> VDIV fmt (patchOp s1) (env s2) (env dst) - - VPSHUFD fmt off src dst - -> VPSHUFD fmt (patchOp off) (patchOp src) (env dst) - PSHUFD fmt off src dst - -> PSHUFD fmt (patchOp off) (patchOp src) (env dst) - PSLLDQ fmt off dst - -> PSLLDQ fmt (patchOp off) (env dst) - PSRLDQ fmt off dst - -> PSRLDQ fmt (patchOp off) (env dst) _other -> panic "patchRegs: 
unrecognised instr" where diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index a3f27ba471..095d9eba7c 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -41,6 +41,7 @@ import DynFlags import Cmm hiding (topInfoTable) import BlockId import CLabel +import Unique ( pprUniqueAlways ) import GHC.Platform import FastString import Outputable @@ -279,7 +280,10 @@ pprReg f r if target32Bit platform then ppr32_reg_no f i else ppr64_reg_no f i RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch" - RegVirtual v -> ppr v + RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u + RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u + RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u + RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u where ppr32_reg_no :: Format -> Int -> SDoc @@ -391,11 +395,6 @@ pprFormat x II64 -> sLit "q" FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2) FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2) - - VecFormat _ FmtFloat W32 -> sLit "ps" - VecFormat _ FmtDouble W64 -> sLit "pd" - -- TODO: Add Ints and remove panic - VecFormat {} -> panic "Incorrect width" ) pprFormat_x87 :: Format -> SDoc @@ -784,41 +783,6 @@ pprInstr (IDIV fmt op) = pprFormatOp (sLit "idiv") fmt op pprInstr (DIV fmt op) = pprFormatOp (sLit "div") fmt op pprInstr (IMUL2 fmt op) = pprFormatOp (sLit "imul") fmt op --- Vector Instructions - -pprInstr (VADD format s1 s2 dst) - = pprFormatOpRegReg (sLit "vadd") format s1 s2 dst -pprInstr (VSUB format s1 s2 dst) - = pprFormatOpRegReg (sLit "vsub") format s1 s2 dst -pprInstr (VMUL format s1 s2 dst) - = pprFormatOpRegReg (sLit "vmul") format s1 s2 dst -pprInstr (VDIV format s1 s2 dst) - = pprFormatOpRegReg (sLit "vdiv") format s1 s2 dst -pprInstr (VBROADCAST format from to) - = pprBroadcast (sLit "vbroadcast") format from to -pprInstr (VMOVU format from to) - = pprFormatOpOp (sLit "vmovu") format from to -pprInstr (MOVU format from to) - = pprFormatOpOp (sLit "movu") format from to -pprInstr (MOVL format from to) - = pprFormatOpOp (sLit "movl") format from to -pprInstr (MOVH format from to) - = pprFormatOpOp (sLit "movh") format from to -pprInstr (VPXOR format s1 s2 dst) - = pprXor (sLit "vpxor") format s1 s2 dst -pprInstr (VEXTRACT format offset from to) - = pprFormatOpRegOp (sLit "vextract") format offset from to -pprInstr (INSERTPS format offset addr dst) - = pprInsert (sLit "insertps") format offset addr dst -pprInstr (VPSHUFD format offset src dst) - = pprShuf (sLit "vpshufd") format offset src dst -pprInstr (PSHUFD format offset src dst) - = pprShuf (sLit "pshufd") format offset src dst -pprInstr (PSLLDQ format offset dst) - = pprShiftLeft (sLit "pslldq") format offset dst -pprInstr (PSRLDQ format offset dst) - = pprShiftRight (sLit "psrldq") format offset dst - -- x86_64 only pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2 pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op @@ -911,23 +875,6 @@ pprMnemonic :: PtrString -> Format -> SDoc pprMnemonic name format = char '\t' <> ptext name <> pprFormat format <> space -pprGenMnemonic :: PtrString -> Format -> SDoc -pprGenMnemonic name _ = - char '\t' <> ptext name <> ptext (sLit "") <> space - -pprBroadcastMnemonic :: PtrString -> Format -> SDoc -pprBroadcastMnemonic name format = - char '\t' <> ptext name <> pprBroadcastFormat format <> space - -pprBroadcastFormat :: Format -> SDoc -pprBroadcastFormat x - = ptext (case x of - VecFormat 
_ FmtFloat W32 -> sLit "ss" - VecFormat _ FmtDouble W64 -> sLit "sd" - -- TODO: Add Ints and remove panic - VecFormat {} -> panic "Incorrect width" - _ -> panic "Scalar Format invading vector operation" - ) pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc pprFormatImmOp name format imm op1 @@ -974,16 +921,7 @@ pprOpOp name format op1 op2 pprOperand format op2 ] -pprFormatOpRegOp :: PtrString -> Format -> Operand -> Reg -> Operand -> SDoc -pprFormatOpRegOp name format off reg1 op2 - = hcat [ - pprMnemonic name format, - pprOperand format off, - comma, - pprReg format reg1, - comma, - pprOperand format op2 - ] + pprRegReg :: PtrString -> Reg -> Reg -> SDoc pprRegReg name reg1 reg2 @@ -1006,17 +944,6 @@ pprFormatOpReg name format op1 reg2 pprReg (archWordFormat (target32Bit platform)) reg2 ] -pprFormatOpRegReg :: PtrString -> Format -> Operand -> Reg -> Reg -> SDoc -pprFormatOpRegReg name format op1 reg2 reg3 - = hcat [ - pprMnemonic name format, - pprOperand format op1, - comma, - pprReg format reg2, - comma, - pprReg format reg3 - ] - pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc pprCondOpReg name format cond op1 reg2 = hcat [ @@ -1081,68 +1008,3 @@ pprFormatOpOpCoerce name format1 format2 op1 op2 pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc pprCondInstr name cond arg = hcat [ char '\t', ptext name, pprCond cond, space, arg] - - --- Custom pretty printers --- These instructions currently don't follow a uniform suffix pattern --- in their names, so we have custom pretty printers for them. - -pprBroadcast :: PtrString -> Format -> AddrMode -> Reg -> SDoc -pprBroadcast name format op dst - = hcat [ - pprBroadcastMnemonic name format, - pprAddr op, - comma, - pprReg format dst - ] - -pprXor :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc -pprXor name format reg1 reg2 reg3 - = hcat [ - pprGenMnemonic name format, - pprReg format reg1, - comma, - pprReg format reg2, - comma, - pprReg format reg3 - ] - -pprInsert :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc -pprInsert name format off src dst - = hcat [ - pprGenMnemonic name format, - pprOperand format off, - comma, - pprOperand format src, - comma, - pprReg format dst - ] - -pprShuf :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc -pprShuf name format op1 op2 reg3 - = hcat [ - pprGenMnemonic name format, - pprOperand format op1, - comma, - pprOperand format op2, - comma, - pprReg format reg3 - ] - -pprShiftLeft :: PtrString -> Format -> Operand -> Reg -> SDoc -pprShiftLeft name format off reg - = hcat [ - pprGenMnemonic name format, - pprOperand format off, - comma, - pprReg format reg - ] - -pprShiftRight :: PtrString -> Format -> Operand -> Reg -> SDoc -pprShiftRight name format off reg - = hcat [ - pprGenMnemonic name format, - pprOperand format off, - comma, - pprReg format reg - ] diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs index a7784bacad..19056be4fa 100644 --- a/compiler/nativeGen/X86/RegInfo.hs +++ b/compiler/nativeGen/X86/RegInfo.hs @@ -22,8 +22,6 @@ import UniqFM import X86.Regs ---TODO: --- Add VirtualRegAVX and inspect VecFormat and allocate mkVirtualReg :: Unique -> Format -> VirtualReg mkVirtualReg u format = case format of @@ -33,7 +31,6 @@ mkVirtualReg u format -- For now we map both to being allocated as "Double" Registers -- on X86/X86_64 FF64 -> VirtualRegD u - VecFormat {} -> VirtualRegVec u _other -> VirtualRegI u regDotColor :: Platform -> RealReg -> SDoc diff --git a/compiler/nativeGen/X86/Regs.hs 
b/compiler/nativeGen/X86/Regs.hs
index f0e4c7d5f6..2d9fd88c8e 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -84,7 +84,6 @@ virtualRegSqueeze cls vr
         -> case vr of
                 VirtualRegD{}   -> 1
                 VirtualRegF{}   -> 0
-                VirtualRegVec{} -> 1
                 _other          -> 0