diff options
author | Geoffrey Mainland <gmainlan@microsoft.com> | 2012-10-31 15:42:01 +0000 |
---|---|---|
committer | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-02-01 22:00:24 +0000 |
commit | 33bfc6a700eaab9bc06974d6f71a80e61d9177c9 (patch) | |
tree | 842bf82b7dbdafe04820349e5e991800a3cf0646 /compiler/cmm | |
parent | 1811440833da92eefd7b7255915855fddc64994c (diff) | |
download | haskell-33bfc6a700eaab9bc06974d6f71a80e61d9177c9.tar.gz |
Add support for passing SSE vectors in registers.
This patch adds support for six XMM registers on x86-64, which overlap with the F
and D registers and may hold 128-bit-wide SIMD vectors. Because there is no good
way to attach type information to STG registers, we aggressively bitcast in
the LLVM back-end.
Diffstat (limited to 'compiler/cmm')
-rw-r--r-- | compiler/cmm/CmmCallConv.hs | 4 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 10 | ||||
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 46 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 9 | ||||
-rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 1 |
5 files changed, 51 insertions, 19 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index dd4d6a6c1a..913f15d436 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) assign_regs assts (r:rs) regs | isVecType ty = vec | isFloatType ty = float | otherwise = int - where vec = (assts, (r:rs)) + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) (W32, (vs, f:fs, ds, ls, ss)) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index dce962443b..1df8e848b8 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -336,6 +336,9 @@ data GlobalReg | LongReg -- long int registers (64-bit, really) {-# UNPACK #-} !Int -- its number + | XmmReg -- 128-bit SIMD vector register + {-# UNPACK #-} !Int -- its number + -- STG registers | Sp -- Stack ptr; points to last occupied stack location. 
| SpLim -- Stack limit @@ -371,6 +374,7 @@ instance Eq GlobalReg where FloatReg i == FloatReg j = i==j DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j + XmmReg i == XmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -392,6 +396,7 @@ instance Ord GlobalReg where compare (FloatReg i) (FloatReg j) = compare i j compare (DoubleReg i) (DoubleReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j + compare (XmmReg i) (XmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -413,6 +418,8 @@ instance Ord GlobalReg where compare _ (DoubleReg _) = GT compare (LongReg _) _ = LT compare _ (LongReg _) = GT + compare (XmmReg _) _ = LT + compare _ (XmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 +globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) + globalRegType dflags Hp = gcWord dflags -- The initialiser for all -- dynamically allocated closures @@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True isArgReg (FloatReg {}) = True isArgReg (DoubleReg {}) = True isArgReg (LongReg {}) = True +isArgReg (XmmReg {}) = True isArgReg _ = False diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 4e38cd42b9..0f18029d20 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -118,6 +118,10 @@ data MachOp | MO_VS_Rem Length Width | MO_VS_Neg Length Width + -- Floting point vector element insertion and extraction operations + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector + -- Floating point vector operations | MO_VF_Add Length Width | MO_VF_Sub Length Width @@ -360,22 +364,25 @@ machOpResultType dflags mop tys = MO_SF_Conv _ to -> cmmFloat to MO_FF_Conv _ to -> cmmFloat 
to - MO_V_Insert {} -> ty1 - MO_V_Extract {} -> vecElemType ty1 - - MO_V_Add {} -> ty1 - MO_V_Sub {} -> ty1 - MO_V_Mul {} -> ty1 - - MO_VS_Quot {} -> ty1 - MO_VS_Rem {} -> ty1 - MO_VS_Neg {} -> ty1 - - MO_VF_Add {} -> ty1 - MO_VF_Sub {} -> ty1 - MO_VF_Mul {} -> ty1 - MO_VF_Quot {} -> ty1 - MO_VF_Neg {} -> ty1 + MO_V_Insert l w -> cmmVec l (cmmBits w) + MO_V_Extract _ w -> cmmBits w + + MO_V_Add l w -> cmmVec l (cmmBits w) + MO_V_Sub l w -> cmmVec l (cmmBits w) + MO_V_Mul l w -> cmmVec l (cmmBits w) + + MO_VS_Quot l w -> cmmVec l (cmmBits w) + MO_VS_Rem l w -> cmmVec l (cmmBits w) + MO_VS_Neg l w -> cmmVec l (cmmBits w) + + MO_VF_Insert l w -> cmmVec l (cmmFloat w) + MO_VF_Extract _ w -> cmmFloat w + + MO_VF_Add l w -> cmmVec l (cmmFloat w) + MO_VF_Sub l w -> cmmVec l (cmmFloat w) + MO_VF_Mul l w -> cmmVec l (cmmFloat w) + MO_VF_Quot l w -> cmmVec l (cmmFloat w) + MO_VF_Neg l w -> cmmVec l (cmmFloat w) where (ty1:_) = tys @@ -443,8 +450,8 @@ machOpArgReps dflags op = MO_FS_Conv from _ -> [from] MO_FF_Conv from _ -> [from] - MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] - MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags] MO_V_Add _ r -> [r,r] MO_V_Sub _ r -> [r,r] @@ -454,6 +461,9 @@ machOpArgReps dflags op = MO_VS_Rem _ r -> [r,r] MO_VS_Neg _ r -> [r] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] + MO_VF_Add _ r -> [r,r] MO_VF_Sub _ r -> [r,r] MO_VF_Mul _ r -> [r,r] diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 8712d5fb5c..cda68ef39e 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" ++ " should have been handled earlier!") + MO_VF_Insert {} -> pprTrace 
"offending mop:" + (ptext $ sLit "MO_VF_Insert") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" + ++ " should have been handled earlier!") + MO_VF_Extract {} -> pprTrace "offending mop:" + (ptext $ sLit "MO_VF_Extract") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract" + ++ " should have been handled earlier!") + MO_VF_Add {} -> pprTrace "offending mop:" (ptext $ sLit "MO_VF_Add") (panic $ "PprC.pprMachOp_for_C: MO_VF_Add" diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 3c9fa063ff..d1128b07d3 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -255,6 +255,7 @@ pprGlobalReg gr FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n + XmmReg n -> ptext (sLit "XMM") <> int n Sp -> ptext (sLit "Sp") SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") |