summaryrefslogtreecommitdiff
path: root/compiler/cmm
diff options
context:
space:
mode:
authorGeoffrey Mainland <gmainlan@microsoft.com>2012-10-31 15:42:01 +0000
committerGeoffrey Mainland <gmainlan@microsoft.com>2013-02-01 22:00:24 +0000
commit33bfc6a700eaab9bc06974d6f71a80e61d9177c9 (patch)
tree842bf82b7dbdafe04820349e5e991800a3cf0646 /compiler/cmm
parent1811440833da92eefd7b7255915855fddc64994c (diff)
downloadhaskell-33bfc6a700eaab9bc06974d6f71a80e61d9177c9.tar.gz
Add support for passing SSE vectors in registers.
This patch adds support for 6 XMM registers on x86-64 which overlap with the F and D registers and may hold 128-bit wide SIMD vectors. Because there is not a good way to attach type information to STG registers, we aggressively bitcast in the LLVM back-end.
Diffstat (limited to 'compiler/cmm')
-rw-r--r--compiler/cmm/CmmCallConv.hs4
-rw-r--r--compiler/cmm/CmmExpr.hs10
-rw-r--r--compiler/cmm/CmmMachOp.hs46
-rw-r--r--compiler/cmm/PprC.hs9
-rw-r--r--compiler/cmm/PprCmmExpr.hs1
5 files changed, 51 insertions, 19 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index dd4d6a6c1a..913f15d436 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
- where vec = (assts, (r:rs))
+ where vec = case (w, regs) of
+ (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ _ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss))
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index dce962443b..1df8e848b8 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -336,6 +336,9 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number
+ | XmmReg -- 128-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
@@ -371,6 +374,7 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
+ XmmReg i == XmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -392,6 +396,7 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
+ compare (XmmReg i) (XmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -413,6 +418,8 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
+ compare (XmmReg _) _ = LT
+ compare _ (XmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
+globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
-- dynamically allocated closures
@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True
isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
+isArgReg (XmmReg {}) = True
isArgReg _ = False
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 4e38cd42b9..0f18029d20 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
+ -- Floating point vector element insertion and extraction operations
+ | MO_VF_Insert Length Width -- Insert scalar into vector
+ | MO_VF_Extract Length Width -- Extract scalar from vector
+
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
@@ -360,22 +364,25 @@ machOpResultType dflags mop tys =
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
- MO_V_Insert {} -> ty1
- MO_V_Extract {} -> vecElemType ty1
-
- MO_V_Add {} -> ty1
- MO_V_Sub {} -> ty1
- MO_V_Mul {} -> ty1
-
- MO_VS_Quot {} -> ty1
- MO_VS_Rem {} -> ty1
- MO_VS_Neg {} -> ty1
-
- MO_VF_Add {} -> ty1
- MO_VF_Sub {} -> ty1
- MO_VF_Mul {} -> ty1
- MO_VF_Quot {} -> ty1
- MO_VF_Neg {} -> ty1
+ MO_V_Insert l w -> cmmVec l (cmmBits w)
+ MO_V_Extract _ w -> cmmBits w
+
+ MO_V_Add l w -> cmmVec l (cmmBits w)
+ MO_V_Sub l w -> cmmVec l (cmmBits w)
+ MO_V_Mul l w -> cmmVec l (cmmBits w)
+
+ MO_VS_Quot l w -> cmmVec l (cmmBits w)
+ MO_VS_Rem l w -> cmmVec l (cmmBits w)
+ MO_VS_Neg l w -> cmmVec l (cmmBits w)
+
+ MO_VF_Insert l w -> cmmVec l (cmmFloat w)
+ MO_VF_Extract _ w -> cmmFloat w
+
+ MO_VF_Add l w -> cmmVec l (cmmFloat w)
+ MO_VF_Sub l w -> cmmVec l (cmmFloat w)
+ MO_VF_Mul l w -> cmmVec l (cmmFloat w)
+ MO_VF_Quot l w -> cmmVec l (cmmFloat w)
+ MO_VF_Neg l w -> cmmVec l (cmmFloat w)
where
(ty1:_) = tys
@@ -443,8 +450,8 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
- MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
- MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+ MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
+ MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r]
@@ -454,6 +461,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
+ MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
+ MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 8712d5fb5c..cda68ef39e 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
+ MO_VF_Insert {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VF_Insert")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
+ ++ " should have been handled earlier!")
+ MO_VF_Extract {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VF_Extract")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract"
+ ++ " should have been handled earlier!")
+
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 3c9fa063ff..d1128b07d3 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -255,6 +255,7 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
+ XmmReg n -> ptext (sLit "XMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")