summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Geoffrey Mainland <gmainlan@microsoft.com> 2013-02-14 23:28:39 +0000
committer Geoffrey Mainland <gmainlan@microsoft.com> 2013-07-08 10:28:37 +0100
commit 8fb5fd9429a5c62d83402cb54b1de1ae908f9b54 (patch)
tree d7fd489e8a48b348a6ae1c874c4a9858f29a8f05
parent a7a9b85ca19f7f5ac555e855a03cc60154ff185e (diff)
download haskell-simd.tar.gz
Pass 256-bit-wide vectors in registers. [simd]
-rw-r--r-- compiler/cmm/CmmCallConv.hs 1
-rw-r--r-- compiler/cmm/CmmExpr.hs 9
-rw-r--r-- compiler/cmm/PprCmmExpr.hs 1
-rw-r--r-- compiler/codeGen/CgUtils.hs 7
-rw-r--r-- compiler/llvmGen/LlvmCodeGen/Base.hs 1
-rw-r--r-- compiler/llvmGen/LlvmCodeGen/CodeGen.hs 1
-rw-r--r-- compiler/llvmGen/LlvmCodeGen/Regs.hs 7
-rw-r--r-- includes/CodeGen.Platform.hs 41
-rw-r--r-- includes/stg/MachRegs.h 16
-rw-r--r-- includes/stg/Regs.h 42
-rw-r--r-- includes/stg/Types.h 2
-rw-r--r-- utils/deriveConstants/DeriveConstants.hs 6
12 files changed, 134 insertions, 0 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 1546dd4a60..1e56319084 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -66,6 +66,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| otherwise = int
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 1df8e848b8..320c09f801 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -339,6 +339,9 @@ data GlobalReg
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
+ | YmmReg -- 256-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
@@ -375,6 +378,7 @@ instance Eq GlobalReg where
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
+ YmmReg i == YmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -397,6 +401,7 @@ instance Ord GlobalReg where
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
+ compare (YmmReg i) (YmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -420,6 +425,8 @@ instance Ord GlobalReg where
compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
+ compare (YmmReg _) _ = LT
+ compare _ (YmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -463,6 +470,7 @@ globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
@@ -475,4 +483,5 @@ isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
+isArgReg (YmmReg {}) = True
isArgReg _ = False
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index d1128b07d3..c96dee2800 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -256,6 +256,7 @@ pprGlobalReg gr
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
+ YmmReg n -> ptext (sLit "YMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index c06dd60cb1..2cf4cb3483 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -56,6 +56,13 @@ baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
+baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
+baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
+baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
+baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
+baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
+baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
+baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index ef0ab3b331..6db6912acc 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -152,6 +152,7 @@ llvmFunArgs dflags live =
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
+ isSSE (YmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index d223a5c9cd..7caf55c211 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -1517,6 +1517,7 @@ funEpilogue live = do
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
+ isSSE (YmmReg _) = True
isSSE _ = False
-- Set to value or "undef" depending on whether the register is
diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs
index 1b87929499..16d1237c23 100644
--- a/compiler/llvmGen/LlvmCodeGen/Regs.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs
@@ -62,6 +62,12 @@ lmGlobalReg dflags suf reg
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
+ YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf
+ YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf
+ YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf
+ YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf
+ YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf
+ YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
@@ -72,6 +78,7 @@ lmGlobalReg dflags suf reg
floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
+ ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index beff19601d..9be7c054ab 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -65,6 +65,23 @@ import Reg
# define xmm14 38
# define xmm15 39
+# define ymm0 40
+# define ymm1 41
+# define ymm2 42
+# define ymm3 43
+# define ymm4 44
+# define ymm5 45
+# define ymm6 46
+# define ymm7 47
+# define ymm8 48
+# define ymm9 49
+# define ymm10 50
+# define ymm11 51
+# define ymm12 52
+# define ymm13 53
+# define ymm14 54
+# define ymm15 55
+
#elif MACHREGS_powerpc
# define r0 0
@@ -391,6 +408,9 @@ activeStgRegs = [
#ifdef REG_XMM1
,XmmReg 1
#endif
+#ifdef REG_YMM1
+ ,YmmReg 1
+#endif
#ifdef REG_F2
,FloatReg 2
#endif
@@ -400,6 +420,9 @@ activeStgRegs = [
#ifdef REG_XMM2
,XmmReg 2
#endif
+#ifdef REG_YMM2
+ ,YmmReg 2
+#endif
#ifdef REG_F3
,FloatReg 3
#endif
@@ -409,6 +432,9 @@ activeStgRegs = [
#ifdef REG_XMM3
,XmmReg 3
#endif
+#ifdef REG_YMM3
+ ,YmmReg 3
+#endif
#ifdef REG_F4
,FloatReg 4
#endif
@@ -418,6 +444,9 @@ activeStgRegs = [
#ifdef REG_XMM4
,XmmReg 4
#endif
+#ifdef REG_YMM4
+ ,YmmReg 4
+#endif
#ifdef REG_F5
,FloatReg 5
#endif
@@ -427,6 +456,9 @@ activeStgRegs = [
#ifdef REG_XMM5
,XmmReg 5
#endif
+#ifdef REG_YMM5
+ ,YmmReg 5
+#endif
#ifdef REG_F6
,FloatReg 6
#endif
@@ -436,6 +468,9 @@ activeStgRegs = [
#ifdef REG_XMM6
,XmmReg 6
#endif
+#ifdef REG_YMM6
+ ,YmmReg 6
+#endif
#else /* MAX_REAL_SSE_REG == 0 */
#ifdef REG_F1
,FloatReg 1
@@ -594,6 +629,12 @@ globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3)
globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4)
globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5)
globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6)
+globalRegMaybe (YmmReg 1) = Just (RealRegSingle REG_YMM1)
+globalRegMaybe (YmmReg 2) = Just (RealRegSingle REG_YMM2)
+globalRegMaybe (YmmReg 3) = Just (RealRegSingle REG_YMM3)
+globalRegMaybe (YmmReg 4) = Just (RealRegSingle REG_YMM4)
+globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5)
+globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6)
# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 76bdb1fc21..2cce08949d 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -174,6 +174,13 @@
#define REG_XMM5 xmm5
#define REG_XMM6 xmm6
+#define REG_YMM1 ymm1
+#define REG_YMM2 ymm2
+#define REG_YMM3 ymm3
+#define REG_YMM4 ymm4
+#define REG_YMM5 ymm5
+#define REG_YMM6 ymm6
+
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
#define CALLER_SAVES_R4
@@ -208,6 +215,15 @@
#define CALLER_SAVES_XMM6
#endif
+#define CALLER_SAVES_YMM1
+#define CALLER_SAVES_YMM2
+#define CALLER_SAVES_YMM3
+#define CALLER_SAVES_YMM4
+#define CALLER_SAVES_YMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_YMM6
+#endif
+
#define MAX_REAL_VANILLA_REG 6
#define MAX_REAL_FLOAT_REG 6
#define MAX_REAL_DOUBLE_REG 6
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 10ae2851ac..4b0416358e 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -87,6 +87,12 @@ typedef struct {
StgWord128 rXMM4;
StgWord128 rXMM5;
StgWord128 rXMM6;
+ StgWord256 rYMM1; /* 256-bit slot; width must match the StgWord256 YMMn register declarations */
+ StgWord256 rYMM2;
+ StgWord256 rYMM3;
+ StgWord256 rYMM4;
+ StgWord256 rYMM5;
+ StgWord256 rYMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
@@ -312,6 +318,42 @@ GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
#define XMM6 (BaseReg->rXMM6)
#endif
+#if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1)
+#else
+#define YMM1 (BaseReg->rYMM1)
+#endif
+
+#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2)
+#else
+#define YMM2 (BaseReg->rYMM2)
+#endif
+
+#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3)
+#else
+#define YMM3 (BaseReg->rYMM3)
+#endif
+
+#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4)
+#else
+#define YMM4 (BaseReg->rYMM4)
+#endif
+
+#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5)
+#else
+#define YMM5 (BaseReg->rYMM5)
+#endif
+
+#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
+#else
+#define YMM6 (BaseReg->rYMM6)
+#endif
+
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
diff --git a/includes/stg/Types.h b/includes/stg/Types.h
index ccc06a175b..4e574bb5c6 100644
--- a/includes/stg/Types.h
+++ b/includes/stg/Types.h
@@ -85,6 +85,8 @@ typedef unsigned long long int StgWord64;
typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
+typedef struct { StgWord128 h; StgWord128 l; } StgWord256;
+
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 3173c27cec..9fc29d6750 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -313,6 +313,12 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rXMM4"
,fieldOffset Both "StgRegTable" "rXMM5"
,fieldOffset Both "StgRegTable" "rXMM6"
+ ,fieldOffset Both "StgRegTable" "rYMM1"
+ ,fieldOffset Both "StgRegTable" "rYMM2"
+ ,fieldOffset Both "StgRegTable" "rYMM3"
+ ,fieldOffset Both "StgRegTable" "rYMM4"
+ ,fieldOffset Both "StgRegTable" "rYMM5"
+ ,fieldOffset Both "StgRegTable" "rYMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"