diff options
author | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-02-14 23:28:39 +0000 |
---|---|---|
committer | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-07-08 10:28:37 +0100 |
commit | 8fb5fd9429a5c62d83402cb54b1de1ae908f9b54 (patch) | |
tree | d7fd489e8a48b348a6ae1c874c4a9858f29a8f05 | |
parent | a7a9b85ca19f7f5ac555e855a03cc60154ff185e (diff) | |
download | haskell-8fb5fd9429a5c62d83402cb54b1de1ae908f9b54.tar.gz |
Pass 256-bit-wide vectors in registers. [simd]
-rw-r--r-- | compiler/cmm/CmmCallConv.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 9 | ||||
-rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 7 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Regs.hs | 7 | ||||
-rw-r--r-- | includes/CodeGen.Platform.hs | 41 | ||||
-rw-r--r-- | includes/stg/MachRegs.h | 16 | ||||
-rw-r--r-- | includes/stg/Regs.h | 42 | ||||
-rw-r--r-- | includes/stg/Types.h | 2 | ||||
-rw-r--r-- | utils/deriveConstants/DeriveConstants.hs | 6 |
12 files changed, 134 insertions, 0 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index 1546dd4a60..1e56319084 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -66,6 +66,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) | otherwise = int where vec = case (w, regs) of (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 1df8e848b8..320c09f801 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -339,6 +339,9 @@ data GlobalReg | XmmReg -- 128-bit SIMD vector register {-# UNPACK #-} !Int -- its number + | YmmReg -- 256-bit SIMD vector register + {-# UNPACK #-} !Int -- its number + -- STG registers | Sp -- Stack ptr; points to last occupied stack location. 
| SpLim -- Stack limit @@ -375,6 +378,7 @@ instance Eq GlobalReg where DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -397,6 +401,7 @@ instance Ord GlobalReg where compare (DoubleReg i) (DoubleReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -420,6 +425,8 @@ instance Ord GlobalReg where compare _ (LongReg _) = GT compare (XmmReg _) _ = LT compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -463,6 +470,7 @@ globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) +globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) globalRegType dflags Hp = gcWord dflags -- The initialiser for all @@ -475,4 +483,5 @@ isArgReg (FloatReg {}) = True isArgReg (DoubleReg {}) = True isArgReg (LongReg {}) = True isArgReg (XmmReg {}) = True +isArgReg (YmmReg {}) = True isArgReg _ = False diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index d1128b07d3..c96dee2800 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -256,6 +256,7 @@ pprGlobalReg gr DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n XmmReg n -> ptext (sLit "XMM") <> int n + YmmReg n -> ptext (sLit "YMM") <> int n Sp -> ptext (sLit "Sp") SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c06dd60cb1..2cf4cb3483 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -56,6 +56,13 @@ baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags baseRegOffset 
dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") +baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags +baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags +baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags +baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags +baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags +baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags +baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index ef0ab3b331..6db6912acc 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -152,6 +152,7 @@ llvmFunArgs dflags live = isSSE (FloatReg _) = True isSSE (DoubleReg _) = True isSSE (XmmReg _) = True + isSSE (YmmReg _) = True isSSE _ = False -- | Llvm standard fun attributes diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index d223a5c9cd..7caf55c211 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -1517,6 +1517,7 @@ funEpilogue live = do isSSE (FloatReg _) = True isSSE (DoubleReg _) = True isSSE (XmmReg _) = True + isSSE (YmmReg _) = True isSSE _ = False -- Set to value or "undef" depending on whether the register is diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs index 1b87929499..16d1237c23 100644 --- a/compiler/llvmGen/LlvmCodeGen/Regs.hs +++ 
b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -62,6 +62,12 @@ lmGlobalReg dflags suf reg XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf + YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf + YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf + YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf + YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf + YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf + YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) ++ ") not supported!" -- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc @@ -72,6 +78,7 @@ lmGlobalReg dflags suf reg floatGlobal name = LMNLocalVar (fsLit name) LMFloat doubleGlobal name = LMNLocalVar (fsLit name) LMDouble xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32)) + ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32)) -- | A list of STG Registers that should always be considered alive alwaysLive :: [GlobalReg] diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index beff19601d..9be7c054ab 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -65,6 +65,23 @@ import Reg # define xmm14 38 # define xmm15 39 +# define ymm0 40 +# define ymm1 41 +# define ymm2 42 +# define ymm3 43 +# define ymm4 44 +# define ymm5 45 +# define ymm6 46 +# define ymm7 47 +# define ymm8 48 +# define ymm9 49 +# define ymm10 50 +# define ymm11 51 +# define ymm12 52 +# define ymm13 53 +# define ymm14 54 +# define ymm15 55 + #elif MACHREGS_powerpc # define r0 0 @@ -391,6 +408,9 @@ activeStgRegs = [ #ifdef REG_XMM1 ,XmmReg 1 #endif +#ifdef REG_YMM1 + ,YmmReg 1 +#endif #ifdef REG_F2 ,FloatReg 2 #endif @@ -400,6 +420,9 @@ activeStgRegs = [ #ifdef REG_XMM2 ,XmmReg 2 #endif +#ifdef REG_YMM2 + ,YmmReg 2 +#endif #ifdef REG_F3 ,FloatReg 3 #endif @@ -409,6 +432,9 @@ activeStgRegs = [ #ifdef REG_XMM3 ,XmmReg 3 #endif +#ifdef REG_YMM3 + ,YmmReg 3 +#endif #ifdef REG_F4 ,FloatReg 4 #endif @@ -418,6 +444,9 @@ activeStgRegs = 
[ #ifdef REG_XMM4 ,XmmReg 4 #endif +#ifdef REG_YMM4 + ,YmmReg 4 +#endif #ifdef REG_F5 ,FloatReg 5 #endif @@ -427,6 +456,9 @@ activeStgRegs = [ #ifdef REG_XMM5 ,XmmReg 5 #endif +#ifdef REG_YMM5 + ,YmmReg 5 +#endif #ifdef REG_F6 ,FloatReg 6 #endif @@ -436,6 +468,9 @@ activeStgRegs = [ #ifdef REG_XMM6 ,XmmReg 6 #endif +#ifdef REG_YMM6 + ,YmmReg 6 +#endif #else /* MAX_REAL_SSE_REG == 0 */ #ifdef REG_F1 ,FloatReg 1 @@ -594,6 +629,12 @@ globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3) globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4) globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5) globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6) +globalRegMaybe (YmmReg 1) = Just (RealRegSingle REG_YMM1) +globalRegMaybe (YmmReg 2) = Just (RealRegSingle REG_YMM2) +globalRegMaybe (YmmReg 3) = Just (RealRegSingle REG_YMM3) +globalRegMaybe (YmmReg 4) = Just (RealRegSingle REG_YMM4) +globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5) +globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6) # endif # ifdef REG_Sp globalRegMaybe Sp = Just (RealRegSingle REG_Sp) diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 76bdb1fc21..2cce08949d 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -174,6 +174,13 @@ #define REG_XMM5 xmm5 #define REG_XMM6 xmm6 +#define REG_YMM1 ymm1 +#define REG_YMM2 ymm2 +#define REG_YMM3 ymm3 +#define REG_YMM4 ymm4 +#define REG_YMM5 ymm5 +#define REG_YMM6 ymm6 + #if !defined(mingw32_HOST_OS) #define CALLER_SAVES_R3 #define CALLER_SAVES_R4 @@ -208,6 +215,15 @@ #define CALLER_SAVES_XMM6 #endif +#define CALLER_SAVES_YMM1 +#define CALLER_SAVES_YMM2 +#define CALLER_SAVES_YMM3 +#define CALLER_SAVES_YMM4 +#define CALLER_SAVES_YMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_YMM6 +#endif + #define MAX_REAL_VANILLA_REG 6 #define MAX_REAL_FLOAT_REG 6 #define MAX_REAL_DOUBLE_REG 6 diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index 10ae2851ac..4b0416358e 100644 --- a/includes/stg/Regs.h 
+++ b/includes/stg/Regs.h @@ -87,6 +87,12 @@ typedef struct { StgWord128 rXMM4; StgWord128 rXMM5; StgWord128 rXMM6; + StgWord256 rYMM1; + StgWord256 rYMM2; + StgWord256 rYMM3; + StgWord256 rYMM4; + StgWord256 rYMM5; + StgWord256 rYMM6; StgWord64 rL1; StgPtr rSp; StgPtr rSpLim; @@ -312,6 +318,42 @@ GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6) #define XMM6 (BaseReg->rXMM6) #endif +#if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1) +#else +#define YMM1 (BaseReg->rYMM1) +#endif + +#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2) +#else +#define YMM2 (BaseReg->rYMM2) +#endif + +#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3) +#else +#define YMM3 (BaseReg->rYMM3) +#endif + +#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4) +#else +#define YMM4 (BaseReg->rYMM4) +#endif + +#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5) +#else +#define YMM5 (BaseReg->rYMM5) +#endif + +#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6) +#else +#define YMM6 (BaseReg->rYMM6) +#endif + #if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) GLOBAL_REG_DECL(StgWord64,L1,REG_L1) #else diff --git a/includes/stg/Types.h b/includes/stg/Types.h index ccc06a175b..4e574bb5c6 100644 --- a/includes/stg/Types.h +++ b/includes/stg/Types.h @@ -85,6 +85,8 @@ typedef unsigned long long int StgWord64; typedef struct { StgWord64 h; StgWord64 l; } StgWord128; +typedef struct { StgWord128 h; StgWord128 l; } StgWord256; + /* * Define the standard word size we'll use on this machine: make it * big enough to hold a pointer. 
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 3173c27cec..9fc29d6750 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -313,6 +313,12 @@ wanteds = concat ,fieldOffset Both "StgRegTable" "rXMM4" ,fieldOffset Both "StgRegTable" "rXMM5" ,fieldOffset Both "StgRegTable" "rXMM6" + ,fieldOffset Both "StgRegTable" "rYMM1" + ,fieldOffset Both "StgRegTable" "rYMM2" + ,fieldOffset Both "StgRegTable" "rYMM3" + ,fieldOffset Both "StgRegTable" "rYMM4" + ,fieldOffset Both "StgRegTable" "rYMM5" + ,fieldOffset Both "StgRegTable" "rYMM6" ,fieldOffset Both "StgRegTable" "rL1" ,fieldOffset Both "StgRegTable" "rSp" ,fieldOffset Both "StgRegTable" "rSpLim" |