diff options
author | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-02-14 23:28:39 +0000 |
---|---|---|
committer | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-09-22 22:33:59 -0400 |
commit | 0b561f17f158ebd7bba841a3cf6ef602b55e7c61 (patch) | |
tree | 1e283bf675a53598fb1a169d89bd63c21333c854 | |
parent | f8c51678b0e60aad8ee1e4c57af746fa792a3546 (diff) | |
download | haskell-0b561f17f158ebd7bba841a3cf6ef602b55e7c61.tar.gz |
Pass 256-bit-wide vectors in registers.

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | compiler/cmm/CmmCallConv.hs | 1 |
| -rw-r--r-- | compiler/cmm/CmmExpr.hs | 9 |
| -rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 1 |
| -rw-r--r-- | compiler/codeGen/CgUtils.hs | 7 |
| -rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 1 |
| -rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 1 |
| -rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Regs.hs | 7 |
| -rw-r--r-- | includes/CodeGen.Platform.hs | 55 |
| -rw-r--r-- | includes/stg/MachRegs.h | 23 |
| -rw-r--r-- | includes/stg/Regs.h | 42 |
| -rw-r--r-- | includes/stg/Types.h | 2 |
| -rw-r--r-- | utils/deriveConstants/DeriveConstants.hs | 6 |

12 files changed, 155 insertions, 0 deletions

diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index de10d56490..de86c4ed33 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -67,6 +67,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) | otherwise = int where vec = case (w, regs) of (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index d3624dac6b..02126905c8 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -343,6 +343,9 @@ data GlobalReg | XmmReg -- 128-bit SIMD vector register {-# UNPACK #-} !Int -- its number + | YmmReg -- 256-bit SIMD vector register + {-# UNPACK #-} !Int -- its number + -- STG registers | Sp -- Stack ptr; points to last occupied stack location. | SpLim -- Stack limit @@ -379,6 +382,7 @@ instance Eq GlobalReg where DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -401,6 +405,7 @@ instance Ord GlobalReg where compare (DoubleReg i) (DoubleReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -424,6 +429,8 @@ instance Ord GlobalReg where compare _ (LongReg _) = GT compare (XmmReg _) _ = LT compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -467,6 +474,7 @@ globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) +globalRegType _ 
(YmmReg _) = cmmVec 8 (cmmBits W32) globalRegType dflags Hp = gcWord dflags -- The initialiser for all @@ -479,4 +487,5 @@ isArgReg (FloatReg {}) = True isArgReg (DoubleReg {}) = True isArgReg (LongReg {}) = True isArgReg (XmmReg {}) = True +isArgReg (YmmReg {}) = True isArgReg _ = False diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index d1128b07d3..c96dee2800 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -256,6 +256,7 @@ pprGlobalReg gr DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n XmmReg n -> ptext (sLit "XMM") <> int n + YmmReg n -> ptext (sLit "YMM") <> int n Sp -> ptext (sLit "Sp") SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c06dd60cb1..2cf4cb3483 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -56,6 +56,13 @@ baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") +baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags +baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags +baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags +baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags +baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags +baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags +baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs 
b/compiler/llvmGen/LlvmCodeGen/Base.hs index 6ae3c4252d..9142c35064 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -155,6 +155,7 @@ llvmFunArgs dflags live = isSSE (FloatReg _) = True isSSE (DoubleReg _) = True isSSE (XmmReg _) = True + isSSE (YmmReg _) = True isSSE _ = False -- | Llvm standard fun attributes diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index c52640b17f..08aa45fa71 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -1533,6 +1533,7 @@ funEpilogue live = do isSSE (FloatReg _) = True isSSE (DoubleReg _) = True isSSE (XmmReg _) = True + isSSE (YmmReg _) = True isSSE _ = False -- Set to value or "undef" depending on whether the register is diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs index 1b87929499..16d1237c23 100644 --- a/compiler/llvmGen/LlvmCodeGen/Regs.hs +++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -62,6 +62,12 @@ lmGlobalReg dflags suf reg XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf + YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf + YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf + YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf + YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf + YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf + YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) ++ ") not supported!" 
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc @@ -72,6 +78,7 @@ lmGlobalReg dflags suf reg floatGlobal name = LMNLocalVar (fsLit name) LMFloat doubleGlobal name = LMNLocalVar (fsLit name) LMDouble xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32)) + ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32)) -- | A list of STG Registers that should always be considered alive alwaysLive :: [GlobalReg] diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index ca0a905d40..cff60b3e8c 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -65,6 +65,23 @@ import Reg # define xmm14 38 # define xmm15 39 +# define ymm0 40 +# define ymm1 41 +# define ymm2 42 +# define ymm3 43 +# define ymm4 44 +# define ymm5 45 +# define ymm6 46 +# define ymm7 47 +# define ymm8 48 +# define ymm9 49 +# define ymm10 50 +# define ymm11 51 +# define ymm12 52 +# define ymm13 53 +# define ymm14 54 +# define ymm15 55 + #elif MACHREGS_powerpc # define r0 0 @@ -391,6 +408,9 @@ activeStgRegs = [ #ifdef REG_XMM1 ,XmmReg 1 #endif +#ifdef REG_YMM1 + ,YmmReg 1 +#endif #ifdef REG_F2 ,FloatReg 2 #endif @@ -400,6 +420,9 @@ activeStgRegs = [ #ifdef REG_XMM2 ,XmmReg 2 #endif +#ifdef REG_YMM2 + ,YmmReg 2 +#endif #ifdef REG_F3 ,FloatReg 3 #endif @@ -409,6 +432,9 @@ activeStgRegs = [ #ifdef REG_XMM3 ,XmmReg 3 #endif +#ifdef REG_YMM3 + ,YmmReg 3 +#endif #ifdef REG_F4 ,FloatReg 4 #endif @@ -418,6 +444,9 @@ activeStgRegs = [ #ifdef REG_XMM4 ,XmmReg 4 #endif +#ifdef REG_YMM4 + ,YmmReg 4 +#endif #ifdef REG_F5 ,FloatReg 5 #endif @@ -427,6 +456,9 @@ activeStgRegs = [ #ifdef REG_XMM5 ,XmmReg 5 #endif +#ifdef REG_YMM5 + ,YmmReg 5 +#endif #ifdef REG_F6 ,FloatReg 6 #endif @@ -436,6 +468,9 @@ activeStgRegs = [ #ifdef REG_XMM6 ,XmmReg 6 #endif +#ifdef REG_YMM6 + ,YmmReg 6 +#endif #else /* MAX_REAL_XMM_REG == 0 */ #ifdef REG_F1 ,FloatReg 1 @@ -607,6 +642,26 @@ globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5) globalRegMaybe (XmmReg 6) = 
Just (RealRegSingle REG_XMM6) # endif # endif +# if MAX_REAL_YMM_REG != 0 +# ifdef REG_YMM1 +globalRegMaybe (YmmReg 1) = Just (RealRegSingle REG_YMM1) +# endif +# ifdef REG_YMM2 +globalRegMaybe (YmmReg 2) = Just (RealRegSingle REG_YMM2) +# endif +# ifdef REG_YMM3 +globalRegMaybe (YmmReg 3) = Just (RealRegSingle REG_YMM3) +# endif +# ifdef REG_YMM4 +globalRegMaybe (YmmReg 4) = Just (RealRegSingle REG_YMM4) +# endif +# ifdef REG_YMM5 +globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5) +# endif +# ifdef REG_YMM6 +globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6) +# endif +# endif # ifdef REG_Sp globalRegMaybe Sp = Just (RealRegSingle REG_Sp) # endif diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index fa39a5a4fe..a73705110d 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -108,11 +108,17 @@ #define REG_XMM3 xmm2 #define REG_XMM4 xmm3 +#define REG_YMM1 ymm0 +#define REG_YMM2 ymm1 +#define REG_YMM3 ymm2 +#define REG_YMM4 ymm3 + #define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ #define MAX_REAL_FLOAT_REG 0 #define MAX_REAL_DOUBLE_REG 0 #define MAX_REAL_LONG_REG 0 #define MAX_REAL_XMM_REG 4 +#define MAX_REAL_YMM_REG 4 /* ----------------------------------------------------------------------------- The x86-64 register mapping @@ -179,6 +185,13 @@ #define REG_XMM5 xmm5 #define REG_XMM6 xmm6 +#define REG_YMM1 ymm1 +#define REG_YMM2 ymm2 +#define REG_YMM3 ymm3 +#define REG_YMM4 ymm4 +#define REG_YMM5 ymm5 +#define REG_YMM6 ymm6 + #if !defined(mingw32_HOST_OS) #define CALLER_SAVES_R3 #define CALLER_SAVES_R4 @@ -213,11 +226,21 @@ #define CALLER_SAVES_XMM6 #endif +#define CALLER_SAVES_YMM1 +#define CALLER_SAVES_YMM2 +#define CALLER_SAVES_YMM3 +#define CALLER_SAVES_YMM4 +#define CALLER_SAVES_YMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_YMM6 +#endif + #define MAX_REAL_VANILLA_REG 6 #define MAX_REAL_FLOAT_REG 6 #define MAX_REAL_DOUBLE_REG 6 #define MAX_REAL_LONG_REG 0 #define 
MAX_REAL_XMM_REG 6 +#define MAX_REAL_YMM_REG 6 /* ----------------------------------------------------------------------------- The PowerPC register mapping diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index 10ae2851ac..2f27c639c8 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -87,6 +87,12 @@ typedef struct { StgWord128 rXMM4; StgWord128 rXMM5; StgWord128 rXMM6; + StgWord256 rYMM1; + StgWord256 rYMM2; + StgWord256 rYMM3; + StgWord256 rYMM4; + StgWord256 rYMM5; + StgWord256 rYMM6; StgWord64 rL1; StgPtr rSp; StgPtr rSpLim; @@ -312,6 +318,42 @@ GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6) #define XMM6 (BaseReg->rXMM6) #endif +#if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1) +#else +#define YMM1 (BaseReg->rYMM1) +#endif + +#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2) +#else +#define YMM2 (BaseReg->rYMM2) +#endif + +#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3) +#else +#define YMM3 (BaseReg->rYMM3) +#endif + +#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4) +#else +#define YMM4 (BaseReg->rYMM4) +#endif + +#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5) +#else +#define YMM5 (BaseReg->rYMM5) +#endif + +#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6) +#else +#define YMM6 (BaseReg->rYMM6) +#endif + #if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) GLOBAL_REG_DECL(StgWord64,L1,REG_L1) #else diff --git a/includes/stg/Types.h b/includes/stg/Types.h index ccc06a175b..4e574bb5c6 100644 --- a/includes/stg/Types.h +++ b/includes/stg/Types.h @@ -85,6 +85,8 @@ typedef unsigned long long int StgWord64; typedef struct { StgWord64 h; StgWord64 l; } StgWord128; +typedef struct { StgWord128 h; StgWord128 l; } StgWord256; + /* * Define the standard word size we'll 
use on this machine: make it * big enough to hold a pointer. diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 48990061cc..29b059c873 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -313,6 +313,12 @@ wanteds = concat ,fieldOffset Both "StgRegTable" "rXMM4" ,fieldOffset Both "StgRegTable" "rXMM5" ,fieldOffset Both "StgRegTable" "rXMM6" + ,fieldOffset Both "StgRegTable" "rYMM1" + ,fieldOffset Both "StgRegTable" "rYMM2" + ,fieldOffset Both "StgRegTable" "rYMM3" + ,fieldOffset Both "StgRegTable" "rYMM4" + ,fieldOffset Both "StgRegTable" "rYMM5" + ,fieldOffset Both "StgRegTable" "rYMM6" ,fieldOffset Both "StgRegTable" "rL1" ,fieldOffset Both "StgRegTable" "rSp" ,fieldOffset Both "StgRegTable" "rSpLim" |