diff options
-rw-r--r-- | compiler/cmm/CmmCallConv.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 9 | ||||
-rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 7 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Regs.hs | 7 | ||||
-rw-r--r-- | includes/CodeGen.Platform.hs | 55 | ||||
-rw-r--r-- | includes/stg/MachRegs.h | 23 | ||||
-rw-r--r-- | includes/stg/Regs.h | 42 | ||||
-rw-r--r-- | includes/stg/Types.h | 2 | ||||
-rw-r--r-- | utils/deriveConstants/DeriveConstants.hs | 6 |
12 files changed, 155 insertions, 0 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index de86c4ed33..6a931668bb 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -68,6 +68,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) where vec = case (w, regs) of (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 02126905c8..0f5abda74b 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -346,6 +346,9 @@ data GlobalReg | YmmReg -- 256-bit SIMD vector register {-# UNPACK #-} !Int -- its number + | ZmmReg -- 512-bit SIMD vector register + {-# UNPACK #-} !Int -- its number + -- STG registers | Sp -- Stack ptr; points to last occupied stack location. | SpLim -- Stack limit @@ -383,6 +386,7 @@ instance Eq GlobalReg where LongReg i == LongReg j = i==j XmmReg i == XmmReg j = i==j YmmReg i == YmmReg j = i==j + ZmmReg i == ZmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -406,6 +410,7 @@ instance Ord GlobalReg where compare (LongReg i) (LongReg j) = compare i j compare (XmmReg i) (XmmReg j) = compare i j compare (YmmReg i) (YmmReg j) = compare i j + compare (ZmmReg i) (ZmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -431,6 +436,8 @@ instance Ord GlobalReg where compare _ (XmmReg _) = GT compare (YmmReg _) _ = LT compare _ (YmmReg _) = GT + compare (ZmmReg _) _ = LT + compare _ (ZmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -475,6 +482,7 @@ globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) +globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) globalRegType dflags Hp = gcWord dflags -- The initialiser for all @@ -488,4 +496,5 @@ isArgReg (DoubleReg {}) = True isArgReg (LongReg {}) = True isArgReg (XmmReg {}) = True isArgReg (YmmReg {}) = True +isArgReg (ZmmReg {}) = True isArgReg _ = False diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index c96dee2800..0bb79ac147 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -257,6 +257,7 @@ pprGlobalReg gr LongReg n -> char 'L' <> int n XmmReg n -> ptext (sLit "XMM") <> int n YmmReg n -> ptext (sLit "YMM") <> int n + ZmmReg n -> ptext (sLit "ZMM") <> int n Sp -> ptext (sLit "Sp") SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 2cf4cb3483..6b36ab09cd 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -63,6 +63,13 @@ baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") +baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags +baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags +baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags +baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags +baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags +baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags +baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index 9142c35064..fb9668b5ee 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -156,6 +156,7 @@ llvmFunArgs dflags live = isSSE (DoubleReg _) = True isSSE (XmmReg _) = True isSSE (YmmReg _) = True + isSSE (ZmmReg _) = True isSSE _ = False -- | Llvm standard fun attributes diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 08aa45fa71..5002b89b72 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -1534,6 +1534,7 @@ funEpilogue live = do isSSE (DoubleReg _) = True isSSE (XmmReg _) = True isSSE (YmmReg _) = True + isSSE (ZmmReg _) = True isSSE _ = False -- Set to value or "undef" depending on whether the register is diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs index 16d1237c23..9f20aa5de5 100644 --- a/compiler/llvmGen/LlvmCodeGen/Regs.hs +++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -68,6 +68,12 @@ lmGlobalReg dflags suf reg YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf + ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf + ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf + ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf + ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf + ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf + ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) ++ ") not supported!" -- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc @@ -79,6 +85,7 @@ lmGlobalReg dflags suf reg doubleGlobal name = LMNLocalVar (fsLit name) LMDouble xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32)) ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32)) + zmmGlobal name = LMNLocalVar (fsLit name) (LMVector 16 (LMInt 32)) -- | A list of STG Registers that should always be considered alive alwaysLive :: [GlobalReg] diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index cff60b3e8c..3d6dd41ae4 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -82,6 +82,23 @@ import Reg # define ymm14 54 # define ymm15 55 +# define zmm0 56 +# define zmm1 57 +# define zmm2 58 +# define zmm3 59 +# define zmm4 60 +# define zmm5 61 +# define zmm6 62 +# define zmm7 63 +# define zmm8 64 +# define zmm9 65 +# define zmm10 66 +# define zmm11 67 +# define zmm12 68 +# define zmm13 69 +# define zmm14 70 +# define zmm15 71 + #elif MACHREGS_powerpc # define r0 0 @@ -411,6 +428,9 @@ activeStgRegs = [ #ifdef REG_YMM1 ,YmmReg 1 #endif +#ifdef REG_ZMM1 + ,ZmmReg 1 +#endif #ifdef REG_F2 ,FloatReg 2 #endif @@ -423,6 +443,9 @@ activeStgRegs = [ #ifdef REG_YMM2 ,YmmReg 2 #endif +#ifdef REG_ZMM2 + ,ZmmReg 2 +#endif #ifdef REG_F3 ,FloatReg 3 #endif @@ -435,6 +458,9 @@ activeStgRegs = [ #ifdef REG_YMM3 ,YmmReg 3 #endif +#ifdef REG_ZMM3 + ,ZmmReg 3 +#endif #ifdef REG_F4 ,FloatReg 4 #endif @@ -447,6 +473,9 @@ activeStgRegs = [ #ifdef REG_YMM4 ,YmmReg 4 #endif +#ifdef REG_ZMM4 + ,ZmmReg 4 +#endif #ifdef REG_F5 ,FloatReg 5 #endif @@ -459,6 +488,9 @@ activeStgRegs = [ #ifdef REG_YMM5 ,YmmReg 5 #endif +#ifdef REG_ZMM5 + ,ZmmReg 5 +#endif #ifdef REG_F6 ,FloatReg 6 #endif @@ -471,6 +503,9 @@ activeStgRegs = [ #ifdef REG_YMM6 ,YmmReg 6 #endif +#ifdef REG_ZMM6 + ,ZmmReg 6 +#endif #else /* MAX_REAL_XMM_REG == 0 */ #ifdef REG_F1 ,FloatReg 1 @@ -662,6 +697,26 @@ globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5) globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6) # endif # endif +# if MAX_REAL_ZMM_REG != 0 +# ifdef REG_ZMM1 +globalRegMaybe (ZmmReg 1) = Just (RealRegSingle REG_ZMM1) +# endif +# ifdef REG_ZMM2 +globalRegMaybe (ZmmReg 2) = Just (RealRegSingle REG_ZMM2) +# endif +# ifdef REG_ZMM3 +globalRegMaybe (ZmmReg 3) = Just (RealRegSingle REG_ZMM3) +# endif +# ifdef REG_ZMM4 +globalRegMaybe (ZmmReg 4) = Just (RealRegSingle REG_ZMM4) +# endif +# ifdef REG_ZMM5 +globalRegMaybe (ZmmReg 5) = Just (RealRegSingle REG_ZMM5) +# endif +# ifdef REG_ZMM6 +globalRegMaybe (ZmmReg 6) = Just (RealRegSingle REG_ZMM6) +# endif +# endif # ifdef REG_Sp globalRegMaybe Sp = Just (RealRegSingle REG_Sp) # endif diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index a73705110d..81e48cc9b4 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -113,12 +113,18 @@ #define REG_YMM3 ymm2 #define REG_YMM4 ymm3 +#define REG_ZMM1 zmm0 +#define REG_ZMM2 zmm1 +#define REG_ZMM3 zmm2 +#define REG_ZMM4 zmm3 + #define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ #define MAX_REAL_FLOAT_REG 0 #define MAX_REAL_DOUBLE_REG 0 #define MAX_REAL_LONG_REG 0 #define MAX_REAL_XMM_REG 4 #define MAX_REAL_YMM_REG 4 +#define MAX_REAL_ZMM_REG 4 /* ----------------------------------------------------------------------------- The x86-64 register mapping @@ -192,6 +198,13 @@ #define REG_YMM5 ymm5 #define REG_YMM6 ymm6 +#define REG_ZMM1 zmm1 +#define REG_ZMM2 zmm2 +#define REG_ZMM3 zmm3 +#define REG_ZMM4 zmm4 +#define REG_ZMM5 zmm5 +#define REG_ZMM6 zmm6 + #if !defined(mingw32_HOST_OS) #define CALLER_SAVES_R3 #define CALLER_SAVES_R4 @@ -235,12 +248,22 @@ #define CALLER_SAVES_YMM6 #endif +#define CALLER_SAVES_ZMM1 +#define CALLER_SAVES_ZMM2 +#define CALLER_SAVES_ZMM3 +#define CALLER_SAVES_ZMM4 +#define CALLER_SAVES_ZMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_ZMM6 +#endif + #define MAX_REAL_VANILLA_REG 6 #define MAX_REAL_FLOAT_REG 6 #define MAX_REAL_DOUBLE_REG 6 #define MAX_REAL_LONG_REG 0 #define MAX_REAL_XMM_REG 6 #define MAX_REAL_YMM_REG 6 +#define MAX_REAL_ZMM_REG 6 /* ----------------------------------------------------------------------------- The PowerPC register mapping diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index 2f27c639c8..1abf9da439 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -93,6 +93,12 @@ typedef struct { StgWord256 rYMM4; StgWord256 rYMM5; StgWord256 rYMM6; + StgWord512 rZMM1; + StgWord512 rZMM2; + StgWord512 rZMM3; + StgWord512 rZMM4; + StgWord512 rZMM5; + StgWord512 rZMM6; StgWord64 rL1; StgPtr rSp; StgPtr rSpLim; @@ -354,6 +360,42 @@ GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6) #define YMM6 (BaseReg->rYMM6) #endif +#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1) +#else +#define ZMM1 (BaseReg->rZMM1) +#endif + +#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2) +#else +#define ZMM2 (BaseReg->rZMM2) +#endif + +#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3) +#else +#define ZMM3 (BaseReg->rZMM3) +#endif + +#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4) +#else +#define ZMM4 (BaseReg->rZMM4) +#endif + +#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5) +#else +#define ZMM5 (BaseReg->rZMM5) +#endif + +#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6) +#else +#define ZMM6 (BaseReg->rZMM6) +#endif + #if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) GLOBAL_REG_DECL(StgWord64,L1,REG_L1) #else diff --git a/includes/stg/Types.h b/includes/stg/Types.h index 4e574bb5c6..6138a8f8a2 100644 --- a/includes/stg/Types.h +++ b/includes/stg/Types.h @@ -87,6 +87,8 @@ typedef struct { StgWord64 h; StgWord64 l; } StgWord128; typedef struct { StgWord128 h; StgWord128 l; } StgWord256; +typedef struct { StgWord256 h; StgWord256 l; } StgWord512; + /* * Define the standard word size we'll use on this machine: make it * big enough to hold a pointer. diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 29b059c873..5b9b7c0bd9 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -319,6 +319,12 @@ wanteds = concat ,fieldOffset Both "StgRegTable" "rYMM4" ,fieldOffset Both "StgRegTable" "rYMM5" ,fieldOffset Both "StgRegTable" "rYMM6" + ,fieldOffset Both "StgRegTable" "rZMM1" + ,fieldOffset Both "StgRegTable" "rZMM2" + ,fieldOffset Both "StgRegTable" "rZMM3" + ,fieldOffset Both "StgRegTable" "rZMM4" + ,fieldOffset Both "StgRegTable" "rZMM5" + ,fieldOffset Both "StgRegTable" "rZMM6" ,fieldOffset Both "StgRegTable" "rL1" ,fieldOffset Both "StgRegTable" "rSp" ,fieldOffset Both "StgRegTable" "rSpLim" |