summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmCallConv.hs1
-rw-r--r--compiler/cmm/CmmExpr.hs9
-rw-r--r--compiler/cmm/PprCmmExpr.hs1
-rw-r--r--compiler/codeGen/CgUtils.hs7
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Base.hs1
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs1
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Regs.hs7
-rw-r--r--includes/CodeGen.Platform.hs55
-rw-r--r--includes/stg/MachRegs.h23
-rw-r--r--includes/stg/Regs.h42
-rw-r--r--includes/stg/Types.h2
-rw-r--r--utils/deriveConstants/DeriveConstants.hs6
12 files changed, 155 insertions, 0 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index de86c4ed33..6a931668bb 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -68,6 +68,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
(W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+ (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 02126905c8..0f5abda74b 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -346,6 +346,9 @@ data GlobalReg
| YmmReg -- 256-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
+ | ZmmReg -- 512-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
@@ -383,6 +386,7 @@ instance Eq GlobalReg where
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
YmmReg i == YmmReg j = i==j
+ ZmmReg i == ZmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -406,6 +410,7 @@ instance Ord GlobalReg where
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare (YmmReg i) (YmmReg j) = compare i j
+ compare (ZmmReg i) (ZmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -431,6 +436,8 @@ instance Ord GlobalReg where
compare _ (XmmReg _) = GT
compare (YmmReg _) _ = LT
compare _ (YmmReg _) = GT
+ compare (ZmmReg _) _ = LT
+ compare _ (ZmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -475,6 +482,7 @@ globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
+globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
@@ -488,4 +496,5 @@ isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
isArgReg (YmmReg {}) = True
+isArgReg (ZmmReg {}) = True
isArgReg _ = False
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index c96dee2800..0bb79ac147 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -257,6 +257,7 @@ pprGlobalReg gr
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
YmmReg n -> ptext (sLit "YMM") <> int n
+ ZmmReg n -> ptext (sLit "ZMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index 2cf4cb3483..6b36ab09cd 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -63,6 +63,13 @@ baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
+baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
+baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
+baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
+baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
+baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
+baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
+baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index 9142c35064..fb9668b5ee 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -156,6 +156,7 @@ llvmFunArgs dflags live =
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
+ isSSE (ZmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 08aa45fa71..5002b89b72 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -1534,6 +1534,7 @@ funEpilogue live = do
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
+ isSSE (ZmmReg _) = True
isSSE _ = False
-- Set to value or "undef" depending on whether the register is
diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs
index 16d1237c23..9f20aa5de5 100644
--- a/compiler/llvmGen/LlvmCodeGen/Regs.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs
@@ -68,6 +68,12 @@ lmGlobalReg dflags suf reg
YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf
YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf
YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf
+ ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf
+ ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf
+ ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf
+ ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf
+ ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf
+ ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
@@ -79,6 +85,7 @@ lmGlobalReg dflags suf reg
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32))
+ zmmGlobal name = LMNLocalVar (fsLit name) (LMVector 16 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index cff60b3e8c..3d6dd41ae4 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -82,6 +82,23 @@ import Reg
# define ymm14 54
# define ymm15 55
+# define zmm0 56
+# define zmm1 57
+# define zmm2 58
+# define zmm3 59
+# define zmm4 60
+# define zmm5 61
+# define zmm6 62
+# define zmm7 63
+# define zmm8 64
+# define zmm9 65
+# define zmm10 66
+# define zmm11 67
+# define zmm12 68
+# define zmm13 69
+# define zmm14 70
+# define zmm15 71
+
#elif MACHREGS_powerpc
# define r0 0
@@ -411,6 +428,9 @@ activeStgRegs = [
#ifdef REG_YMM1
,YmmReg 1
#endif
+#ifdef REG_ZMM1
+ ,ZmmReg 1
+#endif
#ifdef REG_F2
,FloatReg 2
#endif
@@ -423,6 +443,9 @@ activeStgRegs = [
#ifdef REG_YMM2
,YmmReg 2
#endif
+#ifdef REG_ZMM2
+ ,ZmmReg 2
+#endif
#ifdef REG_F3
,FloatReg 3
#endif
@@ -435,6 +458,9 @@ activeStgRegs = [
#ifdef REG_YMM3
,YmmReg 3
#endif
+#ifdef REG_ZMM3
+ ,ZmmReg 3
+#endif
#ifdef REG_F4
,FloatReg 4
#endif
@@ -447,6 +473,9 @@ activeStgRegs = [
#ifdef REG_YMM4
,YmmReg 4
#endif
+#ifdef REG_ZMM4
+ ,ZmmReg 4
+#endif
#ifdef REG_F5
,FloatReg 5
#endif
@@ -459,6 +488,9 @@ activeStgRegs = [
#ifdef REG_YMM5
,YmmReg 5
#endif
+#ifdef REG_ZMM5
+ ,ZmmReg 5
+#endif
#ifdef REG_F6
,FloatReg 6
#endif
@@ -471,6 +503,9 @@ activeStgRegs = [
#ifdef REG_YMM6
,YmmReg 6
#endif
+#ifdef REG_ZMM6
+ ,ZmmReg 6
+#endif
#else /* MAX_REAL_XMM_REG == 0 */
#ifdef REG_F1
,FloatReg 1
@@ -662,6 +697,26 @@ globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5)
globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6)
# endif
# endif
+# if MAX_REAL_ZMM_REG != 0
+# ifdef REG_ZMM1
+globalRegMaybe (ZmmReg 1) = Just (RealRegSingle REG_ZMM1)
+# endif
+# ifdef REG_ZMM2
+globalRegMaybe (ZmmReg 2) = Just (RealRegSingle REG_ZMM2)
+# endif
+# ifdef REG_ZMM3
+globalRegMaybe (ZmmReg 3) = Just (RealRegSingle REG_ZMM3)
+# endif
+# ifdef REG_ZMM4
+globalRegMaybe (ZmmReg 4) = Just (RealRegSingle REG_ZMM4)
+# endif
+# ifdef REG_ZMM5
+globalRegMaybe (ZmmReg 5) = Just (RealRegSingle REG_ZMM5)
+# endif
+# ifdef REG_ZMM6
+globalRegMaybe (ZmmReg 6) = Just (RealRegSingle REG_ZMM6)
+# endif
+# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index a73705110d..81e48cc9b4 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -113,12 +113,18 @@
#define REG_YMM3 ymm2
#define REG_YMM4 ymm3
+#define REG_ZMM1 zmm0
+#define REG_ZMM2 zmm1
+#define REG_ZMM3 zmm2
+#define REG_ZMM4 zmm3
+
#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
#define MAX_REAL_FLOAT_REG 0
#define MAX_REAL_DOUBLE_REG 0
#define MAX_REAL_LONG_REG 0
#define MAX_REAL_XMM_REG 4
#define MAX_REAL_YMM_REG 4
+#define MAX_REAL_ZMM_REG 4
/* -----------------------------------------------------------------------------
The x86-64 register mapping
@@ -192,6 +198,13 @@
#define REG_YMM5 ymm5
#define REG_YMM6 ymm6
+#define REG_ZMM1 zmm1
+#define REG_ZMM2 zmm2
+#define REG_ZMM3 zmm3
+#define REG_ZMM4 zmm4
+#define REG_ZMM5 zmm5
+#define REG_ZMM6 zmm6
+
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
#define CALLER_SAVES_R4
@@ -235,12 +248,22 @@
#define CALLER_SAVES_YMM6
#endif
+#define CALLER_SAVES_ZMM1
+#define CALLER_SAVES_ZMM2
+#define CALLER_SAVES_ZMM3
+#define CALLER_SAVES_ZMM4
+#define CALLER_SAVES_ZMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_ZMM6
+#endif
+
#define MAX_REAL_VANILLA_REG 6
#define MAX_REAL_FLOAT_REG 6
#define MAX_REAL_DOUBLE_REG 6
#define MAX_REAL_LONG_REG 0
#define MAX_REAL_XMM_REG 6
#define MAX_REAL_YMM_REG 6
+#define MAX_REAL_ZMM_REG 6
/* -----------------------------------------------------------------------------
The PowerPC register mapping
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 2f27c639c8..1abf9da439 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -93,6 +93,12 @@ typedef struct {
StgWord256 rYMM4;
StgWord256 rYMM5;
StgWord256 rYMM6;
+ StgWord512 rZMM1;
+ StgWord512 rZMM2;
+ StgWord512 rZMM3;
+ StgWord512 rZMM4;
+ StgWord512 rZMM5;
+ StgWord512 rZMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
@@ -354,6 +360,42 @@ GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
#define YMM6 (BaseReg->rYMM6)
#endif
+#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1)
+#else
+#define ZMM1 (BaseReg->rZMM1)
+#endif
+
+#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2)
+#else
+#define ZMM2 (BaseReg->rZMM2)
+#endif
+
+#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3)
+#else
+#define ZMM3 (BaseReg->rZMM3)
+#endif
+
+#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4)
+#else
+#define ZMM4 (BaseReg->rZMM4)
+#endif
+
+#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5)
+#else
+#define ZMM5 (BaseReg->rZMM5)
+#endif
+
+#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6)
+#else
+#define ZMM6 (BaseReg->rZMM6)
+#endif
+
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
diff --git a/includes/stg/Types.h b/includes/stg/Types.h
index 4e574bb5c6..6138a8f8a2 100644
--- a/includes/stg/Types.h
+++ b/includes/stg/Types.h
@@ -87,6 +87,8 @@ typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
typedef struct { StgWord128 h; StgWord128 l; } StgWord256;
+typedef struct { StgWord256 h; StgWord256 l; } StgWord512;
+
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 29b059c873..5b9b7c0bd9 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -319,6 +319,12 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rYMM4"
,fieldOffset Both "StgRegTable" "rYMM5"
,fieldOffset Both "StgRegTable" "rYMM6"
+ ,fieldOffset Both "StgRegTable" "rZMM1"
+ ,fieldOffset Both "StgRegTable" "rZMM2"
+ ,fieldOffset Both "StgRegTable" "rZMM3"
+ ,fieldOffset Both "StgRegTable" "rZMM4"
+ ,fieldOffset Both "StgRegTable" "rZMM5"
+ ,fieldOffset Both "StgRegTable" "rZMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"