summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeoffrey Mainland <gmainlan@microsoft.com>2013-09-23 00:22:19 -0400
committerGeoffrey Mainland <gmainlan@microsoft.com>2013-09-23 00:22:19 -0400
commit680441de191145dd874bd453e09e4ee906d87bbb (patch)
tree0246607039802b6270563cfb3c3751cb554cdc74
parent6e6e6f5bfdfc3996603064523af6e2be2c5131fa (diff)
parent25eeb6782a8f8cfdd3d8e9515863007c609eafc7 (diff)
downloadhaskell-680441de191145dd874bd453e09e4ee906d87bbb.tar.gz
Merge branch 'wip/simd'
This merge revises and extends the current SIMD support in GHC. Notable features: * Support for AVX, AVX2, and AVX-512. Support for AVX-512 is untested. * SIMD primops are currently LLVM-only and documented in compiler/prelude/primops.txt.pp. * By default only 128-bit wide SIMD vectors are passed in registers, and then only on the X86_64 architecture. There is a "hidden" flag, -fllvm-pass-vectors-in-regs, that causes GHC to generate LLVM code that assumes all vectors are passed in registers by LLVM. This can be used with a suitably patched version of LLVM, and if we get LLVM 3.4 patched, we can consider turning it on by default for LLVM 3.4+. This would mean that we couldn't mix LLVM <3.4-compiled object files with LLVM >=3.4-compiled object files, but I don't see that as much of a problem. * utils/genprimcode has been hacked up to allow us to write vector operations once and have them instantiated at multiple vector types. I'm not thrilled with this solution, but after discussing with Simon PJ, what I've implemented seems to be the minimal reasonable solution to the problem of exploding primop boilerplate. The changes are documented in compiler/prelude/primops.txt.pp. * Error handling is sub-optimal. My patch checks to make sure that vector primops can be compiled efficiently based on the current set of dynamic flags. For example, if -mavx is not specified and the user tries to use a primop that adds together two 256-bit wide vectors of double-precision elements, the user will see an error message like: ghc-stage2: sorry! (unimplemented feature or known bug) (GHC version 7.7.20130916 for x86_64-unknown-linux): 256-bit wide floating point SIMD vector instructions require at least -mavx.
-rw-r--r--compiler/cmm/CmmCallConv.hs62
-rw-r--r--compiler/cmm/CmmExpr.hs18
-rw-r--r--compiler/cmm/CmmLex.x6
-rw-r--r--compiler/cmm/CmmMachOp.hs10
-rw-r--r--compiler/cmm/CmmParse.y4
-rw-r--r--compiler/cmm/CmmType.hs18
-rw-r--r--compiler/cmm/PprC.hs9
-rw-r--r--compiler/cmm/PprCmmExpr.hs2
-rw-r--r--compiler/codeGen/CgUtils.hs14
-rw-r--r--compiler/codeGen/StgCmmArgRep.hs16
-rw-r--r--compiler/codeGen/StgCmmLayout.hs2
-rw-r--r--compiler/codeGen/StgCmmPrim.hs333
-rw-r--r--compiler/ghc.mk16
-rw-r--r--compiler/ghci/ByteCodeAsm.lhs4
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Base.hs2
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs8
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Regs.hs14
-rw-r--r--compiler/llvmGen/LlvmMangler.hs39
-rw-r--r--compiler/main/CmdLineParser.hs2
-rw-r--r--compiler/main/DriverPipeline.hs40
-rw-r--r--compiler/main/DynFlags.hs55
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs2
-rw-r--r--compiler/prelude/PrelNames.lhs15
-rw-r--r--compiler/prelude/PrimOp.lhs11
-rw-r--r--compiler/prelude/TysPrim.lhs40
-rw-r--r--compiler/prelude/primops.txt.pp546
-rw-r--r--includes/Cmm.h2
-rw-r--r--includes/CodeGen.Platform.hs124
-rw-r--r--includes/rts/storage/FunTypes.h36
-rw-r--r--includes/stg/MachRegs.h53
-rw-r--r--includes/stg/MiscClosures.h4
-rw-r--r--includes/stg/Regs.h84
-rw-r--r--includes/stg/Types.h4
-rw-r--r--rts/Linker.c6
-rw-r--r--utils/deriveConstants/DeriveConstants.hs12
-rw-r--r--utils/genapply/GenApply.hs12
-rw-r--r--utils/genprimopcode/Lexer.x8
-rw-r--r--utils/genprimopcode/Main.hs396
-rw-r--r--utils/genprimopcode/Parser.y29
-rw-r--r--utils/genprimopcode/ParserM.hs8
-rw-r--r--utils/genprimopcode/Syntax.hs42
41 files changed, 1380 insertions, 728 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 35499333d0..60e2c8c8f7 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -14,6 +14,7 @@ import Cmm (Convention(..))
import PprCmm ()
import DynFlags
+import Platform
import Outputable
-- Calculate the 'GlobalReg' or stack locations for function call
@@ -65,15 +66,22 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| isFloatType ty = float
| otherwise = int
where vec = case (w, regs) of
- (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W128, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W256, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+ (W512, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
- (W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
+ (W32, (vs, fs, ds, ls, s:ss))
+ | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss))
- | not hasXmmRegs -> k (RegisterParam f, (vs, fs, ds, ls, ss))
- (W64, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
+ | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss))
+ (W64, (vs, fs, ds, ls, s:ss))
+ | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
(W64, (vs, fs, d:ds, ls, ss))
- | not hasXmmRegs -> k (RegisterParam d, (vs, fs, ds, ls, ss))
+ | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
(W80, _) -> panic "F80 unsupported register type"
_ -> (assts, (r:rs))
int = case (w, regs) of
@@ -88,8 +96,26 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
w = typeWidth ty
gcp | isGcPtrType ty = VGcPtr
| otherwise = VNonGcPtr
- hasXmmRegs = mAX_Real_XMM_REG dflags /= 0
-
+ passFloatInXmm = passFloatArgsInXmm dflags
+
+passFloatArgsInXmm :: DynFlags -> Bool
+passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> True
+ _ -> False
+
+-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
+-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
+-- doesn't currently passing vectors in registers. The patch to update the GHC
+-- calling convention to support passing SIMD vectors in registers is small and
+-- well-contained, so it may make it into LLVM 3.4. The hidden
+-- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
+-- pass vectors in registers, but it must only be used with a version of LLVM
+-- that has an updated GHC calling convention.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> True
+ _ -> gopt Opt_LlvmPassVectorsInRegisters dflags
+passVectorInReg _ dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
-> (
@@ -158,7 +184,10 @@ realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags)
realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags)
realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags)
realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags)
-realXmmRegNos dflags = regList (mAX_Real_XMM_REG dflags)
+
+realXmmRegNos dflags
+ | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags)
+ | otherwise = []
regList :: Int -> [Int]
regList n = [1 .. n]
@@ -180,12 +209,11 @@ nodeOnly = ([VanillaReg 1], [], [], [], [])
-- only use this functionality in hand-written C-- code in the RTS.
realArgRegsCover :: DynFlags -> [GlobalReg]
realArgRegsCover dflags
- | hasXmmRegs = map ($VGcPtr) (realVanillaRegs dflags) ++
- realDoubleRegs dflags ++
- realLongRegs dflags
- | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
- realFloatRegs dflags ++
- realDoubleRegs dflags ++
- realLongRegs dflags
- where
- hasXmmRegs = mAX_Real_XMM_REG dflags /= 0
+ | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
+ | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realFloatRegs dflags ++
+ realDoubleRegs dflags ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index d3624dac6b..0f5abda74b 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -343,6 +343,12 @@ data GlobalReg
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
+ | YmmReg -- 256-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
+ | ZmmReg -- 512-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
@@ -379,6 +385,8 @@ instance Eq GlobalReg where
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
+ YmmReg i == YmmReg j = i==j
+ ZmmReg i == ZmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -401,6 +409,8 @@ instance Ord GlobalReg where
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
+ compare (YmmReg i) (YmmReg j) = compare i j
+ compare (ZmmReg i) (ZmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -424,6 +434,10 @@ instance Ord GlobalReg where
compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
+ compare (YmmReg _) _ = LT
+ compare _ (YmmReg _) = GT
+ compare (ZmmReg _) _ = LT
+ compare _ (ZmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -467,6 +481,8 @@ globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
+globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
@@ -479,4 +495,6 @@ isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
+isArgReg (YmmReg {}) = True
+isArgReg (ZmmReg {}) = True
isArgReg _ = False
diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x
index 81227eb05f..b9da1e0fa6 100644
--- a/compiler/cmm/CmmLex.x
+++ b/compiler/cmm/CmmLex.x
@@ -164,6 +164,8 @@ data CmmToken
| CmmT_bits32
| CmmT_bits64
| CmmT_bits128
+ | CmmT_bits256
+ | CmmT_bits512
| CmmT_float32
| CmmT_float64
| CmmT_gcptr
@@ -244,6 +246,8 @@ reservedWordsFM = listToUFM $
( "bits32", CmmT_bits32 ),
( "bits64", CmmT_bits64 ),
( "bits128", CmmT_bits128 ),
+ ( "bits256", CmmT_bits256 ),
+ ( "bits512", CmmT_bits512 ),
( "float32", CmmT_float32 ),
( "float64", CmmT_float64 ),
-- New forms
@@ -252,6 +256,8 @@ reservedWordsFM = listToUFM $
( "b32", CmmT_bits32 ),
( "b64", CmmT_bits64 ),
( "b128", CmmT_bits128 ),
+ ( "b256", CmmT_bits256 ),
+ ( "b512", CmmT_bits512 ),
( "f32", CmmT_float32 ),
( "f64", CmmT_float64 ),
( "gcptr", CmmT_gcptr )
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 8d42bbd2cb..c009d15e25 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
+ -- Unsigned vector multiply/divide
+ | MO_VU_Quot Length Width
+ | MO_VU_Rem Length Width
+
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
@@ -375,6 +379,9 @@ machOpResultType dflags mop tys =
MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg l w -> cmmVec l (cmmBits w)
+ MO_VU_Quot l w -> cmmVec l (cmmBits w)
+ MO_VU_Rem l w -> cmmVec l (cmmBits w)
+
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
@@ -461,6 +468,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
+ MO_VU_Quot _ r -> [r,r]
+ MO_VU_Rem _ r -> [r,r]
+
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 8c36deafbb..8367f7abd4 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -289,6 +289,8 @@ import Data.Maybe
'bits32' { L _ (CmmT_bits32) }
'bits64' { L _ (CmmT_bits64) }
'bits128' { L _ (CmmT_bits128) }
+ 'bits256' { L _ (CmmT_bits256) }
+ 'bits512' { L _ (CmmT_bits512) }
'float32' { L _ (CmmT_float32) }
'float64' { L _ (CmmT_float64) }
'gcptr' { L _ (CmmT_gcptr) }
@@ -777,6 +779,8 @@ typenot8 :: { CmmType }
| 'bits32' { b32 }
| 'bits64' { b64 }
| 'bits128' { b128 }
+ | 'bits256' { b256 }
+ | 'bits512' { b512 }
| 'float32' { f32 }
| 'float64' { f64 }
| 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags }
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 8eac8c9e85..d03c2dc0b9 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -1,7 +1,7 @@
module CmmType
( CmmType -- Abstract
- , b8, b16, b32, b64, b128, f32, f64, bWord, bHalfWord, gcWord
+ , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
, cInt, cLong
, cmmBits, cmmFloat
, typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
@@ -107,12 +107,14 @@ cmmFloat = CmmType FloatCat
-------- Common CmmTypes ------------
-- Floats and words of specific widths
-b8, b16, b32, b64, b128, f32, f64 :: CmmType
+b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType
b8 = cmmBits W8
b16 = cmmBits W16
b32 = cmmBits W32
b64 = cmmBits W64
b128 = cmmBits W128
+b256 = cmmBits W256
+b512 = cmmBits W512
f32 = cmmFloat W32
f64 = cmmFloat W64
@@ -166,6 +168,8 @@ data Width = W8 | W16 | W32 | W64
-- used in x86 native codegen only.
-- (we use Ord, so it'd better be in this order)
| W128
+ | W256
+ | W512
deriving (Eq, Ord, Show)
instance Outputable Width where
@@ -177,6 +181,8 @@ mrStr W16 = sLit("W16")
mrStr W32 = sLit("W32")
mrStr W64 = sLit("W64")
mrStr W128 = sLit("W128")
+mrStr W256 = sLit("W256")
+mrStr W512 = sLit("W512")
mrStr W80 = sLit("W80")
@@ -216,6 +222,8 @@ widthInBits W16 = 16
widthInBits W32 = 32
widthInBits W64 = 64
widthInBits W128 = 128
+widthInBits W256 = 256
+widthInBits W512 = 512
widthInBits W80 = 80
widthInBytes :: Width -> Int
@@ -224,6 +232,8 @@ widthInBytes W16 = 2
widthInBytes W32 = 4
widthInBytes W64 = 8
widthInBytes W128 = 16
+widthInBytes W256 = 32
+widthInBytes W512 = 64
widthInBytes W80 = 10
widthFromBytes :: Int -> Width
@@ -232,6 +242,8 @@ widthFromBytes 2 = W16
widthFromBytes 4 = W32
widthFromBytes 8 = W64
widthFromBytes 16 = W128
+widthFromBytes 32 = W256
+widthFromBytes 64 = W512
widthFromBytes 10 = W80
widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
@@ -242,6 +254,8 @@ widthInLog W16 = 1
widthInLog W32 = 2
widthInLog W64 = 3
widthInLog W128 = 4
+widthInLog W256 = 5
+widthInLog W512 = 6
widthInLog W80 = panic "widthInLog: F80"
-- widening / narrowing
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index d45b103954..c468161c73 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -651,6 +651,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
+ MO_VU_Quot {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VU_Quot")
+ (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot"
+ ++ " should have been handled earlier!")
+ MO_VU_Rem {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VU_Rem")
+ (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
+ ++ " should have been handled earlier!")
+
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index d1128b07d3..0bb79ac147 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -256,6 +256,8 @@ pprGlobalReg gr
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
+ YmmReg n -> ptext (sLit "YMM") <> int n
+ ZmmReg n -> ptext (sLit "ZMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index c06dd60cb1..6b36ab09cd 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -56,6 +56,20 @@ baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
+baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
+baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
+baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
+baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
+baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
+baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
+baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
+baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
+baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
+baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
+baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
+baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
+baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
+baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs
index bd228d4617..1e68105aac 100644
--- a/compiler/codeGen/StgCmmArgRep.hs
+++ b/compiler/codeGen/StgCmmArgRep.hs
@@ -47,6 +47,8 @@ data ArgRep = P -- GC Ptr
| F -- Float
| D -- Double
| V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
+ | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
+ | V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc.
instance Outputable ArgRep where ppr = text . argRepString
argRepString :: ArgRep -> String
@@ -57,6 +59,8 @@ argRepString V = "V"
argRepString F = "F"
argRepString D = "D"
argRepString V16 = "V16"
+argRepString V32 = "V32"
+argRepString V64 = "V64"
toArgRep :: PrimRep -> ArgRep
toArgRep VoidRep = V
@@ -68,9 +72,11 @@ toArgRep Int64Rep = L
toArgRep Word64Rep = L
toArgRep FloatRep = F
toArgRep DoubleRep = D
-toArgRep (VecRep len elem)
- | len*primElemRepSizeB elem == 16 = V16
- | otherwise = error "toArgRep: bad vector primrep"
+toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
+ 16 -> V16
+ 32 -> V32
+ 64 -> V64
+ _ -> error "toArgRep: bad vector primrep"
isNonV :: ArgRep -> Bool
isNonV V = False
@@ -84,6 +90,8 @@ argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
argRepSizeW _ V = 0
argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
+argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags
+argRepSizeW dflags V64 = 64 `quot` wORD_SIZE dflags
idArgRep :: Id -> ArgRep
idArgRep = toArgRep . idPrimRep
@@ -132,4 +140,6 @@ slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
+slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1)
+slowCallPattern (V64: _) = (fsLit "stg_ap_v64", 1)
slowCallPattern [] = (fsLit "stg_ap_0", 0)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index b52d4e57df..84ff21b3d0 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -385,6 +385,8 @@ stdPattern reps
[D] -> Just ARG_D
[L] -> Just ARG_L
[V16] -> Just ARG_V16
+ [V32] -> Just ARG_V32
+ [V64] -> Just ARG_V64
[N,N] -> Just ARG_NN
[N,P] -> Just ARG_NP
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 8560f7cf1c..523fcb21f9 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -40,7 +40,7 @@ import FastString
import Outputable
import Util
-import Control.Monad (liftM)
+import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
@@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp
emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
@@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (
emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- IndexXXXArray
@@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO
emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- ReadXXXArray, identical to IndexXXXArray.
@@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- WriteXXXoffAddr
@@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J
emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
-- WriteXXXArray
@@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
@@ -556,78 +508,152 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
(MO_UF_Conv W64) [w]
--- SIMD vector packing and unpacking
-emitPrimOp _ [res] FloatToFloatX4Op [e] =
- doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
+-- SIMD primops
+emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
+ checkVecCompatibility dflags vcat n w
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
+ checkVecCompatibility dflags vcat n w
+ when (length es /= n) $
+ panic "emitPrimOp: VecPackOp has wrong number of arguments"
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
+ checkVecCompatibility dflags vcat n w
+ when (length res /= n) $
+ panic "emitPrimOp: VecUnpackOp has wrong number of results"
+ doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
- doVecPackOp Nothing vec4f32 zero es res
+emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
+ checkVecCompatibility dflags vcat n w
+ doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
- doVecUnpackOp Nothing vec4f32 arg res
+emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
- doVecInsertOp Nothing vec4f32 v e i res
+emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
- doVecPackOp Nothing vec2f64 zero [e,e] res
+emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteByteArrayOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
- doVecPackOp Nothing vec2f64 zero es res
+emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2f64 arg res
+emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2f64 v e i res
+emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
+emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
- doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
- doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
+emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64ToInt64X2Op [e] =
- doVecPackOp Nothing vec2b64 zero [e,e] res
+emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
- doVecPackOp Nothing vec2b64 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2b64 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2b64 v e i res
+emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
@@ -944,33 +970,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32)
translateOp _ FloatDivOp = Just (MO_F_Quot W32)
translateOp _ FloatNegOp = Just (MO_F_Neg W32)
--- Floating point vector ops
-
-translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
-translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
-translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
-translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
-translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
-
-translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
-translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
-translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
-translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
-translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
-
-translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
-translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
-translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
-translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
-translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
-translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
-
-translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
-translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
-translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
-translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
-translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
-translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
+-- Vector ops
+
+translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
+translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
+translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
+translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
+translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
+
+translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
+translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
+translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
+
+translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
+translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
-- Conversions
@@ -1183,6 +1202,70 @@ setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
------------------------------------------------------------------------------
+-- Helpers for translating vector primops.
+
+vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
+vecVmmType pocat n w = vec n (vecCmmCat pocat w)
+
+vecCmmCat :: PrimOpVecCat -> Width -> CmmType
+vecCmmCat IntVec = cmmBits
+vecCmmCat WordVec = cmmBits
+vecCmmCat FloatVec = cmmFloat
+
+vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemInjectCast _ FloatVec _ = Nothing
+vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ IntVec W64 = Nothing
+vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ WordVec W64 = Nothing
+vecElemInjectCast _ _ _ = Nothing
+
+vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemProjectCast _ FloatVec _ = Nothing
+vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
+vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
+vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
+vecElemProjectCast _ IntVec W64 = Nothing
+vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
+vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
+vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
+vecElemProjectCast _ WordVec W64 = Nothing
+vecElemProjectCast _ _ _ = Nothing
+
+-- Check to make sure that we can generate code for the specified vector type
+-- given the current set of dynamic flags.
+checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
+checkVecCompatibility dflags vcat l w = do
+ when (hscTarget dflags /= HscLlvm) $ do
+ sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
+ ,"Please use -fllvm."]
+ check vecWidth vcat l w
+ where
+ check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
+ check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
+ sorry $ "128-bit wide single-precision floating point " ++
+ "SIMD vector instructions require at least -msse."
+ check W128 _ _ _ | not (isSse2Enabled dflags) =
+ sorry $ "128-bit wide integer and double precision " ++
+ "SIMD vector instructions require at least -msse2."
+ check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
+ sorry $ "256-bit wide floating point " ++
+ "SIMD vector instructions require at least -mavx."
+ check W256 _ _ _ | not (isAvx2Enabled dflags) =
+ sorry $ "256-bit wide integer " ++
+ "SIMD vector instructions require at least -mavx2."
+ check W512 _ _ _ | not (isAvx512fEnabled dflags) =
+ sorry $ "512-bit wide " ++
+ "SIMD vector instructions require -mavx512f."
+ check _ _ _ _ = return ()
+
+ vecWidth = typeWidth (vecVmmType vcat l w)
+
+------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
doVecPackOp :: Maybe MachOp -- Cast from element to vector component
diff --git a/compiler/ghc.mk b/compiler/ghc.mk
index 4fdadd7c30..5b9610103b 100644
--- a/compiler/ghc.mk
+++ b/compiler/ghc.mk
@@ -250,8 +250,12 @@ PRIMOP_BITS_NAMES = primop-data-decl.hs-incl \
primop-code-size.hs-incl \
primop-can-fail.hs-incl \
primop-strictness.hs-incl \
- primop-fixity.hs-incl \
- primop-primop-info.hs-incl
+ primop-fixity.hs-incl \
+ primop-primop-info.hs-incl \
+ primop-vector-uniques.hs-incl \
+ primop-vector-tys.hs-incl \
+ primop-vector-tys-exports.hs-incl \
+ primop-vector-tycons.hs-incl
PRIMOP_BITS_STAGE1 = $(addprefix compiler/stage1/build/,$(PRIMOP_BITS_NAMES))
PRIMOP_BITS_STAGE2 = $(addprefix compiler/stage2/build/,$(PRIMOP_BITS_NAMES))
@@ -290,6 +294,14 @@ compiler/stage$1/build/primop-fixity.hs-incl: compiler/stage$1/build/primops.txt
"$$(genprimopcode_INPLACE)" --fixity < $$< > $$@
compiler/stage$1/build/primop-primop-info.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-primop-info < $$< > $$@
+compiler/stage$1/build/primop-vector-uniques.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-uniques < $$< > $$@
+compiler/stage$1/build/primop-vector-tys.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tys < $$< > $$@
+compiler/stage$1/build/primop-vector-tys-exports.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tys-exports < $$< > $$@
+compiler/stage$1/build/primop-vector-tycons.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tycons < $$< > $$@
# Usages aren't used any more; but the generator
# can still generate them if we want them back
diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs
index dd8bbe4c83..7e5ef354a9 100644
--- a/compiler/ghci/ByteCodeAsm.lhs
+++ b/compiler/ghci/ByteCodeAsm.lhs
@@ -462,6 +462,8 @@ push_alts L = bci_PUSH_ALTS_L
push_alts F = bci_PUSH_ALTS_F
push_alts D = bci_PUSH_ALTS_D
push_alts V16 = error "push_alts: vector"
+push_alts V32 = error "push_alts: vector"
+push_alts V64 = error "push_alts: vector"
return_ubx :: ArgRep -> Word16
return_ubx V = bci_RETURN_V
@@ -471,6 +473,8 @@ return_ubx L = bci_RETURN_L
return_ubx F = bci_RETURN_F
return_ubx D = bci_RETURN_D
return_ubx V16 = error "return_ubx: vector"
+return_ubx V32 = error "return_ubx: vector"
+return_ubx V64 = error "return_ubx: vector"
-- Make lists of host-sized words for literals, so that when the
-- words are placed in memory at increasing addresses, the
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index 6ae3c4252d..fb9668b5ee 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -155,6 +155,8 @@ llvmFunArgs dflags live =
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
+ isSSE (YmmReg _) = True
+ isSSE (ZmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 33107c0b68..5002b89b72 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -967,6 +967,9 @@ genMachOp _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
+
+ MO_VU_Quot _ _ -> panicOp
+ MO_VU_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
@@ -1140,6 +1143,9 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
+
+ MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
+ MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
@@ -1527,6 +1533,8 @@ funEpilogue live = do
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
+ isSSE (YmmReg _) = True
+ isSSE (ZmmReg _) = True
isSSE _ = False
-- Set to value or "undef" depending on whether the register is
diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs
index 1b87929499..9f20aa5de5 100644
--- a/compiler/llvmGen/LlvmCodeGen/Regs.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs
@@ -62,6 +62,18 @@ lmGlobalReg dflags suf reg
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
+ YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf
+ YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf
+ YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf
+ YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf
+ YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf
+ YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf
+ ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf
+ ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf
+ ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf
+ ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf
+ ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf
+ ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
@@ -72,6 +84,8 @@ lmGlobalReg dflags suf reg
floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
+ ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32))
+ zmmGlobal name = LMNLocalVar (fsLit name) (LMVector 16 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
diff --git a/compiler/llvmGen/LlvmMangler.hs b/compiler/llvmGen/LlvmMangler.hs
index 83a2be7f8d..5f74dc4564 100644
--- a/compiler/llvmGen/LlvmMangler.hs
+++ b/compiler/llvmGen/LlvmMangler.hs
@@ -20,6 +20,10 @@ import System.IO
import Data.List ( sortBy )
import Data.Function ( on )
+#if x86_64_TARGET_ARCH
+#define REWRITE_AVX
+#endif
+
-- Magic Strings
secStmt, infoSec, newLine, textStmt, dataStmt, syntaxUnified :: B.ByteString
secStmt = B.pack "\t.section\t"
@@ -47,7 +51,7 @@ llvmFixupAsm dflags f1 f2 = {-# SCC "llvm_mangler" #-} do
w <- openBinaryFile f2 WriteMode
ss <- readSections r w
hClose r
- let fixed = fixTables ss
+ let fixed = (map rewriteAVX . fixTables) ss
mapM_ (writeSection w) fixed
hClose w
return ()
@@ -90,6 +94,39 @@ writeSection w (hdr, cts) = do
B.hPutStrLn w hdr
B.hPutStrLn w cts
+#if REWRITE_AVX
+rewriteAVX :: Section -> Section
+rewriteAVX = rewriteVmovaps . rewriteVmovdqa
+
+rewriteVmovdqa :: Section -> Section
+rewriteVmovdqa = rewriteInstructions vmovdqa vmovdqu
+ where
+ vmovdqa, vmovdqu :: B.ByteString
+ vmovdqa = B.pack "vmovdqa"
+ vmovdqu = B.pack "vmovdqu"
+
+rewriteVmovap :: Section -> Section
+rewriteVmovap = rewriteInstructions vmovap vmovup
+ where
+ vmovap, vmovup :: B.ByteString
+ vmovap = B.pack "vmovap"
+ vmovup = B.pack "vmovup"
+
+rewriteInstructions :: B.ByteString -> B.ByteString -> Section -> Section
+rewriteInstructions matchBS replaceBS (hdr, cts) =
+ (hdr, loop cts)
+ where
+ loop :: B.ByteString -> B.ByteString
+ loop cts =
+ case B.breakSubstring cts matchBS of
+ (hd,tl) | B.null tl -> hd
+ | otherwise -> hd `B.append` replaceBS `B.append`
+ loop (B.drop (B.length matchBS) tl)
+#else /* !REWRITE_AVX */
+rewriteAVX :: Section -> Section
+rewriteAVX = id
+#endif /* !REWRITE_SSE */
+
-- | Reorder and convert sections so info tables end up next to the
-- code. Also does stack fixups.
fixTables :: [Section] -> [Section]
diff --git a/compiler/main/CmdLineParser.hs b/compiler/main/CmdLineParser.hs
index 6681186246..fef2701bcd 100644
--- a/compiler/main/CmdLineParser.hs
+++ b/compiler/main/CmdLineParser.hs
@@ -220,7 +220,7 @@ processOneArg opt_kind rest arg args
Just min <- parseInt min_s -> Right (f maj min, args)
| [maj_s] <- split '.' rest_no_eq,
Just maj <- parseInt maj_s -> Right (f maj 0, args)
- | otherwise -> Left ("malformed version argument in " ++ dash_arg)
+ | otherwise -> Right (f 1 0, args)
findArg :: [Flag m] -> String -> Maybe (String, OptKind m)
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs
index 035d5778d6..44a6fa57a1 100644
--- a/compiler/main/DriverPipeline.hs
+++ b/compiler/main/DriverPipeline.hs
@@ -1380,7 +1380,10 @@ runPhase (RealPhase LlvmLlc) input_fn dflags
++ [SysTools.Option tbaa]
++ map SysTools.Option fpOpts
++ map SysTools.Option abiOpts
- ++ map SysTools.Option sseOpts)
+ ++ map SysTools.Option sseOpts
+ ++ map SysTools.Option avxOpts
+ ++ map SysTools.Option avx512Opts
+ ++ map SysTools.Option stackAlignOpts)
return (RealPhase next_phase, output_fn)
where
@@ -1411,8 +1414,24 @@ runPhase (RealPhase LlvmLlc) input_fn dflags
sseOpts | isSse4_2Enabled dflags = ["-mattr=+sse42"]
| isSse2Enabled dflags = ["-mattr=+sse2"]
+ | isSseEnabled dflags = ["-mattr=+sse"]
| otherwise = []
+ avxOpts | isAvx512fEnabled dflags = ["-mattr=+avx512f"]
+ | isAvx2Enabled dflags = ["-mattr=+avx2"]
+ | isAvxEnabled dflags = ["-mattr=+avx"]
+ | otherwise = []
+
+ avx512Opts =
+ [ "-mattr=+avx512cd" | isAvx512cdEnabled dflags ] ++
+ [ "-mattr=+avx512er" | isAvx512erEnabled dflags ] ++
+ [ "-mattr=+avx512pf" | isAvx512pfEnabled dflags ]
+
+ stackAlignOpts =
+ case platformArch (targetPlatform dflags) of
+ ArchX86_64 | isAvxEnabled dflags -> ["-stack-alignment=32"]
+ _ -> []
+
-----------------------------------------------------------------------------
-- LlvmMangle phase
@@ -2015,12 +2034,18 @@ doCpp dflags raw input_fn output_fn = do
-- remember, in code we *compile*, the HOST is the same our TARGET,
-- and BUILD is the same as our HOST.
- let sse2 = isSse2Enabled dflags
- sse4_2 = isSse4_2Enabled dflags
- sse_defs =
- [ "-D__SSE__=1" | sse2 || sse4_2 ] ++
- [ "-D__SSE2__=1" | sse2 || sse4_2 ] ++
- [ "-D__SSE4_2__=1" | sse4_2 ]
+ let sse_defs =
+ [ "-D__SSE__=1" | isSseEnabled dflags ] ++
+ [ "-D__SSE2__=1" | isSse2Enabled dflags ] ++
+ [ "-D__SSE4_2__=1" | isSse4_2Enabled dflags ]
+
+ let avx_defs =
+ [ "-D__AVX__=1" | isAvxEnabled dflags ] ++
+ [ "-D__AVX2__=1" | isAvx2Enabled dflags ] ++
+ [ "-D__AVX512CD__=1" | isAvx512cdEnabled dflags ] ++
+ [ "-D__AVX512ER__=1" | isAvx512erEnabled dflags ] ++
+ [ "-D__AVX512F__=1" | isAvx512fEnabled dflags ] ++
+ [ "-D__AVX512PF__=1" | isAvx512pfEnabled dflags ]
backend_defs <- getBackendDefs dflags
@@ -2031,6 +2056,7 @@ doCpp dflags raw input_fn output_fn = do
++ map SysTools.Option backend_defs
++ map SysTools.Option hscpp_opts
++ map SysTools.Option sse_defs
+ ++ map SysTools.Option avx_defs
-- Set the language mode to assembler-with-cpp when preprocessing. This
-- alleviates some of the C99 macro rules relating to whitespace and the hash
-- operator, which we tend to abuse. Clang in particular is not very happy
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 63da5d6100..d6b386a475 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -128,9 +128,16 @@ module DynFlags (
unsafeGlobalDynFlags, setUnsafeGlobalDynFlags,
- -- * SSE
+ -- * SSE and AVX
+ isSseEnabled,
isSse2Enabled,
isSse4_2Enabled,
+ isAvxEnabled,
+ isAvx2Enabled,
+ isAvx512cdEnabled,
+ isAvx512erEnabled,
+ isAvx512fEnabled,
+ isAvx512pfEnabled,
-- * Linker information
LinkerInfo(..),
@@ -303,6 +310,7 @@ data GeneralFlag
| Opt_RegsIterative -- do iterative coalescing graph coloring register allocation
| Opt_PedanticBottoms -- Be picky about how we treat bottom
| Opt_LlvmTBAA -- Use LLVM TBAA infastructure for improving AA (hidden flag)
+ | Opt_LlvmPassVectorsInRegisters -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
| Opt_IrrefutableTuples
| Opt_CmmSink
| Opt_CmmElimCommonBlocks
@@ -770,6 +778,12 @@ data DynFlags = DynFlags {
-- | Machine dependant flags (-m<blah> stuff)
sseVersion :: Maybe (Int, Int), -- (major, minor)
+ avx :: Bool,
+ avx2 :: Bool,
+ avx512cd :: Bool, -- Enable AVX-512 Conflict Detection Instructions.
+ avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
+ avx512f :: Bool, -- Enable AVX-512 instructions.
+ avx512pf :: Bool, -- Enable AVX-512 PreFetch Instructions.
-- | Run-time linker information (what options we need, etc.)
rtldFlags :: IORef (Maybe LinkerInfo)
@@ -1401,6 +1415,12 @@ defaultDynFlags mySettings =
interactivePrint = Nothing,
nextWrapperNum = panic "defaultDynFlags: No nextWrapperNum",
sseVersion = Nothing,
+ avx = False,
+ avx2 = False,
+ avx512cd = False,
+ avx512er = False,
+ avx512f = False,
+ avx512pf = False,
rtldFlags = panic "defaultDynFlags: no rtldFlags"
}
@@ -2305,6 +2325,12 @@ dynamic_flags = [
, Flag "monly-3-regs" (NoArg (addWarn "The -monly-3-regs flag does nothing; it will be removed in a future GHC release"))
, Flag "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release"))
, Flag "msse" (versionSuffix (\maj min d -> d{ sseVersion = Just (maj, min) }))
+ , Flag "mavx" (noArg (\d -> d{ avx = True }))
+ , Flag "mavx2" (noArg (\d -> d{ avx2 = True }))
+ , Flag "mavx512cd" (noArg (\d -> d{ avx512cd = True }))
+ , Flag "mavx512er" (noArg (\d -> d{ avx512er = True }))
+ , Flag "mavx512f" (noArg (\d -> d{ avx512f = True }))
+ , Flag "mavx512pf" (noArg (\d -> d{ avx512pf = True }))
------ Warning opts -------------------------------------------------
, Flag "W" (NoArg (mapM_ setWarningFlag minusWOpts))
@@ -2587,6 +2613,7 @@ fFlags = [
( "regs-graph", Opt_RegsGraph, nop ),
( "regs-iterative", Opt_RegsIterative, nop ),
( "llvm-tbaa", Opt_LlvmTBAA, nop), -- hidden flag
+ ( "llvm-pass-vectors-in-regs", Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
( "irrefutable-tuples", Opt_IrrefutableTuples, nop ),
( "cmm-sink", Opt_CmmSink, nop ),
( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ),
@@ -3585,12 +3612,18 @@ setUnsafeGlobalDynFlags :: DynFlags -> IO ()
setUnsafeGlobalDynFlags = writeIORef v_unsafeGlobalDynFlags
-- -----------------------------------------------------------------------------
--- SSE
+-- SSE and AVX
-- TODO: Instead of using a separate predicate (i.e. isSse2Enabled) to
-- check if SSE is enabled, we might have x86-64 imply the -msse2
-- flag.
+isSseEnabled :: DynFlags -> Bool
+isSseEnabled dflags = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> True
+ ArchX86 -> sseVersion dflags >= Just (1,0)
+ _ -> False
+
isSse2Enabled :: DynFlags -> Bool
isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be
@@ -3605,6 +3638,24 @@ isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
isSse4_2Enabled :: DynFlags -> Bool
isSse4_2Enabled dflags = sseVersion dflags >= Just (4,2)
+isAvxEnabled :: DynFlags -> Bool
+isAvxEnabled dflags = avx dflags || avx2 dflags || avx512f dflags
+
+isAvx2Enabled :: DynFlags -> Bool
+isAvx2Enabled dflags = avx2 dflags || avx512f dflags
+
+isAvx512cdEnabled :: DynFlags -> Bool
+isAvx512cdEnabled dflags = avx512cd dflags
+
+isAvx512erEnabled :: DynFlags -> Bool
+isAvx512erEnabled dflags = avx512er dflags
+
+isAvx512fEnabled :: DynFlags -> Bool
+isAvx512fEnabled dflags = avx512f dflags
+
+isAvx512pfEnabled :: DynFlags -> Bool
+isAvx512pfEnabled dflags = avx512pf dflags
+
-- -----------------------------------------------------------------------------
-- Linker information
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index f6143d3fb9..e18da25347 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -610,6 +610,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
+ MO_VU_Quot {} -> needLlvm
+ MO_VU_Rem {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
diff --git a/compiler/prelude/PrelNames.lhs b/compiler/prelude/PrelNames.lhs
index 81fb9be52a..07730e653d 100644
--- a/compiler/prelude/PrelNames.lhs
+++ b/compiler/prelude/PrelNames.lhs
@@ -1474,15 +1474,6 @@ typeNatMulTyFamNameKey = mkPreludeTyConUnique 163
typeNatExpTyFamNameKey = mkPreludeTyConUnique 164
typeNatLeqTyFamNameKey = mkPreludeTyConUnique 165
--- SIMD vector types (Unique keys)
-floatX4PrimTyConKey, doubleX2PrimTyConKey, int32X4PrimTyConKey,
- int64X2PrimTyConKey :: Unique
-
-floatX4PrimTyConKey = mkPreludeTyConUnique 170
-doubleX2PrimTyConKey = mkPreludeTyConUnique 171
-int32X4PrimTyConKey = mkPreludeTyConUnique 172
-int64X2PrimTyConKey = mkPreludeTyConUnique 173
-
ntTyConKey:: Unique
ntTyConKey = mkPreludeTyConUnique 174
coercibleTyConKey :: Unique
@@ -1492,6 +1483,12 @@ coercibleTyConKey = mkPreludeTyConUnique 175
-- USES TyConUniques 200-299
-----------------------------------------------------
+----------------------- SIMD ------------------------
+-- USES TyConUniques 300-399
+-----------------------------------------------------
+
+#include "primop-vector-uniques.hs-incl"
+
unitTyConKey :: Unique
unitTyConKey = mkTupleTyConUnique BoxedTuple 0
\end{code}
diff --git a/compiler/prelude/PrimOp.lhs b/compiler/prelude/PrimOp.lhs
index 8b1970c37f..22753ee3ea 100644
--- a/compiler/prelude/PrimOp.lhs
+++ b/compiler/prelude/PrimOp.lhs
@@ -5,7 +5,7 @@
\begin{code}
module PrimOp (
- PrimOp(..), allThePrimOps,
+ PrimOp(..), PrimOpVecCat(..), allThePrimOps,
primOpType, primOpSig,
primOpTag, maxPrimOpTag, primOpOcc,
@@ -25,6 +25,7 @@ module PrimOp (
import TysPrim
import TysWiredIn
+import CmmType
import Demand
import Var ( TyVar )
import OccName ( OccName, pprOccName, mkVarOccFS )
@@ -64,6 +65,7 @@ primOpTag op = iBox (tagOf_PrimOp op)
-- supplies
-- tagOf_PrimOp :: PrimOp -> FastInt
#include "primop-tag.hs-incl"
+tagOf_PrimOp _ = error "tagOf_PrimOp: unknown primop"
instance Eq PrimOp where
@@ -82,6 +84,12 @@ instance Outputable PrimOp where
ppr op = pprPrimOp op
\end{code}
+\begin{code}
+data PrimOpVecCat = IntVec
+ | WordVec
+ | FloatVec
+\end{code}
+
An @Enum@-derived list would be better; meanwhile... (ToDo)
\begin{code}
@@ -173,6 +181,7 @@ else, notably a type, can be constructed) for each @PrimOp@.
\begin{code}
primOpInfo :: PrimOp -> PrimOpInfo
#include "primop-primop-info.hs-incl"
+primOpInfo _ = error "primOpInfo: unknown primop"
\end{code}
Here are a load of comments from the old primOp info:
diff --git a/compiler/prelude/TysPrim.lhs b/compiler/prelude/TysPrim.lhs
index f166065b22..b17f1a6f9a 100644
--- a/compiler/prelude/TysPrim.lhs
+++ b/compiler/prelude/TysPrim.lhs
@@ -76,11 +76,8 @@ module TysPrim(
-- * Any
anyTy, anyTyCon, anyTypeOfKind,
- -- * SIMD
- floatX4PrimTyCon, floatX4PrimTy,
- doubleX2PrimTyCon, doubleX2PrimTy,
- int32X4PrimTyCon, int32X4PrimTy,
- int64X2PrimTyCon, int64X2PrimTy
+ -- * SIMD
+#include "primop-vector-tys-exports.hs-incl"
) where
#include "HsVersions.h"
@@ -144,10 +141,7 @@ primTyCons
, superKindTyCon
, anyKindTyCon
- , floatX4PrimTyCon
- , doubleX2PrimTyCon
- , int32X4PrimTyCon
- , int64X2PrimTyCon
+#include "primop-vector-tycons.hs-incl"
]
mkPrimTc :: FastString -> Unique -> TyCon -> Name
@@ -157,7 +151,7 @@ mkPrimTc fs unique tycon
(ATyCon tycon) -- Relevant TyCon
UserSyntax -- None are built-in syntax
-charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName, floatX4PrimTyConName, doubleX2PrimTyConName, int32X4PrimTyConName, int64X2PrimTyConName :: Name
+charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName :: Name
charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon
int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon
@@ -186,10 +180,6 @@ stableNamePrimTyConName = mkPrimTc (fsLit "StableName#") stableNamePrimTyC
bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon
weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
-floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon
-doubleX2PrimTyConName = mkPrimTc (fsLit "DoubleX2#") doubleX2PrimTyConKey doubleX2PrimTyCon
-int32X4PrimTyConName = mkPrimTc (fsLit "Int32X4#") int32X4PrimTyConKey int32X4PrimTyCon
-int64X2PrimTyConName = mkPrimTc (fsLit "Int64X2#") int64X2PrimTyConKey int64X2PrimTyCon
\end{code}
%************************************************************************
@@ -766,28 +756,10 @@ anyTypeOfKind kind = TyConApp anyTyCon [kind]
%************************************************************************
%* *
-\subsection{SIMD vector type}
+\subsection{SIMD vector types}
%* *
%************************************************************************
\begin{code}
-floatX4PrimTy :: Type
-floatX4PrimTy = mkTyConTy floatX4PrimTyCon
-floatX4PrimTyCon :: TyCon
-floatX4PrimTyCon = pcPrimTyCon0 floatX4PrimTyConName (VecRep 4 FloatElemRep)
-
-doubleX2PrimTy :: Type
-doubleX2PrimTy = mkTyConTy doubleX2PrimTyCon
-doubleX2PrimTyCon :: TyCon
-doubleX2PrimTyCon = pcPrimTyCon0 doubleX2PrimTyConName (VecRep 2 DoubleElemRep)
-
-int32X4PrimTy :: Type
-int32X4PrimTy = mkTyConTy int32X4PrimTyCon
-int32X4PrimTyCon :: TyCon
-int32X4PrimTyCon = pcPrimTyCon0 int32X4PrimTyConName (VecRep 4 Int32ElemRep)
-
-int64X2PrimTy :: Type
-int64X2PrimTy = mkTyConTy int64X2PrimTyCon
-int64X2PrimTyCon :: TyCon
-int64X2PrimTyCon = pcPrimTyCon0 int64X2PrimTyConName (VecRep 2 Int64ElemRep)
+#include "primop-vector-tys.hs-incl"
\end{code}
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index cfd6afa4c6..78d8925079 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -39,6 +39,22 @@
-- (eg, out_of_line), whilst avoiding parsing complex expressions
-- needed for strictness info.
+-- The vector attribute is rather special. It takes a list of 3-tuples, each of
+-- which is of the form <ELEM_TYPE,SCALAR_TYPE,LENGTH>. ELEM_TYPE is the type of
+-- the elements in the vector; LENGTH is the length of the vector; and
+-- SCALAR_TYPE is the scalar type used to inject to/project from vector
+-- element. Note that ELEM_TYPE and SCALAR_TYPE are not the same; for example,
+-- to broadcast a scalar value to a vector whose elements are of type Int8, we
+-- use an Int#.
+
+-- When a primtype or primop has a vector attribute, it is instantiated at each
+-- 3-tuple in the list of 3-tuples. That is, the vector attribute allows us to
+-- define a family of types or primops. Vector support also adds three new
+-- keywords: VECTOR, SCALAR, and VECTUPLE. These keywords are expanded to types
+-- derived from the 3-tuple. For the 3-tuple <Int64,INT64,2>, VECTOR expands to
+-- Int64X2#, SCALAR expands to INT64, and VECTUPLE expands to (# INT64, INT64
+-- #).
+
defaults
has_side_effects = False
out_of_line = False -- See Note Note [PrimOp can_fail and has_side_effects] in PrimOp
@@ -48,6 +64,7 @@ defaults
strictness = { \ arity -> mkStrictSig (mkTopDmdType (replicate arity topDmd) topRes) }
fixity = Nothing
llvm_only = False
+ vector = []
-- Currently, documentation is produced using latex, so contents of
-- description fields should be legal latex. Descriptions can contain
@@ -2373,479 +2390,210 @@ primclass Coercible a b
}
------------------------------------------------------------------------
-section "Float SIMD Vectors"
- {Operations on SIMD vectors of 4 single-precision (32-bit)
- floating-point numbers.}
-------------------------------------------------------------------------
-
-primtype FloatX4#
+section "SIMD Vectors"
+ {Operations on SIMD vectors.}
+------------------------------------------------------------------------
+
+#define ALL_VECTOR_TYPES \
+ [<Int8,Int#,16>,<Int16,Int#,8>,<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Int8,Int#,32>,<Int16,Int#,16>,<Int32,INT32,8>,<Int64,INT64,4> \
+ ,<Int8,Int#,64>,<Int16,Int#,32>,<Int32,INT32,16>,<Int64,INT64,8> \
+ ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,WORD32,4>,<Word64,WORD64,2> \
+ ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,WORD32,8>,<Word64,WORD64,4> \
+ ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,WORD32,16>,<Word64,WORD64,8> \
+ ,<Float,Float#,4>,<Double,Double#,2> \
+ ,<Float,Float#,8>,<Double,Double#,4> \
+ ,<Float,Float#,16>,<Double,Double#,8>]
+
+#define SIGNED_VECTOR_TYPES \
+ [<Int8,Int#,16>,<Int16,Int#,8>,<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Int8,Int#,32>,<Int16,Int#,16>,<Int32,INT32,8>,<Int64,INT64,4> \
+ ,<Int8,Int#,64>,<Int16,Int#,32>,<Int32,INT32,16>,<Int64,INT64,8> \
+ ,<Float,Float#,4>,<Double,Double#,2> \
+ ,<Float,Float#,8>,<Double,Double#,4> \
+ ,<Float,Float#,16>,<Double,Double#,8>]
+
+#define FLOAT_VECTOR_TYPES \
+ [<Float,Float#,4>,<Double,Double#,2> \
+ ,<Float,Float#,8>,<Double,Double#,4> \
+ ,<Float,Float#,16>,<Double,Double#,8>]
+
+#define INT_VECTOR_TYPES \
+ [<Int8,Int#,16>,<Int16,Int#,8>,<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Int8,Int#,32>,<Int16,Int#,16>,<Int32,INT32,8>,<Int64,INT64,4> \
+ ,<Int8,Int#,64>,<Int16,Int#,32>,<Int32,INT32,16>,<Int64,INT64,8> \
+ ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,WORD32,4>,<Word64,WORD64,2> \
+ ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,WORD32,8>,<Word64,WORD64,4> \
+ ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,WORD32,16>,<Word64,WORD64,8>]
+
+primtype VECTOR
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatToFloatX4Op "floatToFloatX4#" GenPrimOp
- Float# -> FloatX4#
+primop VecBroadcastOp "broadcast#" GenPrimOp
+ SCALAR -> VECTOR
+ { Broadcast a scalar to all elements of a vector. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4PackOp "packFloatX4#" GenPrimOp
- Float# -> Float# -> Float# -> Float# -> FloatX4#
+primop VecPackOp "pack#" GenPrimOp
+ VECTUPLE -> VECTOR
+ { Pack the elements of an unboxed tuple into a vector. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp
- FloatX4# -> (# Float#, Float#, Float#, Float# #)
+primop VecUnpackOp "unpack#" GenPrimOp
+ VECTOR -> VECTUPLE
+ { Unpack the elements of a vector into an unboxed tuple. #}
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4InsertOp "insertFloatX4#" GenPrimOp
- FloatX4# -> Float# -> Int# -> FloatX4#
+primop VecInsertOp "insert#" GenPrimOp
+ VECTOR -> SCALAR -> Int# -> VECTOR
+ { Insert a scalar at the given position in a vector. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4AddOp "plusFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
+primop VecAddOp "plus#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Add two vectors element-wise. }
with commutable = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4SubOp "minusFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
+primop VecSubOp "minus#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Subtract two vectors element-wise. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4MulOp "timesFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
+primop VecMulOp "times#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Multiply two vectors element-wise. }
with commutable = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4DivOp "divideFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop FloatX4NegOp "negateFloatX4#" Monadic
- FloatX4# -> FloatX4#
- with llvm_only = True
-
-primop IndexByteArrayOp_FloatX4 "indexFloatX4Array#" GenPrimOp
- ByteArray# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_FloatX4 "readFloatX4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_FloatX4 "writeFloatX4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_FloatX4 "indexFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_FloatX4 "readFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_FloatX4 "writeFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexByteArrayOp_FloatAsFloatX4 "indexFloatArrayAsFloatX4#" GenPrimOp
- ByteArray# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_FloatAsFloatX4 "readFloatArrayAsFloatX4#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_FloatAsFloatX4 "writeFloatArrayAsFloatX4#" GenPrimOp
- MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_FloatAsFloatX4 "indexFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_FloatAsFloatX4 "readFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_FloatAsFloatX4 "writeFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-------------------------------------------------------------------------
-section "Double SIMD Vectors"
- {Operations on SIMD vectors of 2 double-precision (64-bit)
- floating-point numbers.}
-------------------------------------------------------------------------
-
-primtype DoubleX2#
- with llvm_only = True
-
-primop DoubleToDoubleX2Op "doubleToDoubleX2#" GenPrimOp
- Double# -> DoubleX2#
- with llvm_only = True
-
-primop DoubleX2InsertOp "insertDoubleX2#" GenPrimOp
- DoubleX2# -> Double# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop DoubleX2PackOp "packDoubleX2#" GenPrimOp
- Double# -> Double# -> DoubleX2#
- with llvm_only = True
-
-primop DoubleX2UnpackOp "unpackDoubleX2#" GenPrimOp
- DoubleX2# -> (# Double#, Double# #)
- with llvm_only = True
-
-primop DoubleX2AddOp "plusDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
- with commutable = True
- llvm_only = True
-
-primop DoubleX2SubOp "minusDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
- with llvm_only = True
-
-primop DoubleX2MulOp "timesDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
- with commutable = True
- llvm_only = True
-
-primop DoubleX2DivOp "divideDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop DoubleX2NegOp "negateDoubleX2#" Monadic
- DoubleX2# -> DoubleX2#
- with llvm_only = True
-
-primop IndexByteArrayOp_DoubleX2 "indexDoubleX2Array#" GenPrimOp
- ByteArray# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_DoubleX2 "readDoubleX2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_DoubleX2 "writeDoubleX2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_DoubleX2 "indexDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_DoubleX2 "readDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_DoubleX2 "writeDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexByteArrayOp_DoubleAsDoubleX2 "indexDoubleArrayAsDoubleX2#" GenPrimOp
- ByteArray# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_DoubleAsDoubleX2 "readDoubleArrayAsDoubleX2#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_DoubleAsDoubleX2 "writeDoubleArrayAsDoubleX2#" GenPrimOp
- MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_DoubleAsDoubleX2 "indexDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> DoubleX2#
+primop VecDivOp "divide#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Divide two vectors element-wise. }
with can_fail = True
llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
-primop ReadOffAddrOp_DoubleAsDoubleX2 "readDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_DoubleAsDoubleX2 "writeDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-------------------------------------------------------------------------
-section "Int32 SIMD Vectors"
- {Operations on SIMD vectors of 4 32-bit signed integers.}
-------------------------------------------------------------------------
-
-primtype Int32X4#
- with llvm_only = True
-
-primop Int32ToInt32X4Op "int32ToInt32X4#" GenPrimOp
- INT32 -> Int32X4#
- with llvm_only = True
-
-primop Int32X4InsertOp "insertInt32X4#" GenPrimOp
- Int32X4# -> INT32 -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop Int32X4PackOp "packInt32X4#" GenPrimOp
- INT32 -> INT32 -> INT32 -> INT32 -> Int32X4#
- with llvm_only = True
-
-primop Int32X4UnpackOp "unpackInt32X4#" GenPrimOp
- Int32X4# -> (# INT32, INT32, INT32, INT32 #)
- with llvm_only = True
-
-primop Int32X4AddOp "plusInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with commutable = True
- llvm_only = True
-
-primop Int32X4SubOp "minusInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with llvm_only = True
-
-primop Int32X4MulOp "timesInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with commutable = True
- llvm_only = True
-
-primop Int32X4QuotOp "quotInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
+primop VecQuotOp "quot#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Rounds towards zero element-wise. }
with can_fail = True
llvm_only = True
+ vector = INT_VECTOR_TYPES
-primop Int32X4RemOp "remInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
+primop VecRemOp "rem#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Satisfies \texttt{(quot\# x y) times\# y plus\# (rem\# x y) == x}. }
with can_fail = True
llvm_only = True
+ vector = INT_VECTOR_TYPES
-primop Int32X4NegOp "negateInt32X4#" Monadic
- Int32X4# -> Int32X4#
+primop VecNegOp "negate#" Monadic
+ VECTOR -> VECTOR
+ { Negate element-wise. }
with llvm_only = True
+ vector = SIGNED_VECTOR_TYPES
-primop IndexByteArrayOp_Int32X4 "indexInt32X4Array#" GenPrimOp
- ByteArray# -> Int# -> Int32X4#
+primop VecIndexByteArrayOp "indexArray#" GenPrimOp
+ ByteArray# -> Int# -> VECTOR
+ { Read a vector from specified index of immutable array. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadByteArrayOp_Int32X4 "readInt32X4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
+primop VecReadByteArrayOp "readArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Read a vector from specified index of mutable array. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteByteArrayOp_Int32X4 "writeInt32X4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
+primop VecWriteByteArrayOp "writeArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s
+ { Write a vector to specified index of mutable array. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexOffAddrOp_Int32X4 "indexInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> Int32X4#
+primop VecIndexOffAddrOp "indexOffAddr#" GenPrimOp
+ Addr# -> Int# -> VECTOR
+ { Reads vector; offset in bytes. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadOffAddrOp_Int32X4 "readInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
+primop VecReadOffAddrOp "readOffAddr#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Reads vector; offset in bytes. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteOffAddrOp_Int32X4 "writeInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> Int32X4# -> State# s -> State# s
+primop VecWriteOffAddrOp "writeOffAddr#" GenPrimOp
+ Addr# -> Int# -> VECTOR -> State# s -> State# s
+ { Write vector; offset in bytes. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexByteArrayOp_Int32AsInt32X4 "indexInt32ArrayAsInt32X4#" GenPrimOp
- ByteArray# -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_Int32AsInt32X4 "readInt32ArrayAsInt32X4#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_Int32AsInt32X4 "writeInt32ArrayAsInt32X4#" GenPrimOp
- MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_Int32AsInt32X4 "indexInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_Int32AsInt32X4 "readInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_Int32AsInt32X4 "writeInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> Int32X4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-------------------------------------------------------------------------
-section "Int64 SIMD Vectors"
- {Operations on SIMD vectors of 2 64-bit signed integers.}
-------------------------------------------------------------------------
-
-primtype Int64X2#
- with llvm_only = True
-
-primop Int64ToInt64X2Op "int64ToInt64X2#" GenPrimOp
- INT64 -> Int64X2#
- with llvm_only = True
-
-primop Int64X2InsertOp "insertInt64X2#" GenPrimOp
- Int64X2# -> INT64 -> Int# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2PackOp "packInt64X2#" GenPrimOp
- INT64 -> INT64 -> Int64X2#
- with llvm_only = True
-
-primop Int64X2UnpackOp "unpackInt64X2#" GenPrimOp
- Int64X2# -> (# INT64, INT64 #)
- with llvm_only = True
-
-primop Int64X2AddOp "plusInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with commutable = True
- llvm_only = True
-
-primop Int64X2SubOp "minusInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with llvm_only = True
-
-primop Int64X2MulOp "timesInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with commutable = True
- llvm_only = True
-
-primop Int64X2QuotOp "quotInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2RemOp "remInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2NegOp "negateInt64X2#" Monadic
- Int64X2# -> Int64X2#
- with llvm_only = True
-
-primop IndexByteArrayOp_Int64X2 "indexInt64X2Array#" GenPrimOp
- ByteArray# -> Int# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_Int64X2 "readInt64X2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_Int64X2 "writeInt64X2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_Int64X2 "indexInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_Int64X2 "readInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int64X2# #)
- with has_side_effects = True
- llvm_only = True
-
-primop WriteOffAddrOp_Int64X2 "writeInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> Int64X2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-primop IndexByteArrayOp_Int64AsInt64X2 "indexInt64ArrayAsInt64X2#" GenPrimOp
- ByteArray# -> Int# -> Int64X2#
+primop VecIndexScalarByteArrayOp "indexArrayAs#" GenPrimOp
+ ByteArray# -> Int# -> VECTOR
+ { Read a vector from specified index of immutable array of scalars; offset is in scalar elements. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadByteArrayOp_Int64AsInt64X2 "readInt64ArrayAsInt64X2#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #)
+primop VecReadScalarByteArrayOp "readArrayAs#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Read a vector from specified index of mutable array of scalars; offset is in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteByteArrayOp_Int64AsInt64X2 "writeInt64ArrayAsInt64X2#" GenPrimOp
- MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s
+primop VecWriteScalarByteArrayOp "writeArrayAs#" GenPrimOp
+ MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s
+ { Write a vector to specified index of mutable array of scalars; offset is in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexOffAddrOp_Int64AsInt64X2 "indexInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> Int64X2#
+primop VecIndexScalarOffAddrOp "indexOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> VECTOR
+ { Reads vector; offset in scalar elements. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadOffAddrOp_Int64AsInt64X2 "readInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int64X2# #)
+primop VecReadScalarOffAddrOp "readOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Reads vector; offset in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> Int64X2# -> State# s -> State# s
+primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> VECTOR -> State# s -> State# s
+ { Write vector; offset in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
------------------------------------------------------------------------
section "Prefetch"
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 89baaa0987..e4898b48d7 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -99,6 +99,8 @@
#define D_ float64
#define L_ bits64
#define V16_ bits128
+#define V32_ bits256
+#define V64_ bits512
#define SIZEOF_StgDouble 8
#define SIZEOF_StgWord64 8
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 8007574b10..3d6dd41ae4 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -65,6 +65,40 @@ import Reg
# define xmm14 38
# define xmm15 39
+# define ymm0 40
+# define ymm1 41
+# define ymm2 42
+# define ymm3 43
+# define ymm4 44
+# define ymm5 45
+# define ymm6 46
+# define ymm7 47
+# define ymm8 48
+# define ymm9 49
+# define ymm10 50
+# define ymm11 51
+# define ymm12 52
+# define ymm13 53
+# define ymm14 54
+# define ymm15 55
+
+# define zmm0 56
+# define zmm1 57
+# define zmm2 58
+# define zmm3 59
+# define zmm4 60
+# define zmm5 61
+# define zmm6 62
+# define zmm7 63
+# define zmm8 64
+# define zmm9 65
+# define zmm10 66
+# define zmm11 67
+# define zmm12 68
+# define zmm13 69
+# define zmm14 70
+# define zmm15 71
+
#elif MACHREGS_powerpc
# define r0 0
@@ -391,6 +425,12 @@ activeStgRegs = [
#ifdef REG_XMM1
,XmmReg 1
#endif
+#ifdef REG_YMM1
+ ,YmmReg 1
+#endif
+#ifdef REG_ZMM1
+ ,ZmmReg 1
+#endif
#ifdef REG_F2
,FloatReg 2
#endif
@@ -400,6 +440,12 @@ activeStgRegs = [
#ifdef REG_XMM2
,XmmReg 2
#endif
+#ifdef REG_YMM2
+ ,YmmReg 2
+#endif
+#ifdef REG_ZMM2
+ ,ZmmReg 2
+#endif
#ifdef REG_F3
,FloatReg 3
#endif
@@ -409,6 +455,12 @@ activeStgRegs = [
#ifdef REG_XMM3
,XmmReg 3
#endif
+#ifdef REG_YMM3
+ ,YmmReg 3
+#endif
+#ifdef REG_ZMM3
+ ,ZmmReg 3
+#endif
#ifdef REG_F4
,FloatReg 4
#endif
@@ -418,6 +470,12 @@ activeStgRegs = [
#ifdef REG_XMM4
,XmmReg 4
#endif
+#ifdef REG_YMM4
+ ,YmmReg 4
+#endif
+#ifdef REG_ZMM4
+ ,ZmmReg 4
+#endif
#ifdef REG_F5
,FloatReg 5
#endif
@@ -427,6 +485,12 @@ activeStgRegs = [
#ifdef REG_XMM5
,XmmReg 5
#endif
+#ifdef REG_YMM5
+ ,YmmReg 5
+#endif
+#ifdef REG_ZMM5
+ ,ZmmReg 5
+#endif
#ifdef REG_F6
,FloatReg 6
#endif
@@ -436,6 +500,12 @@ activeStgRegs = [
#ifdef REG_XMM6
,XmmReg 6
#endif
+#ifdef REG_YMM6
+ ,YmmReg 6
+#endif
+#ifdef REG_ZMM6
+ ,ZmmReg 6
+#endif
#else /* MAX_REAL_XMM_REG == 0 */
#ifdef REG_F1
,FloatReg 1
@@ -587,13 +657,65 @@ globalRegMaybe (DoubleReg 6) =
Just (RealRegSingle REG_D6)
# endif
# endif
-#if MAX_REAL_XMM_REG != 0
+# if MAX_REAL_XMM_REG != 0
+# ifdef REG_XMM1
globalRegMaybe (XmmReg 1) = Just (RealRegSingle REG_XMM1)
+# endif
+# ifdef REG_XMM2
globalRegMaybe (XmmReg 2) = Just (RealRegSingle REG_XMM2)
+# endif
+# ifdef REG_XMM3
globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3)
+# endif
+# ifdef REG_XMM4
globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4)
+# endif
+# ifdef REG_XMM5
globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5)
+# endif
+# ifdef REG_XMM6
globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6)
+# endif
+# endif
+# if MAX_REAL_YMM_REG != 0
+# ifdef REG_YMM1
+globalRegMaybe (YmmReg 1) = Just (RealRegSingle REG_YMM1)
+# endif
+# ifdef REG_YMM2
+globalRegMaybe (YmmReg 2) = Just (RealRegSingle REG_YMM2)
+# endif
+# ifdef REG_YMM3
+globalRegMaybe (YmmReg 3) = Just (RealRegSingle REG_YMM3)
+# endif
+# ifdef REG_YMM4
+globalRegMaybe (YmmReg 4) = Just (RealRegSingle REG_YMM4)
+# endif
+# ifdef REG_YMM5
+globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5)
+# endif
+# ifdef REG_YMM6
+globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6)
+# endif
+# endif
+# if MAX_REAL_ZMM_REG != 0
+# ifdef REG_ZMM1
+globalRegMaybe (ZmmReg 1) = Just (RealRegSingle REG_ZMM1)
+# endif
+# ifdef REG_ZMM2
+globalRegMaybe (ZmmReg 2) = Just (RealRegSingle REG_ZMM2)
+# endif
+# ifdef REG_ZMM3
+globalRegMaybe (ZmmReg 3) = Just (RealRegSingle REG_ZMM3)
+# endif
+# ifdef REG_ZMM4
+globalRegMaybe (ZmmReg 4) = Just (RealRegSingle REG_ZMM4)
+# endif
+# ifdef REG_ZMM5
+globalRegMaybe (ZmmReg 5) = Just (RealRegSingle REG_ZMM5)
+# endif
+# ifdef REG_ZMM6
+globalRegMaybe (ZmmReg 6) = Just (RealRegSingle REG_ZMM6)
+# endif
# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
diff --git a/includes/rts/storage/FunTypes.h b/includes/rts/storage/FunTypes.h
index 0ba65bb79d..094bd87d01 100644
--- a/includes/rts/storage/FunTypes.h
+++ b/includes/rts/storage/FunTypes.h
@@ -34,22 +34,24 @@
#define ARG_D 7
#define ARG_L 8
#define ARG_V16 9
-#define ARG_NN 10
-#define ARG_NP 11
-#define ARG_PN 12
-#define ARG_PP 13
-#define ARG_NNN 14
-#define ARG_NNP 15
-#define ARG_NPN 16
-#define ARG_NPP 17
-#define ARG_PNN 18
-#define ARG_PNP 19
-#define ARG_PPN 20
-#define ARG_PPP 21
-#define ARG_PPPP 22
-#define ARG_PPPPP 23
-#define ARG_PPPPPP 24
-#define ARG_PPPPPPP 25
-#define ARG_PPPPPPPP 26
+#define ARG_V32 10
+#define ARG_V64 11
+#define ARG_NN 12
+#define ARG_NP 13
+#define ARG_PN 14
+#define ARG_PP 15
+#define ARG_NNN 16
+#define ARG_NNP 17
+#define ARG_NPN 18
+#define ARG_NPP 19
+#define ARG_PNN 20
+#define ARG_PNP 21
+#define ARG_PPN 22
+#define ARG_PPP 23
+#define ARG_PPPP 24
+#define ARG_PPPPP 25
+#define ARG_PPPPPP 26
+#define ARG_PPPPPPP 27
+#define ARG_PPPPPPPP 28
#endif /* RTS_STORAGE_FUNTYPES_H */
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index a8f2215578..81e48cc9b4 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -103,11 +103,28 @@
# define REG_Hp edi
#endif
+#define REG_XMM1 xmm0
+#define REG_XMM2 xmm1
+#define REG_XMM3 xmm2
+#define REG_XMM4 xmm3
+
+#define REG_YMM1 ymm0
+#define REG_YMM2 ymm1
+#define REG_YMM3 ymm2
+#define REG_YMM4 ymm3
+
+#define REG_ZMM1 zmm0
+#define REG_ZMM2 zmm1
+#define REG_ZMM3 zmm2
+#define REG_ZMM4 zmm3
+
#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
#define MAX_REAL_FLOAT_REG 0
#define MAX_REAL_DOUBLE_REG 0
#define MAX_REAL_LONG_REG 0
-#define MAX_REAL_XMM_REG 0
+#define MAX_REAL_XMM_REG 4
+#define MAX_REAL_YMM_REG 4
+#define MAX_REAL_ZMM_REG 4
/* -----------------------------------------------------------------------------
The x86-64 register mapping
@@ -174,6 +191,20 @@
#define REG_XMM5 xmm5
#define REG_XMM6 xmm6
+#define REG_YMM1 ymm1
+#define REG_YMM2 ymm2
+#define REG_YMM3 ymm3
+#define REG_YMM4 ymm4
+#define REG_YMM5 ymm5
+#define REG_YMM6 ymm6
+
+#define REG_ZMM1 zmm1
+#define REG_ZMM2 zmm2
+#define REG_ZMM3 zmm3
+#define REG_ZMM4 zmm4
+#define REG_ZMM5 zmm5
+#define REG_ZMM6 zmm6
+
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
#define CALLER_SAVES_R4
@@ -208,11 +239,31 @@
#define CALLER_SAVES_XMM6
#endif
+#define CALLER_SAVES_YMM1
+#define CALLER_SAVES_YMM2
+#define CALLER_SAVES_YMM3
+#define CALLER_SAVES_YMM4
+#define CALLER_SAVES_YMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_YMM6
+#endif
+
+#define CALLER_SAVES_ZMM1
+#define CALLER_SAVES_ZMM2
+#define CALLER_SAVES_ZMM3
+#define CALLER_SAVES_ZMM4
+#define CALLER_SAVES_ZMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_ZMM6
+#endif
+
#define MAX_REAL_VANILLA_REG 6
#define MAX_REAL_FLOAT_REG 6
#define MAX_REAL_DOUBLE_REG 6
#define MAX_REAL_LONG_REG 0
#define MAX_REAL_XMM_REG 6
+#define MAX_REAL_YMM_REG 6
+#define MAX_REAL_ZMM_REG 6
/* -----------------------------------------------------------------------------
The PowerPC register mapping
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 876f39a02c..3e4f3d150e 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -225,6 +225,8 @@ RTS_RET(stg_ap_f);
RTS_RET(stg_ap_d);
RTS_RET(stg_ap_l);
RTS_RET(stg_ap_v16);
+RTS_RET(stg_ap_v32);
+RTS_RET(stg_ap_v64);
RTS_RET(stg_ap_n);
RTS_RET(stg_ap_p);
RTS_RET(stg_ap_pv);
@@ -242,6 +244,8 @@ RTS_FUN_DECL(stg_ap_f_fast);
RTS_FUN_DECL(stg_ap_d_fast);
RTS_FUN_DECL(stg_ap_l_fast);
RTS_FUN_DECL(stg_ap_v16_fast);
+RTS_FUN_DECL(stg_ap_v32_fast);
+RTS_FUN_DECL(stg_ap_v64_fast);
RTS_FUN_DECL(stg_ap_n_fast);
RTS_FUN_DECL(stg_ap_p_fast);
RTS_FUN_DECL(stg_ap_pv_fast);
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 10ae2851ac..1abf9da439 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -87,6 +87,18 @@ typedef struct {
StgWord128 rXMM4;
StgWord128 rXMM5;
StgWord128 rXMM6;
+ StgWord256 rYMM1;
+ StgWord256 rYMM2;
+ StgWord256 rYMM3;
+ StgWord256 rYMM4;
+ StgWord256 rYMM5;
+ StgWord256 rYMM6;
+ StgWord512 rZMM1;
+ StgWord512 rZMM2;
+ StgWord512 rZMM3;
+ StgWord512 rZMM4;
+ StgWord512 rZMM5;
+ StgWord512 rZMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
@@ -312,6 +324,78 @@ GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
#define XMM6 (BaseReg->rXMM6)
#endif
+#if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1)
+#else
+#define YMM1 (BaseReg->rYMM1)
+#endif
+
+#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2)
+#else
+#define YMM2 (BaseReg->rYMM2)
+#endif
+
+#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3)
+#else
+#define YMM3 (BaseReg->rYMM3)
+#endif
+
+#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4)
+#else
+#define YMM4 (BaseReg->rYMM4)
+#endif
+
+#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5)
+#else
+#define YMM5 (BaseReg->rYMM5)
+#endif
+
+#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
+#else
+#define YMM6 (BaseReg->rYMM6)
+#endif
+
+#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1)
+#else
+#define ZMM1 (BaseReg->rZMM1)
+#endif
+
+#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2)
+#else
+#define ZMM2 (BaseReg->rZMM2)
+#endif
+
+#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3)
+#else
+#define ZMM3 (BaseReg->rZMM3)
+#endif
+
+#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4)
+#else
+#define ZMM4 (BaseReg->rZMM4)
+#endif
+
+#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5)
+#else
+#define ZMM5 (BaseReg->rZMM5)
+#endif
+
+#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6)
+#else
+#define ZMM6 (BaseReg->rZMM6)
+#endif
+
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
diff --git a/includes/stg/Types.h b/includes/stg/Types.h
index ccc06a175b..6138a8f8a2 100644
--- a/includes/stg/Types.h
+++ b/includes/stg/Types.h
@@ -85,6 +85,10 @@ typedef unsigned long long int StgWord64;
typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
+typedef struct { StgWord128 h; StgWord128 l; } StgWord256;
+
+typedef struct { StgWord256 h; StgWord256 l; } StgWord512;
+
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
diff --git a/rts/Linker.c b/rts/Linker.c
index 58cfca684a..31f60c09ac 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -883,6 +883,8 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_ret) \
SymI_HasProto(stg_ap_l_ret) \
SymI_HasProto(stg_ap_v16_ret) \
+ SymI_HasProto(stg_ap_v32_ret) \
+ SymI_HasProto(stg_ap_v64_ret) \
SymI_HasProto(stg_ap_n_ret) \
SymI_HasProto(stg_ap_p_ret) \
SymI_HasProto(stg_ap_pv_ret) \
@@ -1252,6 +1254,8 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_info) \
SymI_HasProto(stg_ap_l_info) \
SymI_HasProto(stg_ap_v16_info) \
+ SymI_HasProto(stg_ap_v32_info) \
+ SymI_HasProto(stg_ap_v64_info) \
SymI_HasProto(stg_ap_n_info) \
SymI_HasProto(stg_ap_p_info) \
SymI_HasProto(stg_ap_pv_info) \
@@ -1268,6 +1272,8 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_fast) \
SymI_HasProto(stg_ap_l_fast) \
SymI_HasProto(stg_ap_v16_fast) \
+ SymI_HasProto(stg_ap_v32_fast) \
+ SymI_HasProto(stg_ap_v64_fast) \
SymI_HasProto(stg_ap_n_fast) \
SymI_HasProto(stg_ap_p_fast) \
SymI_HasProto(stg_ap_pv_fast) \
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 48990061cc..5b9b7c0bd9 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -313,6 +313,18 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rXMM4"
,fieldOffset Both "StgRegTable" "rXMM5"
,fieldOffset Both "StgRegTable" "rXMM6"
+ ,fieldOffset Both "StgRegTable" "rYMM1"
+ ,fieldOffset Both "StgRegTable" "rYMM2"
+ ,fieldOffset Both "StgRegTable" "rYMM3"
+ ,fieldOffset Both "StgRegTable" "rYMM4"
+ ,fieldOffset Both "StgRegTable" "rYMM5"
+ ,fieldOffset Both "StgRegTable" "rYMM6"
+ ,fieldOffset Both "StgRegTable" "rZMM1"
+ ,fieldOffset Both "StgRegTable" "rZMM2"
+ ,fieldOffset Both "StgRegTable" "rZMM3"
+ ,fieldOffset Both "StgRegTable" "rZMM4"
+ ,fieldOffset Both "StgRegTable" "rZMM5"
+ ,fieldOffset Both "StgRegTable" "rZMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"
diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs
index 2baf85896a..188211df81 100644
--- a/utils/genapply/GenApply.hs
+++ b/utils/genapply/GenApply.hs
@@ -33,6 +33,8 @@ data ArgRep
| D -- double
| L -- long (64-bit)
| V16 -- 16-byte (128-bit) vectors
+ | V32 -- 32-byte (256-bit) vectors
+ | V64 -- 64-byte (512-bit) vectors
-- size of a value in *words*
argSize :: ArgRep -> Int
@@ -43,6 +45,8 @@ argSize F = 1
argSize D = (SIZEOF_DOUBLE `quot` SIZEOF_VOID_P :: Int)
argSize L = (8 `quot` SIZEOF_VOID_P :: Int)
argSize V16 = (16 `quot` SIZEOF_VOID_P :: Int)
+argSize V32 = (32 `quot` SIZEOF_VOID_P :: Int)
+argSize V64 = (64 `quot` SIZEOF_VOID_P :: Int)
showArg :: ArgRep -> String
showArg N = "n"
@@ -52,6 +56,8 @@ showArg F = "f"
showArg D = "d"
showArg L = "l"
showArg V16 = "v16"
+showArg V32 = "v32"
+showArg V64 = "v64"
-- is a value a pointer?
isPtr :: ArgRep -> Bool
@@ -504,6 +510,8 @@ argRep D = text "D_"
argRep L = text "L_"
argRep P = text "gcptr"
argRep V16 = text "V16_"
+argRep V32 = text "V32_"
+argRep V64 = text "V64_"
argRep _ = text "W_"
genApply regstatus args =
@@ -854,6 +862,8 @@ applyTypes = [
[D],
[L],
[V16],
+ [V32],
+ [V64],
[N],
[P],
[P,V],
@@ -882,6 +892,8 @@ stackApplyTypes = [
[D],
[L],
[V16],
+ [V32],
+ [V64],
[N,N],
[N,P],
[P,N],
diff --git a/utils/genprimopcode/Lexer.x b/utils/genprimopcode/Lexer.x
index ff18e17373..d29d8a17f0 100644
--- a/utils/genprimopcode/Lexer.x
+++ b/utils/genprimopcode/Lexer.x
@@ -40,6 +40,10 @@ words :-
<0> ")" { mkT TCloseParen }
<0> "(#" { mkT TOpenParenHash }
<0> "#)" { mkT THashCloseParen }
+ <0> "[" { mkT TOpenBracket }
+ <0> "]" { mkT TCloseBracket }
+ <0> "<" { mkT TOpenAngle }
+ <0> ">" { mkT TCloseAngle }
<0> "section" { mkT TSection }
<0> "primop" { mkT TPrimop }
<0> "pseudoop" { mkT TPseudoop }
@@ -58,7 +62,11 @@ words :-
<0> "infixl" { mkT TInfixL }
<0> "infixr" { mkT TInfixR }
<0> "Nothing" { mkT TNothing }
+ <0> "vector" { mkT TVector }
<0> "thats_all_folks" { mkT TThatsAllFolks }
+ <0> "SCALAR" { mkT TSCALAR }
+ <0> "VECTOR" { mkT TVECTOR }
+ <0> "VECTUPLE" { mkT TVECTUPLE }
<0> [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
<0> [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
<0> [0-9][0-9]* { mkTv (TInteger . read) }
diff --git a/utils/genprimopcode/Main.hs b/utils/genprimopcode/Main.hs
index 5e1c9ab84b..8b97ca169c 100644
--- a/utils/genprimopcode/Main.hs
+++ b/utils/genprimopcode/Main.hs
@@ -13,6 +13,100 @@ import Data.List
import Data.Maybe ( catMaybes )
import System.Environment ( getArgs )
+vecOptions :: Entry -> [(String,String,Int)]
+vecOptions i =
+ concat [vecs | OptionVector vecs <- opts i]
+
+desugarVectorSpec :: Entry -> [Entry]
+desugarVectorSpec i@(Section {}) = [i]
+desugarVectorSpec i = case vecOptions i of
+ [] -> [i]
+ vos -> map genVecEntry vos
+ where
+ genVecEntry :: (String,String,Int) -> Entry
+ genVecEntry (con,repCon,n) =
+ case i of
+ PrimOpSpec {} ->
+ PrimVecOpSpec { cons = "(" ++ concat (intersperse " " [cons i, vecCat, show n, vecWidth]) ++ ")"
+ , name = name'
+ , prefix = pfx
+ , veclen = n
+ , elemrep = con ++ "ElemRep"
+ , ty = desugarTy (ty i)
+ , cat = cat i
+ , desc = desc i
+ , opts = opts i
+ }
+ PrimTypeSpec {} ->
+ PrimVecTypeSpec { ty = desugarTy (ty i)
+ , prefix = pfx
+ , veclen = n
+ , elemrep = con ++ "ElemRep"
+ , desc = desc i
+ , opts = opts i
+ }
+ _ ->
+ error "vector options can only be given for primops and primtypes"
+ where
+ vecCons = con++"X"++show n++"#"
+ vecCat = conCat con
+ vecWidth = conWidth con
+ pfx = lowerHead con++"X"++show n
+ vecTyName = pfx++"PrimTy"
+
+ name' | Just pre <- splitSuffix (name i) "Array#" = pre++vec++"Array#"
+ | Just pre <- splitSuffix (name i) "OffAddr#" = pre++vec++"OffAddr#"
+ | Just pre <- splitSuffix (name i) "ArrayAs#" = pre++con++"ArrayAs"++vec++"#"
+ | Just pre <- splitSuffix (name i) "OffAddrAs#" = pre++con++"OffAddrAs"++vec++"#"
+ | otherwise = init (name i)++vec ++"#"
+ where
+ vec = con++"X"++show n
+
+ splitSuffix :: Eq a => [a] -> [a] -> Maybe [a]
+ splitSuffix s suf
+ | drop len s == suf = Just (take len s)
+ | otherwise = Nothing
+ where
+ len = length s - length suf
+
+ lowerHead s = toLower (head s) : tail s
+
+ desugarTy :: Ty -> Ty
+ desugarTy (TyF s d) = TyF (desugarTy s) (desugarTy d)
+ desugarTy (TyC s d) = TyC (desugarTy s) (desugarTy d)
+ desugarTy (TyApp SCALAR []) = TyApp (TyCon repCon) []
+ desugarTy (TyApp VECTOR []) = TyApp (VecTyCon vecCons vecTyName) []
+ desugarTy (TyApp VECTUPLE []) = TyUTup (replicate n (TyApp (TyCon repCon) []))
+ desugarTy (TyApp tycon ts) = TyApp tycon (map desugarTy ts)
+ desugarTy t@(TyVar {}) = t
+ desugarTy (TyUTup ts) = TyUTup (map desugarTy ts)
+
+ conCat :: String -> String
+ conCat "Int8" = "IntVec"
+ conCat "Int16" = "IntVec"
+ conCat "Int32" = "IntVec"
+ conCat "Int64" = "IntVec"
+ conCat "Word8" = "WordVec"
+ conCat "Word16" = "WordVec"
+ conCat "Word32" = "WordVec"
+ conCat "Word64" = "WordVec"
+ conCat "Float" = "FloatVec"
+ conCat "Double" = "FloatVec"
+ conCat con = error $ "conCat: unknown type constructor " ++ con ++ "\n"
+
+ conWidth :: String -> String
+ conWidth "Int8" = "W8"
+ conWidth "Int16" = "W16"
+ conWidth "Int32" = "W32"
+ conWidth "Int64" = "W64"
+ conWidth "Word8" = "W8"
+ conWidth "Word16" = "W16"
+ conWidth "Word32" = "W32"
+ conWidth "Word64" = "W64"
+ conWidth "Float" = "W32"
+ conWidth "Double" = "W64"
+ conWidth con = error $ "conWidth: unknown type constructor " ++ con ++ "\n"
+
main :: IO ()
main = getArgs >>= \args ->
if length args /= 1 || head args `notElem` known_args
@@ -75,6 +169,18 @@ main = getArgs >>= \args ->
"--primop-list"
-> putStr (gen_primop_list p_o_specs)
+ "--primop-vector-uniques"
+ -> putStr (gen_primop_vector_uniques p_o_specs)
+
+ "--primop-vector-tys"
+ -> putStr (gen_primop_vector_tys p_o_specs)
+
+ "--primop-vector-tys-exports"
+ -> putStr (gen_primop_vector_tys_exports p_o_specs)
+
+ "--primop-vector-tycons"
+ -> putStr (gen_primop_vector_tycons p_o_specs)
+
"--make-haskell-wrappers"
-> putStr (gen_wrappers p_o_specs)
@@ -103,6 +209,10 @@ known_args
"--primop-primop-info",
"--primop-tag",
"--primop-list",
+ "--primop-vector-uniques",
+ "--primop-vector-tys",
+ "--primop-vector-tys-exports",
+ "--primop-vector-tycons",
"--make-haskell-wrappers",
"--make-haskell-source",
"--make-ext-core-source",
@@ -136,32 +246,40 @@ gen_hs_source (Info defaults entries) =
++ "-----------------------------------------------------------------------------\n"
++ "{-# LANGUAGE MultiParamTypeClasses #-}\n"
++ "module GHC.Prim (\n"
- ++ unlines (map (("\t" ++) . hdr) entries)
+ ++ unlines (map (("\t" ++) . hdr) entries')
++ ") where\n"
++ "\n"
++ "{-\n"
++ unlines (map opt defaults)
++ "-}\n"
- ++ unlines (concatMap ent entries) ++ "\n\n\n"
- where opt (OptionFalse n) = n ++ " = False"
+ ++ unlines (concatMap ent entries') ++ "\n\n\n"
+ where entries' = concatMap desugarVectorSpec entries
+
+ opt (OptionFalse n) = n ++ " = False"
opt (OptionTrue n) = n ++ " = True"
opt (OptionString n v) = n ++ " = { " ++ v ++ "}"
opt (OptionInteger n v) = n ++ " = " ++ show v
+ opt (OptionVector _) = ""
opt (OptionFixity mf) = "fixity" ++ " = " ++ show mf
- hdr s@(Section {}) = sec s
- hdr (PrimOpSpec { name = n }) = wrapOp n ++ ","
- hdr (PseudoOpSpec { name = n }) = wrapOp n ++ ","
- hdr (PrimTypeSpec { ty = TyApp n _ }) = wrapTy n ++ ","
- hdr (PrimTypeSpec {}) = error "Illegal type spec"
- hdr (PrimClassSpec { cls = TyApp n _ }) = wrapTy n ++ ","
- hdr (PrimClassSpec {}) = error "Illegal class spec"
-
- ent (Section {}) = []
- ent o@(PrimOpSpec {}) = spec o
- ent o@(PrimTypeSpec {}) = spec o
- ent o@(PrimClassSpec {}) = spec o
- ent o@(PseudoOpSpec {}) = spec o
+ hdr s@(Section {}) = sec s
+ hdr (PrimOpSpec { name = n }) = wrapOp n ++ ","
+ hdr (PrimVecOpSpec { name = n }) = wrapOp n ++ ","
+ hdr (PseudoOpSpec { name = n }) = wrapOp n ++ ","
+ hdr (PrimTypeSpec { ty = TyApp (TyCon n) _ }) = wrapTy n ++ ","
+ hdr (PrimTypeSpec {}) = error $ "Illegal type spec"
+ hdr (PrimClassSpec { cls = TyApp (TyCon n) _ }) = wrapTy n ++ ","
+ hdr (PrimClassSpec {}) = error "Illegal class spec"
+ hdr (PrimVecTypeSpec { ty = TyApp (VecTyCon n _) _ }) = wrapTy n ++ ","
+ hdr (PrimVecTypeSpec {}) = error $ "Illegal type spec"
+
+ ent (Section {}) = []
+ ent o@(PrimOpSpec {}) = spec o
+ ent o@(PrimVecOpSpec {}) = spec o
+ ent o@(PrimTypeSpec {}) = spec o
+ ent o@(PrimClassSpec {}) = spec o
+ ent o@(PrimVecTypeSpec {}) = spec o
+ ent o@(PseudoOpSpec {}) = spec o
sec s = "\n-- * " ++ escape (title s) ++ "\n"
++ (unlines $ map ("-- " ++ ) $ lines $ unlatex $ escape $ "|" ++ desc s) ++ "\n"
@@ -173,6 +291,11 @@ gen_hs_source (Info defaults entries) =
++
[ wrapOp n ++ " :: " ++ pprTy t,
wrapOp n ++ " = let x = x in x" ]
+ PrimVecOpSpec { name = n, ty = t, opts = options } ->
+ [ pprFixity fixity n | OptionFixity (Just fixity) <- options ]
+ ++
+ [ wrapOp n ++ " :: " ++ pprTy t,
+ wrapOp n ++ " = let x = x in x" ]
PseudoOpSpec { name = n, ty = t } ->
[ wrapOp n ++ " :: " ++ pprTy t,
wrapOp n ++ " = let x = x in x" ]
@@ -180,6 +303,8 @@ gen_hs_source (Info defaults entries) =
[ "data " ++ pprTy t ]
PrimClassSpec { cls = t } ->
[ "class " ++ pprTy t ]
+ PrimVecTypeSpec { ty = t } ->
+ [ "data " ++ pprTy t ]
Section { } -> []
comm = case (desc o) of
@@ -212,7 +337,7 @@ pprTy = pty
pty (TyF t1 t2) = pbty t1 ++ " -> " ++ pty t2
pty (TyC t1 t2) = pbty t1 ++ " => " ++ pty t2
pty t = pbty t
- pbty (TyApp tc ts) = tc ++ concat (map (' ' :) (map paty ts))
+ pbty (TyApp tc ts) = show tc ++ concat (map (' ' :) (map paty ts))
pbty (TyUTup ts) = "(# "
++ concat (intersperse "," (map pty ts))
++ " #)"
@@ -259,7 +384,7 @@ gen_ext_core_source entries =
where printList f = concat . intersperse ",\n" . filter (not . null) . map f
tcEnt (PrimTypeSpec {ty=t}) =
case t of
- TyApp tc args -> parens tc (tcKind tc args)
+ TyApp tc args -> parens (show tc) (tcKind tc args)
_ -> error ("tcEnt: type in PrimTypeSpec is not a type"
++ " constructor: " ++ show t)
tcEnt _ = ""
@@ -270,12 +395,12 @@ gen_ext_core_source entries =
-- alternative would be to refer to things indirectly and hard-wire
-- certain things (e.g., the kind of the Any constructor, here) into
-- ext-core's Prims module again.
- tcKind "Any" _ = "Klifted"
- tcKind tc [] | last tc == '#' = "Kunlifted"
- tcKind _ [] | otherwise = "Klifted"
+ tcKind (TyCon "Any") _ = "Klifted"
+ tcKind tc [] | last (show tc) == '#' = "Kunlifted"
+ tcKind _ [] | otherwise = "Klifted"
-- assumes that all type arguments are lifted (are they?)
- tcKind tc (_v:as) = "(Karrow Klifted " ++ tcKind tc as
- ++ ")"
+ tcKind tc (_v:as) = "(Karrow Klifted " ++ tcKind tc as
+ ++ ")"
valEnt (PseudoOpSpec {name=n, ty=t}) = valEntry n t
valEnt (PrimOpSpec {name=n, ty=t}) = valEntry n t
valEnt _ = ""
@@ -290,7 +415,7 @@ gen_ext_core_source entries =
++ " " ++ paren s1))
++ " " ++ paren s2
mkTconApp tc args = foldl tapp tc args
- mkTcon tc = paren $ "Tcon " ++ paren (qualify True tc)
+ mkTcon tc = paren $ "Tcon " ++ paren (qualify True (show tc))
mkUtupleTy args = foldl tapp (tcUTuple (length args)) args
mkForallTy [] t = t
mkForallTy vs t = foldr
@@ -314,7 +439,7 @@ gen_ext_core_source entries =
++ show n ++ "H")
tyEnt (PrimTypeSpec {ty=(TyApp tc _args)}) = " " ++ paren ("Tcon " ++
- (paren (qualify True tc)))
+ (paren (qualify True (show tc))))
tyEnt _ = ""
-- more hacks. might be better to do this on the ext-core side,
@@ -334,7 +459,7 @@ gen_ext_core_source entries =
prefixes ps = filter (\ t ->
case t of
(PrimTypeSpec {ty=(TyApp tc _args)}) ->
- any (\ p -> p `isPrefixOf` tc) ps
+ any (\ p -> p `isPrefixOf` show tc) ps
_ -> False)
parens n ty' = " (zEncodeString \"" ++ n ++ "\", " ++ ty' ++ ")"
@@ -358,6 +483,8 @@ gen_latex_doc (Info defaults entries)
++ d ++ "}{"
++ mk_options o
++ "}\n"
+ mk_entry (PrimVecOpSpec {}) =
+ ""
mk_entry (Section {title=ti,desc=d}) =
"\\primopsection{"
++ latex_encode ti ++ "}{"
@@ -376,6 +503,8 @@ gen_latex_doc (Info defaults entries)
++ d ++ "}{"
++ mk_options o
++ "}\n"
+ mk_entry (PrimVecTypeSpec {}) =
+ ""
mk_entry (PseudoOpSpec {name=n,ty=t,desc=d,opts=o}) =
"\\pseudoopspec{"
++ latex_encode (zencode n) ++ "}{"
@@ -388,7 +517,7 @@ gen_latex_doc (Info defaults entries)
where pty (TyF t1 t2) = pbty t1 ++ " -> " ++ pty t2
pty (TyC t1 t2) = pbty t1 ++ " => " ++ pty t2
pty t = pbty t
- pbty (TyApp tc ts) = tc ++ (concat (map (' ':) (map paty ts)))
+ pbty (TyApp tc ts) = show tc ++ (concat (map (' ':) (map paty ts)))
pbty (TyUTup ts) = "(# " ++ (concat (intersperse "," (map pty ts))) ++ " #)"
pbty t = paty t
paty (TyVar tv) = tv
@@ -398,11 +527,11 @@ gen_latex_doc (Info defaults entries)
where pty (TyF t1 t2) = pbty t1 ++ " -> " ++ pty t2
pty (TyC t1 t2) = pbty t1 ++ " => " ++ pty t2
pty t = pbty t
- pbty (TyApp tc ts) = (zencode tc) ++ (concat (map (' ':) (map paty ts)))
+ pbty (TyApp tc ts) = (zencode (show tc)) ++ (concat (map (' ':) (map paty ts)))
pbty (TyUTup ts) = (zencode (utuplenm (length ts))) ++ (concat ((map (' ':) (map paty ts))))
pbty t = paty t
paty (TyVar tv) = zencode tv
- paty (TyApp tc []) = zencode tc
+ paty (TyApp tc []) = zencode (show tc)
paty t = "(" ++ pty t ++ ")"
utuplenm 1 = "(# #)"
utuplenm n = "(#" ++ (replicate (n-1) ',') ++ "#)"
@@ -441,6 +570,7 @@ gen_latex_doc (Info defaults entries)
Just (OptionString _ _) -> error "String value for boolean option"
Just (OptionInteger _ _) -> error "Integer value for boolean option"
Just (OptionFixity _) -> error "Fixity value for boolean option"
+ Just (OptionVector _) -> error "vector template for boolean option"
Nothing -> ""
mk_strictness o =
@@ -525,20 +655,15 @@ gen_wrappers (Info _ entries)
++ "module GHC.PrimopWrappers where\n"
++ "import qualified GHC.Prim\n"
++ "import GHC.Tuple ()\n"
- ++ "import GHC.Prim (" ++ concat (intersperse ", " othertycons) ++ ")\n"
- ++ "#if defined (__GLASGOW_HASKELL_LLVM__)\n"
- ++ "import GHC.Prim (" ++ concat (intersperse ", " vectycons) ++ ")\n"
- ++ "#endif /* defined (__GLASGOW_HASKELL_LLVM__) */\n"
- ++ unlines (concatMap f otherspecs)
- ++ "#if defined (__GLASGOW_HASKELL_LLVM__)\n"
- ++ unlines (concatMap f vecspecs)
- ++ "#endif /* defined (__GLASGOW_HASKELL_LLVM__) */\n"
+ ++ "import GHC.Prim (" ++ types ++ ")\n"
+ ++ unlines (concatMap f specs)
where
- specs = filter (not.dodgy) (filter is_primop entries)
- (vecspecs, otherspecs) = partition is_llvm_only specs
+ specs = filter (not.dodgy) $
+ filter (not.is_llvm_only) $
+ filter is_primop entries
tycons = foldr union [] $ map (tyconsIn . ty) specs
- (vectycons, othertycons) =
- (partition llvmOnlyTyCon . filter (`notElem` ["()", "Bool"])) tycons
+ tycons' = filter (`notElem` [TyCon "()", TyCon "Bool"]) tycons
+ types = concat $ intersperse ", " $ map show tycons'
f spec = let args = map (\n -> "a" ++ show n) [1 .. arity (ty spec)]
src_name = wrap (name spec)
lhs = src_name ++ " " ++ unwords args
@@ -565,14 +690,6 @@ gen_wrappers (Info _ entries)
Just (OptionTrue _) -> True
_ -> False
- llvmOnlyTyCon :: TyCon -> Bool
- llvmOnlyTyCon "Int32#" = True
- llvmOnlyTyCon "FloatX4#" = True
- llvmOnlyTyCon "DoubleX2#" = True
- llvmOnlyTyCon "Int32X4#" = True
- llvmOnlyTyCon "Int64X2#" = True
- llvmOnlyTyCon _ = False
-
gen_primop_list :: Info -> String
gen_primop_list (Info _ entries)
= unlines (
@@ -581,24 +698,99 @@ gen_primop_list (Info _ entries)
map (\p -> " , " ++ cons p) rest
++
[ " ]" ]
- ) where (first:rest) = filter is_primop entries
+ ) where (first:rest) = concatMap desugarVectorSpec (filter is_primop entries)
+
+mIN_VECTOR_UNIQUE :: Int
+mIN_VECTOR_UNIQUE = 300
+
+gen_primop_vector_uniques :: Info -> String
+gen_primop_vector_uniques (Info _ entries)
+ = unlines $
+ concatMap mkVecUnique (specs `zip` [mIN_VECTOR_UNIQUE..])
+ where
+ specs = concatMap desugarVectorSpec (filter is_vector (filter is_primtype entries))
+
+ mkVecUnique :: (Entry, Int) -> [String]
+ mkVecUnique (i, unique) =
+ [ key_id ++ " :: Unique"
+ , key_id ++ " = mkPreludeTyConUnique " ++ show unique
+ ]
+ where
+ key_id = prefix i ++ "PrimTyConKey"
+
+gen_primop_vector_tys :: Info -> String
+gen_primop_vector_tys (Info _ entries)
+ = unlines $
+ concatMap mkVecTypes specs
+ where
+ specs = concatMap desugarVectorSpec (filter is_vector (filter is_primtype entries))
+
+ mkVecTypes :: Entry -> [String]
+ mkVecTypes i =
+ [ name_id ++ " :: Name"
+ , name_id ++ " = mkPrimTc (fsLit \"" ++ pprTy (ty i) ++ "\") " ++ key_id ++ " " ++ tycon_id
+ , ty_id ++ " :: Type"
+ , ty_id ++ " = mkTyConTy " ++ tycon_id
+ , tycon_id ++ " :: TyCon"
+ , tycon_id ++ " = pcPrimTyCon0 " ++ name_id ++
+ " (VecRep " ++ show (veclen i) ++ " " ++ elemrep i ++ ")"
+ ]
+ where
+ key_id = prefix i ++ "PrimTyConKey"
+ name_id = prefix i ++ "PrimTyConName"
+ ty_id = prefix i ++ "PrimTy"
+ tycon_id = prefix i ++ "PrimTyCon"
+
+gen_primop_vector_tys_exports :: Info -> String
+gen_primop_vector_tys_exports (Info _ entries)
+ = unlines $
+ map mkVecTypes specs
+ where
+ specs = concatMap desugarVectorSpec (filter is_vector (filter is_primtype entries))
+
+ mkVecTypes :: Entry -> String
+ mkVecTypes i =
+ "\t" ++ ty_id ++ ", " ++ tycon_id ++ ","
+ where
+ ty_id = prefix i ++ "PrimTy"
+ tycon_id = prefix i ++ "PrimTyCon"
+
+gen_primop_vector_tycons :: Info -> String
+gen_primop_vector_tycons (Info _ entries)
+ = unlines $
+ map mkVecTypes specs
+ where
+ specs = concatMap desugarVectorSpec (filter is_vector (filter is_primtype entries))
+
+ mkVecTypes :: Entry -> String
+ mkVecTypes i =
+ " , " ++ tycon_id
+ where
+ tycon_id = prefix i ++ "PrimTyCon"
gen_primop_tag :: Info -> String
gen_primop_tag (Info _ entries)
= unlines (max_def_type : max_def :
tagOf_type : zipWith f primop_entries [1 :: Int ..])
where
- primop_entries = filter is_primop entries
+ primop_entries = concatMap desugarVectorSpec $ filter is_primop entries
tagOf_type = "tagOf_PrimOp :: PrimOp -> FastInt"
f i n = "tagOf_PrimOp " ++ cons i ++ " = _ILIT(" ++ show n ++ ")"
max_def_type = "maxPrimOpTag :: Int"
max_def = "maxPrimOpTag = " ++ show (length primop_entries)
gen_data_decl :: Info -> String
-gen_data_decl (Info _ entries)
- = let conss = map cons (filter is_primop entries)
- in "data PrimOp\n = " ++ head conss ++ "\n"
- ++ unlines (map (" | "++) (tail conss))
+gen_data_decl (Info _ entries) =
+ "data PrimOp\n = " ++ head conss ++ "\n"
+ ++ unlines (map (" | "++) (tail conss))
+ where
+ conss = map genCons (filter is_primop entries)
+
+ genCons :: Entry -> String
+ genCons entry =
+ case vecOptions entry of
+ [] -> cons entry
+ _ -> cons entry ++ " PrimOpVecCat Length Width"
gen_switch_from_attribs :: String -> String -> Info -> String
gen_switch_from_attribs attrib_name fn_name (Info defaults entries)
@@ -609,12 +801,15 @@ gen_switch_from_attribs attrib_name fn_name (Info defaults entries)
getAltRhs (OptionTrue _) = "True"
getAltRhs (OptionInteger _ i) = show i
getAltRhs (OptionString _ s) = s
+ getAltRhs (OptionVector _) = "True"
getAltRhs (OptionFixity mf) = show mf
mkAlt po
= case lookup_attrib attrib_name (opts po) of
Nothing -> Nothing
- Just xx -> Just (fn_name ++ " " ++ cons po ++ " = " ++ getAltRhs xx)
+ Just xx -> case vecOptions po of
+ [] -> Just (fn_name ++ " " ++ cons po ++ " = " ++ getAltRhs xx)
+ _ -> Just (fn_name ++ " (" ++ cons po ++ " _ _ _) = " ++ getAltRhs xx)
in
case defv of
@@ -629,7 +824,7 @@ gen_switch_from_attribs attrib_name fn_name (Info defaults entries)
gen_primop_info :: Info -> String
gen_primop_info (Info _ entries)
- = unlines (map mkPOItext (filter is_primop entries))
+ = unlines (map mkPOItext (concatMap desugarVectorSpec (filter is_primop entries)))
mkPOItext :: Entry -> String
mkPOItext i = mkPOI_LHS_text i ++ mkPOI_RHS_text i
@@ -677,29 +872,25 @@ ppTyVar "o" = "openAlphaTyVar"
ppTyVar _ = error "Unknown type var"
ppType :: Ty -> String
-ppType (TyApp "Any" []) = "anyTy"
-ppType (TyApp "Bool" []) = "boolTy"
-
-ppType (TyApp "Int#" []) = "intPrimTy"
-ppType (TyApp "Int32#" []) = "int32PrimTy"
-ppType (TyApp "Int64#" []) = "int64PrimTy"
-ppType (TyApp "Char#" []) = "charPrimTy"
-ppType (TyApp "Word#" []) = "wordPrimTy"
-ppType (TyApp "Word32#" []) = "word32PrimTy"
-ppType (TyApp "Word64#" []) = "word64PrimTy"
-ppType (TyApp "Addr#" []) = "addrPrimTy"
-ppType (TyApp "Float#" []) = "floatPrimTy"
-ppType (TyApp "Double#" []) = "doublePrimTy"
-ppType (TyApp "FloatX4#" []) = "floatX4PrimTy"
-ppType (TyApp "DoubleX2#" []) = "doubleX2PrimTy"
-ppType (TyApp "Int32X4#" []) = "int32X4PrimTy"
-ppType (TyApp "Int64X2#" []) = "int64X2PrimTy"
-ppType (TyApp "ByteArray#" []) = "byteArrayPrimTy"
-ppType (TyApp "RealWorld" []) = "realWorldTy"
-ppType (TyApp "ThreadId#" []) = "threadIdPrimTy"
-ppType (TyApp "ForeignObj#" []) = "foreignObjPrimTy"
-ppType (TyApp "BCO#" []) = "bcoPrimTy"
-ppType (TyApp "()" []) = "unitTy" -- unitTy is TysWiredIn's name for ()
+ppType (TyApp (TyCon "Any") []) = "anyTy"
+ppType (TyApp (TyCon "Bool") []) = "boolTy"
+
+ppType (TyApp (TyCon "Int#") []) = "intPrimTy"
+ppType (TyApp (TyCon "Int32#") []) = "int32PrimTy"
+ppType (TyApp (TyCon "Int64#") []) = "int64PrimTy"
+ppType (TyApp (TyCon "Char#") []) = "charPrimTy"
+ppType (TyApp (TyCon "Word#") []) = "wordPrimTy"
+ppType (TyApp (TyCon "Word32#") []) = "word32PrimTy"
+ppType (TyApp (TyCon "Word64#") []) = "word64PrimTy"
+ppType (TyApp (TyCon "Addr#") []) = "addrPrimTy"
+ppType (TyApp (TyCon "Float#") []) = "floatPrimTy"
+ppType (TyApp (TyCon "Double#") []) = "doublePrimTy"
+ppType (TyApp (TyCon "ByteArray#") []) = "byteArrayPrimTy"
+ppType (TyApp (TyCon "RealWorld") []) = "realWorldTy"
+ppType (TyApp (TyCon "ThreadId#") []) = "threadIdPrimTy"
+ppType (TyApp (TyCon "ForeignObj#") []) = "foreignObjPrimTy"
+ppType (TyApp (TyCon "BCO#") []) = "bcoPrimTy"
+ppType (TyApp (TyCon "()") []) = "unitTy" -- unitTy is TysWiredIn's name for ()
ppType (TyVar "a") = "alphaTy"
ppType (TyVar "b") = "betaTy"
@@ -707,28 +898,31 @@ ppType (TyVar "c") = "gammaTy"
ppType (TyVar "s") = "deltaTy"
ppType (TyVar "o") = "openAlphaTy"
-ppType (TyApp "State#" [x]) = "mkStatePrimTy " ++ ppType x
-ppType (TyApp "MutVar#" [x,y]) = "mkMutVarPrimTy " ++ ppType x
- ++ " " ++ ppType y
-ppType (TyApp "MutableArray#" [x,y]) = "mkMutableArrayPrimTy " ++ ppType x
+ppType (TyApp (TyCon "State#") [x]) = "mkStatePrimTy " ++ ppType x
+ppType (TyApp (TyCon "MutVar#") [x,y]) = "mkMutVarPrimTy " ++ ppType x
+ ++ " " ++ ppType y
+ppType (TyApp (TyCon "MutableArray#") [x,y]) = "mkMutableArrayPrimTy " ++ ppType x
+ ++ " " ++ ppType y
+ppType (TyApp (TyCon "MutableArrayArray#") [x]) = "mkMutableArrayArrayPrimTy " ++ ppType x
+ppType (TyApp (TyCon "MutableByteArray#") [x]) = "mkMutableByteArrayPrimTy "
+ ++ ppType x
+ppType (TyApp (TyCon "Array#") [x]) = "mkArrayPrimTy " ++ ppType x
+ppType (TyApp (TyCon "ArrayArray#") []) = "mkArrayArrayPrimTy"
+
+
+ppType (TyApp (TyCon "Weak#") [x]) = "mkWeakPrimTy " ++ ppType x
+ppType (TyApp (TyCon "StablePtr#") [x]) = "mkStablePtrPrimTy " ++ ppType x
+ppType (TyApp (TyCon "StableName#") [x]) = "mkStableNamePrimTy " ++ ppType x
+
+ppType (TyApp (TyCon "MVar#") [x,y]) = "mkMVarPrimTy " ++ ppType x
+ ++ " " ++ ppType y
+ppType (TyApp (TyCon "TVar#") [x,y]) = "mkTVarPrimTy " ++ ppType x
++ " " ++ ppType y
-ppType (TyApp "MutableArrayArray#" [x]) = "mkMutableArrayArrayPrimTy " ++ ppType x
-ppType (TyApp "MutableByteArray#" [x]) = "mkMutableByteArrayPrimTy "
- ++ ppType x
-ppType (TyApp "Array#" [x]) = "mkArrayPrimTy " ++ ppType x
-ppType (TyApp "ArrayArray#" []) = "mkArrayArrayPrimTy"
-
-
-ppType (TyApp "Weak#" [x]) = "mkWeakPrimTy " ++ ppType x
-ppType (TyApp "StablePtr#" [x]) = "mkStablePtrPrimTy " ++ ppType x
-ppType (TyApp "StableName#" [x]) = "mkStableNamePrimTy " ++ ppType x
-
-ppType (TyApp "MVar#" [x,y]) = "mkMVarPrimTy " ++ ppType x
- ++ " " ++ ppType y
-ppType (TyApp "TVar#" [x,y]) = "mkTVarPrimTy " ++ ppType x
- ++ " " ++ ppType y
-ppType (TyUTup ts) = "(mkTupleTy UnboxedTuple "
- ++ listify (map ppType ts) ++ ")"
+
+ppType (TyApp (VecTyCon _ pptc) []) = pptc
+
+ppType (TyUTup ts) = "(mkTupleTy UnboxedTuple "
+ ++ listify (map ppType ts) ++ ")"
ppType (TyF s d) = "(mkFunTy (" ++ ppType s ++ ") (" ++ ppType d ++ "))"
ppType (TyC s d) = "(mkFunTy (" ++ ppType s ++ ") (" ++ ppType d ++ "))"
diff --git a/utils/genprimopcode/Parser.y b/utils/genprimopcode/Parser.y
index eb76cb0407..07ef03b986 100644
--- a/utils/genprimopcode/Parser.y
+++ b/utils/genprimopcode/Parser.y
@@ -32,6 +32,10 @@ import Syntax
'#)' { THashCloseParen }
'{' { TOpenBrace }
'}' { TCloseBrace }
+ '[' { TOpenBracket }
+ ']' { TCloseBracket }
+ '<' { TOpenAngle }
+ '>' { TCloseAngle }
section { TSection }
primop { TPrimop }
pseudoop { TPseudoop }
@@ -50,6 +54,10 @@ import Syntax
infixl { TInfixL }
infixr { TInfixR }
nothing { TNothing }
+ vector { TVector }
+ SCALAR { TSCALAR }
+ VECTOR { TVECTOR }
+ VECTUPLE { TVECTUPLE }
thats_all_folks { TThatsAllFolks }
lowerName { TLowerName $$ }
upperName { TUpperName $$ }
@@ -74,6 +82,7 @@ pOption : lowerName '=' false { OptionFalse $1 }
| lowerName '=' true { OptionTrue $1 }
| lowerName '=' pStuffBetweenBraces { OptionString $1 $3 }
| lowerName '=' integer { OptionInteger $1 $3 }
+ | vector '=' pVectorTemplate { OptionVector $3 }
| fixity '=' pInfix { OptionFixity $3 }
pInfix :: { Maybe Fixity }
@@ -147,6 +156,17 @@ pInside :: { String }
pInside : '{' pInsides '}' { "{" ++ $2 ++ "}" }
| noBraces { $1 }
+pVectorTemplate :: { [(String, String, Int)] }
+pVectorTemplate : '[' pVectors ']' { $2 }
+
+pVectors :: { [(String, String, Int)] }
+pVectors : pVector ',' pVectors { [$1] ++ $3 }
+ | pVector { [$1] }
+ | {- empty -} { [] }
+
+pVector :: { (String, String, Int) }
+pVector : '<' upperName ',' upperName ',' integer '>' { ($2, $4, $6) }
+
pType :: { Ty }
pType : paT '->' pType { TyF $1 $3 }
| paT '=>' pType { TyC $1 $3 }
@@ -175,9 +195,12 @@ ppT :: { Ty }
ppT : lowerName { TyVar $1 }
| pTycon { TyApp $1 [] }
-pTycon :: { String }
-pTycon : upperName { $1 }
- | '(' ')' { "()" }
+pTycon :: { TyCon }
+pTycon : upperName { TyCon $1 }
+ | '(' ')' { TyCon "()" }
+ | SCALAR { SCALAR }
+ | VECTOR { VECTOR }
+ | VECTUPLE { VECTUPLE }
{
parse :: String -> Either String Info
diff --git a/utils/genprimopcode/ParserM.hs b/utils/genprimopcode/ParserM.hs
index 8093675651..aaaf6ac66f 100644
--- a/utils/genprimopcode/ParserM.hs
+++ b/utils/genprimopcode/ParserM.hs
@@ -67,6 +67,10 @@ data Token = TEOF
| THashCloseParen
| TOpenBrace
| TCloseBrace
+ | TOpenBracket
+ | TCloseBracket
+ | TOpenAngle
+ | TCloseAngle
| TSection
| TPrimop
| TPseudoop
@@ -91,6 +95,10 @@ data Token = TEOF
| TInfixL
| TInfixR
| TNothing
+ | TVector
+ | TSCALAR
+ | TVECTOR
+ | TVECTUPLE
deriving Show
-- Actions
diff --git a/utils/genprimopcode/Syntax.hs b/utils/genprimopcode/Syntax.hs
index 333ea2c4c7..d0c380cf59 100644
--- a/utils/genprimopcode/Syntax.hs
+++ b/utils/genprimopcode/Syntax.hs
@@ -19,6 +19,15 @@ data Entry
cat :: Category, -- category
desc :: String, -- description
opts :: [Option] } -- default overrides
+ | PrimVecOpSpec { cons :: String, -- PrimOp name
+ name :: String, -- name in prog text
+ prefix :: String, -- prefix for generated names
+ veclen :: Int, -- vector length
+ elemrep :: String, -- vector ElemRep
+ ty :: Ty, -- type
+ cat :: Category, -- category
+ desc :: String, -- description
+ opts :: [Option] } -- default overrides
| PseudoOpSpec { name :: String, -- name in prog text
ty :: Ty, -- type
desc :: String, -- description
@@ -29,6 +38,12 @@ data Entry
| PrimClassSpec { cls :: Ty, -- name in prog text
desc :: String, -- description
opts :: [Option] } -- default overrides
+ | PrimVecTypeSpec { ty :: Ty, -- name in prog text
+ prefix :: String, -- prefix for generated names
+ veclen :: Int, -- vector length
+ elemrep :: String, -- vector ElemRep
+ desc :: String, -- description
+ opts :: [Option] } -- default overrides
| Section { title :: String, -- section title
desc :: String } -- description
deriving Show
@@ -37,12 +52,17 @@ is_primop :: Entry -> Bool
is_primop (PrimOpSpec _ _ _ _ _ _) = True
is_primop _ = False
+is_primtype :: Entry -> Bool
+is_primtype (PrimTypeSpec {}) = True
+is_primtype _ = False
+
-- a binding of property to value
data Option
= OptionFalse String -- name = False
| OptionTrue String -- name = True
| OptionString String String -- name = { ... unparsed stuff ... }
| OptionInteger String Int -- name = <int>
+ | OptionVector [(String,String,Int)] -- name = [(,...),...]
| OptionFixity (Maybe Fixity) -- fixity = infix{,l,r} <int> | Nothing
deriving Show
@@ -62,7 +82,20 @@ data Ty
deriving (Eq,Show)
type TyVar = String
-type TyCon = String
+
+data TyCon = TyCon String
+ | SCALAR
+ | VECTOR
+ | VECTUPLE
+ | VecTyCon String String
+ deriving (Eq, Ord)
+
+instance Show TyCon where
+ show (TyCon tc) = tc
+ show SCALAR = "SCALAR"
+ show VECTOR = "VECTOR"
+ show VECTUPLE = "VECTUPLE"
+ show (VecTyCon tc _) = tc
-- Follow definitions of Fixity and FixityDirection in GHC
@@ -118,7 +151,7 @@ sanityPrimOp def_names p
sane_ty :: Category -> Ty -> Bool
sane_ty Compare (TyF t1 (TyF t2 td))
- | t1 == t2 && td == TyApp "Int#" [] = True
+ | t1 == t2 && td == TyApp (TyCon "Int#") [] = True
sane_ty Monadic (TyF t1 td)
| t1 == td = True
sane_ty Dyadic (TyF t1 (TyF t2 td))
@@ -133,6 +166,7 @@ get_attrib_name (OptionFalse nm) = nm
get_attrib_name (OptionTrue nm) = nm
get_attrib_name (OptionString nm _) = nm
get_attrib_name (OptionInteger nm _) = nm
+get_attrib_name (OptionVector _) = "vector"
get_attrib_name (OptionFixity _) = "fixity"
lookup_attrib :: String -> [Option] -> Maybe Option
@@ -140,3 +174,7 @@ lookup_attrib _ [] = Nothing
lookup_attrib nm (a:as)
= if get_attrib_name a == nm then Just a else lookup_attrib nm as
+is_vector :: Entry -> Bool
+is_vector i = case lookup_attrib "vector" (opts i) of
+ Nothing -> False
+ _ -> True