summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorAbhiroop Sarkar <asiamgenius@gmail.com>2018-09-27 15:28:46 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-07-03 09:33:39 -0400
commitacd795583625401c5554f8e04ec7efca18814011 (patch)
tree545e529eed21e78592ff326d4ebf9804095ad2cb /compiler/codeGen
parentdf3e5b744db29c085f5bc05f8b609197bcbf9b0c (diff)
downloadhaskell-acd795583625401c5554f8e04ec7efca18814011.tar.gz
Add support for SIMD operations in the NCG
This adds support for constructing vector types from Float#, Double# etc and performing arithmetic operations on them Cleaned-Up-By: Ben Gamari <ben@well-typed.com>
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/CgUtils.hs42
-rw-r--r--compiler/codeGen/StgCmmPrim.hs55
2 files changed, 64 insertions, 33 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index 0ff9bd8b56..2cbcfc66a9 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -57,27 +57,27 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
-baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
-baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
-baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
-baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
-baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
-baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
-baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
-baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
-baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
-baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
-baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
-baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
-baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
-baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
-baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
-baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
-baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
-baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
-baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
-baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
-baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
+baseRegOffset dflags (XmmReg 1 _ _ _) = oFFSET_StgRegTable_rXMM1 dflags
+baseRegOffset dflags (XmmReg 2 _ _ _) = oFFSET_StgRegTable_rXMM2 dflags
+baseRegOffset dflags (XmmReg 3 _ _ _) = oFFSET_StgRegTable_rXMM3 dflags
+baseRegOffset dflags (XmmReg 4 _ _ _) = oFFSET_StgRegTable_rXMM4 dflags
+baseRegOffset dflags (XmmReg 5 _ _ _) = oFFSET_StgRegTable_rXMM5 dflags
+baseRegOffset dflags (XmmReg 6 _ _ _) = oFFSET_StgRegTable_rXMM6 dflags
+baseRegOffset _ (XmmReg n _ _ _) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
+baseRegOffset dflags (YmmReg 1 _ _ _) = oFFSET_StgRegTable_rYMM1 dflags
+baseRegOffset dflags (YmmReg 2 _ _ _) = oFFSET_StgRegTable_rYMM2 dflags
+baseRegOffset dflags (YmmReg 3 _ _ _) = oFFSET_StgRegTable_rYMM3 dflags
+baseRegOffset dflags (YmmReg 4 _ _ _) = oFFSET_StgRegTable_rYMM4 dflags
+baseRegOffset dflags (YmmReg 5 _ _ _) = oFFSET_StgRegTable_rYMM5 dflags
+baseRegOffset dflags (YmmReg 6 _ _ _) = oFFSET_StgRegTable_rYMM6 dflags
+baseRegOffset _ (YmmReg n _ _ _) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
+baseRegOffset dflags (ZmmReg 1 _ _ _) = oFFSET_StgRegTable_rZMM1 dflags
+baseRegOffset dflags (ZmmReg 2 _ _ _) = oFFSET_StgRegTable_rZMM2 dflags
+baseRegOffset dflags (ZmmReg 3 _ _ _) = oFFSET_StgRegTable_rZMM3 dflags
+baseRegOffset dflags (ZmmReg 4 _ _ _) = oFFSET_StgRegTable_rZMM4 dflags
+baseRegOffset dflags (ZmmReg 5 _ _ _) = oFFSET_StgRegTable_rZMM5 dflags
+baseRegOffset dflags (ZmmReg 6 _ _ _) = oFFSET_StgRegTable_rZMM6 dflags
+baseRegOffset _ (ZmmReg n _ _ _) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c3f9d5a279..9a6cf6c2e5 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -669,7 +669,7 @@ emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
-- SIMD primops
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
checkVecCompatibility dflags vcat n w
- doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
+ doVecBroadcastOp (vecElemInjectCast dflags vcat w) ty zeros e res
where
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
@@ -1765,9 +1765,8 @@ vecElemProjectCast _ _ _ = Nothing
checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
checkVecCompatibility dflags vcat l w = do
- when (hscTarget dflags /= HscLlvm) $ do
- sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
- ,"Please use -fllvm."]
+ when (hscTarget dflags /= HscLlvm && hscTarget dflags /= HscAsm) $ do
+ sorry "SIMD vector instructions not supported for the C backend or GHCi"
check vecWidth vcat l w
where
check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
@@ -1792,6 +1791,38 @@ checkVecCompatibility dflags vcat l w = do
------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
+doVecBroadcastOp :: Maybe MachOp -- Cast from element to vector component
+ -> CmmType -- Type of vector
+ -> CmmExpr -- Initial vector
+ -> CmmExpr -- Elements
+ -> CmmFormal -- Destination for result
+ -> FCode ()
+doVecBroadcastOp maybe_pre_write_cast ty z es res = do
+ dst <- newTemp ty
+ emitAssign (CmmLocal dst) z
+ vecBroadcast dst es 0
+ where
+ vecBroadcast :: CmmFormal -> CmmExpr -> Int -> FCode ()
+ vecBroadcast src e _ = do
+ dst <- newTemp ty
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid)
+ [CmmReg (CmmLocal src), cast e])
+ --TODO : Add the MachOp MO_V_Broadcast
+ else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+ [CmmReg (CmmLocal src), cast e])
+ emitAssign (CmmLocal res) (CmmReg (CmmLocal dst))
+
+ cast :: CmmExpr -> CmmExpr
+ cast val = case maybe_pre_write_cast of
+ Nothing -> val
+ Just cast -> CmmMachOp cast [val]
+
+ len :: Length
+ len = vecLength ty
+
+ wid :: Width
+ wid = typeWidth (vecElemType ty)
doVecPackOp :: Maybe MachOp -- Cast from element to vector component
-> CmmType -- Type of vector
@@ -1809,16 +1840,16 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
vecPack src (e : es) i = do
- dst <- newTemp ty
- if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
- else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
- vecPack dst es (i + 1)
+ dst <- newTemp ty
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
- iLit = CmmLit (CmmInt (toInteger i) W32)
+ iLit = CmmLit (CmmInt ((toInteger i) * 16) W32)
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of