diff options
author | Abhiroop Sarkar <asiamgenius@gmail.com> | 2018-09-27 15:28:46 -0400 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-07-03 09:33:39 -0400 |
commit | acd795583625401c5554f8e04ec7efca18814011 (patch) | |
tree | 545e529eed21e78592ff326d4ebf9804095ad2cb /compiler/codeGen | |
parent | df3e5b744db29c085f5bc05f8b609197bcbf9b0c (diff) | |
download | haskell-acd795583625401c5554f8e04ec7efca18814011.tar.gz |
Add support for SIMD operations in the NCG
This adds support for constructing vector types from Float#, Double#, etc.,
and for performing arithmetic operations on them.
Cleaned-Up-By: Ben Gamari <ben@well-typed.com>
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 42 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 55 |
2 files changed, 64 insertions, 33 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 0ff9bd8b56..2cbcfc66a9 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -57,27 +57,27 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")") -baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags -baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags -baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags -baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags -baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags -baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags -baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") -baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags -baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags -baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags -baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags -baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags -baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags -baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") -baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags -baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags -baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags -baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags -baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags -baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags -baseRegOffset _ (ZmmReg n) = panic ("Registers above 
ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") +baseRegOffset dflags (XmmReg 1 _ _ _) = oFFSET_StgRegTable_rXMM1 dflags +baseRegOffset dflags (XmmReg 2 _ _ _) = oFFSET_StgRegTable_rXMM2 dflags +baseRegOffset dflags (XmmReg 3 _ _ _) = oFFSET_StgRegTable_rXMM3 dflags +baseRegOffset dflags (XmmReg 4 _ _ _) = oFFSET_StgRegTable_rXMM4 dflags +baseRegOffset dflags (XmmReg 5 _ _ _) = oFFSET_StgRegTable_rXMM5 dflags +baseRegOffset dflags (XmmReg 6 _ _ _) = oFFSET_StgRegTable_rXMM6 dflags +baseRegOffset _ (XmmReg n _ _ _) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") +baseRegOffset dflags (YmmReg 1 _ _ _) = oFFSET_StgRegTable_rYMM1 dflags +baseRegOffset dflags (YmmReg 2 _ _ _) = oFFSET_StgRegTable_rYMM2 dflags +baseRegOffset dflags (YmmReg 3 _ _ _) = oFFSET_StgRegTable_rYMM3 dflags +baseRegOffset dflags (YmmReg 4 _ _ _) = oFFSET_StgRegTable_rYMM4 dflags +baseRegOffset dflags (YmmReg 5 _ _ _) = oFFSET_StgRegTable_rYMM5 dflags +baseRegOffset dflags (YmmReg 6 _ _ _) = oFFSET_StgRegTable_rYMM6 dflags +baseRegOffset _ (YmmReg n _ _ _) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") +baseRegOffset dflags (ZmmReg 1 _ _ _) = oFFSET_StgRegTable_rZMM1 dflags +baseRegOffset dflags (ZmmReg 2 _ _ _) = oFFSET_StgRegTable_rZMM2 dflags +baseRegOffset dflags (ZmmReg 3 _ _ _) = oFFSET_StgRegTable_rZMM3 dflags +baseRegOffset dflags (ZmmReg 4 _ _ _) = oFFSET_StgRegTable_rZMM4 dflags +baseRegOffset dflags (ZmmReg 5 _ _ _) = oFFSET_StgRegTable_rZMM5 dflags +baseRegOffset dflags (ZmmReg 6 _ _ _) = oFFSET_StgRegTable_rZMM6 dflags +baseRegOffset _ (ZmmReg n _ _ _) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 
c3f9d5a279..9a6cf6c2e5 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -669,7 +669,7 @@ emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res] -- SIMD primops emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do checkVecCompatibility dflags vcat n w - doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res + doVecBroadcastOp (vecElemInjectCast dflags vcat w) ty zeros e res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -1765,9 +1765,8 @@ vecElemProjectCast _ _ _ = Nothing checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () checkVecCompatibility dflags vcat l w = do - when (hscTarget dflags /= HscLlvm) $ do - sorry $ unlines ["SIMD vector instructions require the LLVM back-end." - ,"Please use -fllvm."] + when (hscTarget dflags /= HscLlvm && hscTarget dflags /= HscAsm) $ do + sorry "SIMD vector instructions not supported for the C backend or GHCi" check vecWidth vcat l w where check :: Width -> PrimOpVecCat -> Length -> Width -> FCode () @@ -1792,6 +1791,38 @@ checkVecCompatibility dflags vcat l w = do ------------------------------------------------------------------------------ -- Helpers for translating vector packing and unpacking. 
+doVecBroadcastOp :: Maybe MachOp -- Cast from element to vector component + -> CmmType -- Type of vector + -> CmmExpr -- Initial vector + -> CmmExpr -- Elements + -> CmmFormal -- Destination for result + -> FCode () +doVecBroadcastOp maybe_pre_write_cast ty z es res = do + dst <- newTemp ty + emitAssign (CmmLocal dst) z + vecBroadcast dst es 0 + where + vecBroadcast :: CmmFormal -> CmmExpr -> Int -> FCode () + vecBroadcast src e _ = do + dst <- newTemp ty + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid) + [CmmReg (CmmLocal src), cast e]) + --TODO : Add the MachOp MO_V_Broadcast + else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) + [CmmReg (CmmLocal src), cast e]) + emitAssign (CmmLocal res) (CmmReg (CmmLocal dst)) + + cast :: CmmExpr -> CmmExpr + cast val = case maybe_pre_write_cast of + Nothing -> val + Just cast -> CmmMachOp cast [val] + + len :: Length + len = vecLength ty + + wid :: Width + wid = typeWidth (vecElemType ty) doVecPackOp :: Maybe MachOp -- Cast from element to vector component -> CmmType -- Type of vector @@ -1809,16 +1840,16 @@ doVecPackOp maybe_pre_write_cast ty z es res = do emitAssign (CmmLocal res) (CmmReg (CmmLocal src)) vecPack src (e : es) i = do - dst <- newTemp ty - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - vecPack dst es (i + 1) + dst <- newTemp ty + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) + else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) + vecPack dst es (i + 1) where -- vector indices are always 32-bits - iLit = CmmLit (CmmInt (toInteger i) W32) + iLit = CmmLit (CmmInt ((toInteger i) * 16) W32) cast :: 
CmmExpr -> CmmExpr cast val = case maybe_pre_write_cast of |