diff options
author | sheaf <sam.derbyshire@gmail.com> | 2022-10-14 14:31:15 +0200 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2022-12-13 09:30:47 +0000 |
commit | 19d7a5fc8f5c9f421987935bc4e9bc9f79f3c445 (patch) | |
tree | cf179a42546f107e63fabfc58b0a3c28984aad12 | |
parent | 7a40261fd963c2b8895afd34bf6a7969d5e474cf (diff) | |
download | haskell-19d7a5fc8f5c9f421987935bc4e9bc9f79f3c445.tar.gz |
Remove SIMD conversions
This patch makes it so that packing/unpacking SIMD
vectors always uses the right-sized types, e.g.
unpacking a Word16X8# will give a tuple of Word16#s.
As a result, we can get rid of the conversion instructions
that were previously required.
Fixes #22296
(cherry picked from commit 6d7d91817795d7ee7f45557411368a1738daa488)
-rw-r--r-- | compiler/GHC/Builtin/primops.txt.pp | 12 | ||||
-rw-r--r-- | compiler/GHC/Cmm/MachOp.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prim.hs | 81 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T22296.hs | 41 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T22296.stdout | 3 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 1 |
6 files changed, 76 insertions, 72 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp index 625c512cc6..a3dcd50f06 100644 --- a/compiler/GHC/Builtin/primops.txt.pp +++ b/compiler/GHC/Builtin/primops.txt.pp @@ -3585,9 +3585,9 @@ section "SIMD Vectors" [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \ ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \ ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \ - ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ - ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ - ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \ + ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ + ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ + ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \ ,<Float,Float#,4>,<Double,Double#,2> \ ,<Float,Float#,8>,<Double,Double#,4> \ ,<Float,Float#,16>,<Double,Double#,8>] @@ -3609,9 +3609,9 @@ section "SIMD Vectors" [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \ ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \ ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \ - ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ - ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ - ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>] + ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ + ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ + ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>] primtype VECTOR with llvm_only = True diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs index 0bd3ac1111..632165b6b2 100644 --- a/compiler/GHC/Cmm/MachOp.hs +++ 
b/compiler/GHC/Cmm/MachOp.hs @@ -514,8 +514,11 @@ machOpArgReps platform op = MO_FS_Conv from _ -> [from] MO_FF_Conv from _ -> [from] - MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth platform] - MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth platform] + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r, W32] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)), W32] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,W32] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),W32] + -- SIMD vector indices are always 32 bit MO_V_Add _ r -> [r,r] MO_V_Sub _ r -> [r,r] @@ -528,9 +531,6 @@ machOpArgReps platform op = MO_VU_Quot _ r -> [r,r] MO_VU_Rem _ r -> [r,r] - MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth platform] - MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth platform] - MO_VF_Add _ r -> [r,r] MO_VF_Sub _ r -> [r,r] MO_VF_Mul _ r -> [r,r] diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index a0e6ecf871..2e646ef4fb 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -853,7 +853,7 @@ emitPrimOp cfg primop = -- SIMD primops (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do checkVecCompatibility cfg vcat n w - doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res + doVecPackOp ty zeros (replicate n e) res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -871,7 +871,7 @@ emitPrimOp cfg primop = checkVecCompatibility cfg vcat n w when (es `lengthIsNot` n) $ panic "emitPrimOp: VecPackOp has wrong number of arguments" - doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res + doVecPackOp ty zeros es res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -889,14 +889,14 @@ emitPrimOp cfg primop = checkVecCompatibility cfg vcat n w when (res `lengthIsNot` n) $ panic "emitPrimOp: VecUnpackOp has wrong number of results" - doVecUnpackOp (vecElemProjectCast platform 
vcat w) ty arg res + doVecUnpackOp ty arg res where ty :: CmmType ty = vecVmmType vcat n w (VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do checkVecCompatibility cfg vcat n w - doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res + doVecInsertOp ty v e i res where ty :: CmmType ty = vecVmmType vcat n w @@ -2247,32 +2247,8 @@ vecCmmCat IntVec = cmmBits vecCmmCat WordVec = cmmBits vecCmmCat FloatVec = cmmFloat -vecElemInjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemInjectCast _ FloatVec _ = Nothing -vecElemInjectCast platform IntVec W8 = Just (mo_WordTo8 platform) -vecElemInjectCast platform IntVec W16 = Just (mo_WordTo16 platform) -vecElemInjectCast platform IntVec W32 = Just (mo_WordTo32 platform) -vecElemInjectCast _ IntVec W64 = Nothing -vecElemInjectCast platform WordVec W8 = Just (mo_WordTo8 platform) -vecElemInjectCast platform WordVec W16 = Just (mo_WordTo16 platform) -vecElemInjectCast platform WordVec W32 = Just (mo_WordTo32 platform) -vecElemInjectCast _ WordVec W64 = Nothing -vecElemInjectCast _ _ _ = Nothing - -vecElemProjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemProjectCast _ FloatVec _ = Nothing -vecElemProjectCast platform IntVec W8 = Just (mo_s_8ToWord platform) -vecElemProjectCast platform IntVec W16 = Just (mo_s_16ToWord platform) -vecElemProjectCast platform IntVec W32 = Just (mo_s_32ToWord platform) -vecElemProjectCast _ IntVec W64 = Nothing -vecElemProjectCast platform WordVec W8 = Just (mo_u_8ToWord platform) -vecElemProjectCast platform WordVec W16 = Just (mo_u_16ToWord platform) -vecElemProjectCast platform WordVec W32 = Just (mo_u_32ToWord platform) -vecElemProjectCast _ WordVec W64 = Nothing -vecElemProjectCast _ _ _ = Nothing - - --- NOTE [SIMD Design for the future] +-- Note [SIMD Design for the future] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Check to make sure that we can generate code for the specified vector type -- given the current set of dynamic flags. 
-- Currently these checks are specific to x86 and x86_64 architecture. @@ -2333,13 +2309,12 @@ checkVecCompatibility cfg vcat l w = ------------------------------------------------------------------------------ -- Helpers for translating vector packing and unpacking. -doVecPackOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Type of vector +doVecPackOp :: CmmType -- Type of vector -> CmmExpr -- Initial vector -> [CmmExpr] -- Elements -> CmmFormal -- Destination for result -> FCode () -doVecPackOp maybe_pre_write_cast ty z es res = do +doVecPackOp ty z es res = do dst <- newTemp ty emitAssign (CmmLocal dst) z vecPack dst es 0 @@ -2352,31 +2327,25 @@ doVecPackOp maybe_pre_write_cast ty z es res = do dst <- newTemp ty if isFloatType (vecElemType ty) then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) + [CmmReg (CmmLocal src), e, iLit]) else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) + [CmmReg (CmmLocal src), e, iLit]) vecPack dst es (i + 1) where -- vector indices are always 32-bits iLit = CmmLit (CmmInt (toInteger i) W32) - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - len :: Length len = vecLength ty wid :: Width wid = typeWidth (vecElemType ty) -doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result - -> CmmType -- Type of vector +doVecUnpackOp :: CmmType -- Type of vector -> CmmExpr -- Vector -> [CmmFormal] -- Element results -> FCode () -doVecUnpackOp maybe_post_read_cast ty e res = +doVecUnpackOp ty e res = vecUnpack res 0 where vecUnpack :: [CmmFormal] -> Int -> FCode () @@ -2385,46 +2354,36 @@ doVecUnpackOp maybe_post_read_cast ty e res = vecUnpack (r : rs) i = do if isFloatType (vecElemType ty) - then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid) - [e, iLit])) - else emitAssign (CmmLocal r) (cast 
(CmmMachOp (MO_V_Extract len wid) - [e, iLit])) + then emitAssign (CmmLocal r) (CmmMachOp (MO_VF_Extract len wid) + [e, iLit]) + else emitAssign (CmmLocal r) (CmmMachOp (MO_V_Extract len wid) + [e, iLit]) vecUnpack rs (i + 1) where -- vector indices are always 32-bits iLit = CmmLit (CmmInt (toInteger i) W32) - cast :: CmmExpr -> CmmExpr - cast val = case maybe_post_read_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - len :: Length len = vecLength ty wid :: Width wid = typeWidth (vecElemType ty) -doVecInsertOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Vector type +doVecInsertOp :: CmmType -- Vector type -> CmmExpr -- Source vector -> CmmExpr -- Element -> CmmExpr -- Index at which to insert element -> CmmFormal -- Destination for result -> FCode () -doVecInsertOp maybe_pre_write_cast ty src e idx res = do +doVecInsertOp ty src e idx res = do platform <- getPlatform -- vector indices are always 32-bits let idx' :: CmmExpr idx' = CmmMachOp (MO_SS_Conv (wordWidth platform) W32) [idx] if isFloatType (vecElemType ty) - then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx']) - else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) + then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, e, idx']) + else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, e, idx']) where - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] len :: Length len = vecLength ty diff --git a/testsuite/tests/codeGen/should_run/T22296.hs b/testsuite/tests/codeGen/should_run/T22296.hs new file mode 100644 index 0000000000..d5ea23afda --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T22296.hs @@ -0,0 +1,41 @@ +{-# language MagicHash, UnboxedTuples, UnboxedSums #-} + +module Main ( main ) where + +import GHC.Exts +import GHC.Int +import GHC.Word + +foo :: Word16X8# -> Integer +foo w16x8 = + 
case unpackWord16X8# w16x8 of + (# w1, w2, w3, w4, w5, w6, w7, w8 #) -> + let + s = sum $ map fromIntegral + [ W16# w1, W16# w2, W16# w3, W16# w4 + , W16# w5, W16# w6, W16# w7, W16# w8 ] + in s + +bar :: Int32X4# -> Integer +bar i32x4 = + case unpackInt32X4# i32x4 of + (# i1, i2, i3, i4 #) -> + let + s = sum $ map fromIntegral + [ I32# i1, I32# i2, I32# i3, I32# i4 ] + in s + +baz :: FloatX4# -> Float +baz fx4 = + case unpackFloatX4# fx4 of + (# f1, f2, f3, f4 #) -> + let + s = sum + [ F# f1, F# f2, F# f3, F# f4 ] + in s + +main :: IO () +main = do + print ( foo ( broadcastWord16X8# ( wordToWord16# 1## ) ) ) + print ( bar ( broadcastInt32X4# ( intToInt32# 1# ) ) ) + print ( baz ( broadcastFloatX4# ( 1.0# ) ) ) diff --git a/testsuite/tests/codeGen/should_run/T22296.stdout b/testsuite/tests/codeGen/should_run/T22296.stdout new file mode 100644 index 0000000000..52b2242af3 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T22296.stdout @@ -0,0 +1,3 @@ +8 +4 +4.0 diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index af3a91d026..7f1e9f6e18 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -220,3 +220,4 @@ test('T21141', normal, compile_and_run, ['']) test('T21186', normal, compile_and_run, ['']) test('T20640a', normal, compile_and_run, ['']) test('T20640b', normal, compile_and_run, ['']) +test('T22296',[only_ways(llvm_ways)],compile_and_run,['']) |