summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsheaf <sam.derbyshire@gmail.com>2022-10-14 14:31:15 +0200
committerMatthew Pickering <matthewtpickering@gmail.com>2022-12-13 09:30:47 +0000
commit19d7a5fc8f5c9f421987935bc4e9bc9f79f3c445 (patch)
treecf179a42546f107e63fabfc58b0a3c28984aad12
parent7a40261fd963c2b8895afd34bf6a7969d5e474cf (diff)
downloadhaskell-19d7a5fc8f5c9f421987935bc4e9bc9f79f3c445.tar.gz
Remove SIMD conversions
This patch makes it so that packing/unpacking SIMD vectors always uses correctly sized element types, e.g. unpacking a Word16X8# will give a tuple of Word16#s. As a result, we can get rid of the conversion instructions that were previously required. Fixes #22296. (Cherry picked from commit 6d7d91817795d7ee7f45557411368a1738daa488.)
-rw-r--r--compiler/GHC/Builtin/primops.txt.pp12
-rw-r--r--compiler/GHC/Cmm/MachOp.hs10
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs81
-rw-r--r--testsuite/tests/codeGen/should_run/T22296.hs41
-rw-r--r--testsuite/tests/codeGen/should_run/T22296.stdout3
-rw-r--r--testsuite/tests/codeGen/should_run/all.T1
6 files changed, 76 insertions, 72 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index 625c512cc6..a3dcd50f06 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -3585,9 +3585,9 @@ section "SIMD Vectors"
[<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \
,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \
,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \
- ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
- ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
- ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \
+ ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
+ ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
+ ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \
,<Float,Float#,4>,<Double,Double#,2> \
,<Float,Float#,8>,<Double,Double#,4> \
,<Float,Float#,16>,<Double,Double#,8>]
@@ -3609,9 +3609,9 @@ section "SIMD Vectors"
[<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \
,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \
,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \
- ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
- ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
- ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>]
+ ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
+ ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
+ ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>]
primtype VECTOR
with llvm_only = True
diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs
index 0bd3ac1111..632165b6b2 100644
--- a/compiler/GHC/Cmm/MachOp.hs
+++ b/compiler/GHC/Cmm/MachOp.hs
@@ -514,8 +514,11 @@ machOpArgReps platform op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
- MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth platform]
- MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth platform]
+ MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r, W32]
+ MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)), W32]
+ MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,W32]
+ MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),W32]
+ -- SIMD vector indices are always 32 bit
MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r]
@@ -528,9 +531,6 @@ machOpArgReps platform op =
MO_VU_Quot _ r -> [r,r]
MO_VU_Rem _ r -> [r,r]
- MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth platform]
- MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth platform]
-
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index a0e6ecf871..2e646ef4fb 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -853,7 +853,7 @@ emitPrimOp cfg primop =
-- SIMD primops
(VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do
checkVecCompatibility cfg vcat n w
- doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res
+ doVecPackOp ty zeros (replicate n e) res
where
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
@@ -871,7 +871,7 @@ emitPrimOp cfg primop =
checkVecCompatibility cfg vcat n w
when (es `lengthIsNot` n) $
panic "emitPrimOp: VecPackOp has wrong number of arguments"
- doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res
+ doVecPackOp ty zeros es res
where
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
@@ -889,14 +889,14 @@ emitPrimOp cfg primop =
checkVecCompatibility cfg vcat n w
when (res `lengthIsNot` n) $
panic "emitPrimOp: VecUnpackOp has wrong number of results"
- doVecUnpackOp (vecElemProjectCast platform vcat w) ty arg res
+ doVecUnpackOp ty arg res
where
ty :: CmmType
ty = vecVmmType vcat n w
(VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do
checkVecCompatibility cfg vcat n w
- doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res
+ doVecInsertOp ty v e i res
where
ty :: CmmType
ty = vecVmmType vcat n w
@@ -2247,32 +2247,8 @@ vecCmmCat IntVec = cmmBits
vecCmmCat WordVec = cmmBits
vecCmmCat FloatVec = cmmFloat
-vecElemInjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemInjectCast _ FloatVec _ = Nothing
-vecElemInjectCast platform IntVec W8 = Just (mo_WordTo8 platform)
-vecElemInjectCast platform IntVec W16 = Just (mo_WordTo16 platform)
-vecElemInjectCast platform IntVec W32 = Just (mo_WordTo32 platform)
-vecElemInjectCast _ IntVec W64 = Nothing
-vecElemInjectCast platform WordVec W8 = Just (mo_WordTo8 platform)
-vecElemInjectCast platform WordVec W16 = Just (mo_WordTo16 platform)
-vecElemInjectCast platform WordVec W32 = Just (mo_WordTo32 platform)
-vecElemInjectCast _ WordVec W64 = Nothing
-vecElemInjectCast _ _ _ = Nothing
-
-vecElemProjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemProjectCast _ FloatVec _ = Nothing
-vecElemProjectCast platform IntVec W8 = Just (mo_s_8ToWord platform)
-vecElemProjectCast platform IntVec W16 = Just (mo_s_16ToWord platform)
-vecElemProjectCast platform IntVec W32 = Just (mo_s_32ToWord platform)
-vecElemProjectCast _ IntVec W64 = Nothing
-vecElemProjectCast platform WordVec W8 = Just (mo_u_8ToWord platform)
-vecElemProjectCast platform WordVec W16 = Just (mo_u_16ToWord platform)
-vecElemProjectCast platform WordVec W32 = Just (mo_u_32ToWord platform)
-vecElemProjectCast _ WordVec W64 = Nothing
-vecElemProjectCast _ _ _ = Nothing
-
-
--- NOTE [SIMD Design for the future]
+-- Note [SIMD Design for the future]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Check to make sure that we can generate code for the specified vector type
-- given the current set of dynamic flags.
-- Currently these checks are specific to x86 and x86_64 architecture.
@@ -2333,13 +2309,12 @@ checkVecCompatibility cfg vcat l w =
------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
-doVecPackOp :: Maybe MachOp -- Cast from element to vector component
- -> CmmType -- Type of vector
+doVecPackOp :: CmmType -- Type of vector
-> CmmExpr -- Initial vector
-> [CmmExpr] -- Elements
-> CmmFormal -- Destination for result
-> FCode ()
-doVecPackOp maybe_pre_write_cast ty z es res = do
+doVecPackOp ty z es res = do
dst <- newTemp ty
emitAssign (CmmLocal dst) z
vecPack dst es 0
@@ -2352,31 +2327,25 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
dst <- newTemp ty
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
+ [CmmReg (CmmLocal src), e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
+ [CmmReg (CmmLocal src), e, iLit])
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
iLit = CmmLit (CmmInt (toInteger i) W32)
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_pre_write_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
-
len :: Length
len = vecLength ty
wid :: Width
wid = typeWidth (vecElemType ty)
-doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
- -> CmmType -- Type of vector
+doVecUnpackOp :: CmmType -- Type of vector
-> CmmExpr -- Vector
-> [CmmFormal] -- Element results
-> FCode ()
-doVecUnpackOp maybe_post_read_cast ty e res =
+doVecUnpackOp ty e res =
vecUnpack res 0
where
vecUnpack :: [CmmFormal] -> Int -> FCode ()
@@ -2385,46 +2354,36 @@ doVecUnpackOp maybe_post_read_cast ty e res =
vecUnpack (r : rs) i = do
if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
- [e, iLit]))
- else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
- [e, iLit]))
+ then emitAssign (CmmLocal r) (CmmMachOp (MO_VF_Extract len wid)
+ [e, iLit])
+ else emitAssign (CmmLocal r) (CmmMachOp (MO_V_Extract len wid)
+ [e, iLit])
vecUnpack rs (i + 1)
where
-- vector indices are always 32-bits
iLit = CmmLit (CmmInt (toInteger i) W32)
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_post_read_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
-
len :: Length
len = vecLength ty
wid :: Width
wid = typeWidth (vecElemType ty)
-doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
- -> CmmType -- Vector type
+doVecInsertOp :: CmmType -- Vector type
-> CmmExpr -- Source vector
-> CmmExpr -- Element
-> CmmExpr -- Index at which to insert element
-> CmmFormal -- Destination for result
-> FCode ()
-doVecInsertOp maybe_pre_write_cast ty src e idx res = do
+doVecInsertOp ty src e idx res = do
platform <- getPlatform
-- vector indices are always 32-bits
let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth platform) W32) [idx]
if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
- else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
+ then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, e, idx'])
+ else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, e, idx'])
where
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_pre_write_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
len :: Length
len = vecLength ty
diff --git a/testsuite/tests/codeGen/should_run/T22296.hs b/testsuite/tests/codeGen/should_run/T22296.hs
new file mode 100644
index 0000000000..d5ea23afda
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T22296.hs
@@ -0,0 +1,41 @@
+{-# language MagicHash, UnboxedTuples, UnboxedSums #-}
+
+module Main ( main ) where
+
+import GHC.Exts
+import GHC.Int
+import GHC.Word
+
+foo :: Word16X8# -> Integer
+foo w16x8 =
+ case unpackWord16X8# w16x8 of
+ (# w1, w2, w3, w4, w5, w6, w7, w8 #) ->
+ let
+ s = sum $ map fromIntegral
+ [ W16# w1, W16# w2, W16# w3, W16# w4
+ , W16# w5, W16# w6, W16# w7, W16# w8 ]
+ in s
+
+bar :: Int32X4# -> Integer
+bar i32x4 =
+ case unpackInt32X4# i32x4 of
+ (# i1, i2, i3, i4 #) ->
+ let
+ s = sum $ map fromIntegral
+ [ I32# i1, I32# i2, I32# i3, I32# i4 ]
+ in s
+
+baz :: FloatX4# -> Float
+baz fx4 =
+ case unpackFloatX4# fx4 of
+ (# f1, f2, f3, f4 #) ->
+ let
+ s = sum
+ [ F# f1, F# f2, F# f3, F# f4 ]
+ in s
+
+main :: IO ()
+main = do
+ print ( foo ( broadcastWord16X8# ( wordToWord16# 1## ) ) )
+ print ( bar ( broadcastInt32X4# ( intToInt32# 1# ) ) )
+ print ( baz ( broadcastFloatX4# ( 1.0# ) ) )
diff --git a/testsuite/tests/codeGen/should_run/T22296.stdout b/testsuite/tests/codeGen/should_run/T22296.stdout
new file mode 100644
index 0000000000..52b2242af3
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T22296.stdout
@@ -0,0 +1,3 @@
+8
+4
+4.0
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index af3a91d026..7f1e9f6e18 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -220,3 +220,4 @@ test('T21141', normal, compile_and_run, [''])
test('T21186', normal, compile_and_run, [''])
test('T20640a', normal, compile_and_run, [''])
test('T20640b', normal, compile_and_run, [''])
+test('T22296',[only_ways(llvm_ways)],compile_and_run,[''])