summaryrefslogtreecommitdiff
path: root/compiler/codeGen/StgCmmPrim.hs
diff options
context:
space:
mode:
authorGeoffrey Mainland <gmainlan@microsoft.com>2013-08-21 16:18:24 +0100
committerGeoffrey Mainland <gmainlan@microsoft.com>2013-09-22 22:33:59 -0400
commit16b350a4227c96e09533c6f165895f50003d3801 (patch)
treef2fbf6f0f4b5ea2a406cd6a078fc1cb7cce31ad5 /compiler/codeGen/StgCmmPrim.hs
parentda5a647c0c49fee7531ef4c076b1c9e6a9d0fe6d (diff)
downloadhaskell-16b350a4227c96e09533c6f165895f50003d3801.tar.gz
SIMD primops are now generated using schemas that are polymorphic in
width and element type. SIMD primops are now polymorphic in vector size and element type, but only internally to the compiler. More specifically, utils/genprimopcode has been extended so that it "knows" about SIMD vectors. This allows us to, for example, write a single definition for the "add two vectors" primop in primops.txt.pp and have it instantiated at many vector types. This generates a primop in GHC.Prim for each vector type at which "add two vectors" is instantiated, but only one data constructor for the PrimOp data type, so the code generator is much, much simpler.
Diffstat (limited to 'compiler/codeGen/StgCmmPrim.hs')
-rw-r--r--compiler/codeGen/StgCmmPrim.hs288
1 files changed, 163 insertions, 125 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 8560f7cf1c..5250c9378e 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -40,7 +40,7 @@ import FastString
import Outputable
import Util
-import Control.Monad (liftM)
+import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
@@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp
emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
@@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (
emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- IndexXXXArray
@@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO
emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- ReadXXXArray, identical to IndexXXXArray.
@@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- WriteXXXoffAddr
@@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J
emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
-- WriteXXXArray
@@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
@@ -556,78 +508,136 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
(MO_UF_Conv W64) [w]
--- SIMD vector packing and unpacking
-emitPrimOp _ [res] FloatToFloatX4Op [e] =
- doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
+-- SIMD primops
+emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] =
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
+ when (length es /= n) $
+ panic "emitPrimOp: VecPackOp has wrong number of arguments"
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
+ when (length res /= n) $
+ panic "emitPrimOp: VecUnpackOp has wrong number of results"
+ doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
- doVecPackOp Nothing vec4f32 zero es res
+emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] =
+ doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
- doVecUnpackOp Nothing vec4f32 arg res
+emitPrimOp _ res (VecIndexByteArrayOp vcat n w) args =
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
- doVecInsertOp Nothing vec4f32 v e i res
+emitPrimOp _ res (VecReadByteArrayOp vcat n w) args =
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
- doVecPackOp Nothing vec2f64 zero [e,e] res
+emitPrimOp _ res (VecWriteByteArrayOp vcat n w) args =
+ doWriteByteArrayOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
- doVecPackOp Nothing vec2f64 zero es res
+emitPrimOp _ res (VecIndexOffAddrOp vcat n w) args =
+ doIndexOffAddrOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2f64 arg res
+emitPrimOp _ res (VecReadOffAddrOp vcat n w) args =
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2f64 v e i res
+emitPrimOp _ res (VecWriteOffAddrOp vcat n w) args =
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
+emitPrimOp _ res (VecIndexScalarByteArrayOp vcat n w) args =
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp _ res (VecReadScalarByteArrayOp vcat n w) args =
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
- doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
- doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
+emitPrimOp _ res (VecWriteScalarByteArrayOp vcat _ w) args =
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64ToInt64X2Op [e] =
- doVecPackOp Nothing vec2b64 zero [e,e] res
+emitPrimOp _ res (VecIndexScalarOffAddrOp vcat n w) args =
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
- doVecPackOp Nothing vec2b64 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp _ res (VecReadScalarOffAddrOp vcat n w) args =
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2b64 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2b64 v e i res
+emitPrimOp _ res (VecWriteScalarOffAddrOp vcat _ w) args =
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
@@ -944,33 +954,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32)
translateOp _ FloatDivOp = Just (MO_F_Quot W32)
translateOp _ FloatNegOp = Just (MO_F_Neg W32)
--- Floating point vector ops
-
-translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
-translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
-translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
-translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
-translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
-
-translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
-translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
-translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
-translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
-translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
-
-translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
-translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
-translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
-translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
-translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
-translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
-
-translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
-translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
-translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
-translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
-translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
-translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
+-- Vector ops
+
+translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
+translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
+translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
+translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
+translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
+
+translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
+translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
+translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
+
+translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
+translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
-- Conversions
@@ -1183,6 +1186,41 @@ setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
------------------------------------------------------------------------------
+-- Helpers for translating vector primops.
+
+vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
+vecVmmType pocat n w = vec n (vecCmmCat pocat w)
+
+vecCmmCat :: PrimOpVecCat -> Width -> CmmType
+vecCmmCat IntVec = cmmBits
+vecCmmCat WordVec = cmmBits
+vecCmmCat FloatVec = cmmFloat
+
+vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemInjectCast _ FloatVec _ = Nothing
+vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ IntVec W64 = Nothing
+vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ WordVec W64 = Nothing
+vecElemInjectCast _ _ _ = Nothing
+
+vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemProjectCast _ FloatVec _ = Nothing
+vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
+vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
+vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
+vecElemProjectCast _ IntVec W64 = Nothing
+vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
+vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
+vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
+vecElemProjectCast _ WordVec W64 = Nothing
+vecElemProjectCast _ _ _ = Nothing
+
+------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
doVecPackOp :: Maybe MachOp -- Cast from element to vector component