summary | refs | log | tree | commit | diff
path: root/compiler/codeGen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- compiler/codeGen/StgCmmPrim.hs 288
1 files changed, 163 insertions, 125 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 8560f7cf1c..5250c9378e 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -40,7 +40,7 @@ import FastString
import Outputable
import Util
-import Control.Monad (liftM)
+import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
@@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp
emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
@@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (
emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- IndexXXXArray
@@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO
emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- ReadXXXArray, identical to IndexXXXArray.
@@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- WriteXXXoffAddr
@@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J
emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
-- WriteXXXArray
@@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
@@ -556,78 +508,136 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
(MO_UF_Conv W64) [w]
--- SIMD vector packing and unpacking
-emitPrimOp _ [res] FloatToFloatX4Op [e] =
- doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
+-- SIMD primops
+emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = -- broadcast: n copies of the single scalar argument e are handed to doVecPackOp
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero) -- vector literal built from n copies of the per-lane zero below
+
+ zero :: CmmLit
+ zero = case vcat of -- per-lane zero literal of width w, selected by the vector category
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
+
+emitPrimOp dflags [res] (VecPackOp vcat n w) es = do -- pack: the scalar arguments es are passed to doVecPackOp as the lanes of res
+ when (length es /= n) $ -- argument count must equal the lane count n, otherwise panic
+ panic "emitPrimOp: VecPackOp has wrong number of arguments"
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero) -- vector literal built from n copies of the per-lane zero below
+
+ zero :: CmmLit
+ zero = case vcat of -- per-lane zero literal of width w, selected by the vector category
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
+
+emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do -- unpack: the single vector argument arg is handed to doVecUnpackOp with the result list res
+ when (length res /= n) $ -- result count must equal the lane count n, otherwise panic
+ panic "emitPrimOp: VecUnpackOp has wrong number of results"
+ doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res -- uses the projection cast, unlike pack/insert which use the injection cast
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
- doVecPackOp Nothing vec4f32 zero es res
+emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = -- insert: vector v, scalar e and index i are forwarded to doVecInsertOp
+ doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
- doVecUnpackOp Nothing vec4f32 arg res
+emitPrimOp _ res (VecIndexByteArrayOp vcat n w) args = -- whole-vector index: reuses doIndexByteArrayOp with the vector type and no cast (Nothing)
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
- doVecInsertOp Nothing vec4f32 v e i res
+emitPrimOp _ res (VecReadByteArrayOp vcat n w) args = -- whole-vector read: same call as the VecIndexByteArrayOp equation (doIndexByteArrayOp, no cast)
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
- doVecPackOp Nothing vec2f64 zero [e,e] res
+emitPrimOp _ res (VecWriteByteArrayOp vcat n w) args = -- whole-vector write: reuses doWriteByteArrayOp with the vector type and no cast (Nothing)
+ doWriteByteArrayOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
- doVecPackOp Nothing vec2f64 zero es res
+emitPrimOp _ res (VecIndexOffAddrOp vcat n w) args = -- whole-vector index off an address: reuses doIndexOffAddrOp with the vector type and no cast (Nothing)
+ doIndexOffAddrOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2f64 arg res
+emitPrimOp _ res (VecReadOffAddrOp vcat n w) args = -- whole-vector read off an address: same call as the VecIndexOffAddrOp equation (doIndexOffAddrOp, no cast)
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2f64 v e i res
+emitPrimOp _ res (VecWriteOffAddrOp vcat n w) args = -- whole-vector write off an address: reuses doWriteOffAddrOp with the vector type and no cast (Nothing)
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w -- vector type for category vcat with n lanes of width w
-emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
+emitPrimOp _ res (VecIndexScalarByteArrayOp vcat n w) args = -- "scalar" variant: passes both the vector type and the scalar lane type to doIndexByteArrayOpAs
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w -- full vector type (n lanes of width w)
-emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type; NOTE(review): presumably the type the offset is scaled by - confirm against doIndexByteArrayOpAs
+
+emitPrimOp _ res (VecReadScalarByteArrayOp vcat n w) args = -- same call as the VecIndexScalarByteArrayOp equation (doIndexByteArrayOpAs with vector and lane types)
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w -- full vector type (n lanes of width w)
-emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
- doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type; NOTE(review): presumably the type the offset is scaled by - confirm against doIndexByteArrayOpAs
-emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
- doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
+emitPrimOp _ res (VecWriteScalarByteArrayOp vcat _ w) args = -- lane count is ignored (wildcard); only the scalar lane type is given to doWriteByteArrayOp
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type, not the vector type
-emitPrimOp _ [res] Int64ToInt64X2Op [e] =
- doVecPackOp Nothing vec2b64 zero [e,e] res
+emitPrimOp _ res (VecIndexScalarOffAddrOp vcat n w) args = -- "scalar" variant: passes both the vector type and the scalar lane type to doIndexOffAddrOpAs
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w -- full vector type (n lanes of width w)
-emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
- doVecPackOp Nothing vec2b64 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type; NOTE(review): presumably the type the offset is scaled by - confirm against doIndexOffAddrOpAs
+
+emitPrimOp _ res (VecReadScalarOffAddrOp vcat n w) args = -- same call as the VecIndexScalarOffAddrOp equation (doIndexOffAddrOpAs with vector and lane types)
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w -- full vector type (n lanes of width w)
-emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2b64 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type; NOTE(review): presumably the type the offset is scaled by - confirm against doIndexOffAddrOpAs
-emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2b64 v e i res
+emitPrimOp _ res (VecWriteScalarOffAddrOp vcat _ w) args = -- lane count is ignored (wildcard); only the scalar lane type is given to doWriteOffAddrOp
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w -- scalar lane type, not the vector type
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
@@ -944,33 +954,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32)
translateOp _ FloatDivOp = Just (MO_F_Quot W32)
translateOp _ FloatNegOp = Just (MO_F_Neg W32)
--- Floating point vector ops
-
-translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
-translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
-translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
-translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
-translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
-
-translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
-translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
-translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
-translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
-translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
-
-translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
-translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
-translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
-translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
-translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
-translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
-
-translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
-translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
-translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
-translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
-translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
-translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
+-- Vector ops
+
+translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w) -- FloatVec ops map to MO_VF_* MachOps, carrying lane count n and width w through unchanged
+translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
+translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
+translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w) -- note the naming: VecDivOp is translated to MO_VF_Quot
+translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
+
+translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w) -- IntVec: add/sub/mul use MO_V_*; quot/rem/neg use the MO_VS_* forms
+translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
+translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
+translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
+
+translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w) -- WordVec: same MO_V_* add/sub/mul as IntVec; quot/rem use MO_VU_* instead of MO_VS_*
+translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
+translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w) -- no VecNegOp equation for WordVec appears in this hunk
-- Conversions
@@ -1183,6 +1186,41 @@ setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
------------------------------------------------------------------------------
+-- Helpers for translating vector primops.
+
+vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType -- CmmType for a whole vector, given its category, lane count and lane width
+vecVmmType pocat n w = vec n (vecCmmCat pocat w) -- applies vec to the lane count and the per-lane type obtained from vecCmmCat
+
+vecCmmCat :: PrimOpVecCat -> Width -> CmmType -- per-lane (scalar) CmmType for a vector category at a given width
+vecCmmCat IntVec = cmmBits -- point-free: the Width argument is applied to the returned constructor function
+vecCmmCat WordVec = cmmBits -- IntVec and WordVec share the same bits type; they are not distinguished here
+vecCmmCat FloatVec = cmmFloat
+
+vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp -- optional cast passed to doVecPackOp/doVecInsertOp by the broadcast, pack and insert equations
+vecElemInjectCast _ FloatVec _ = Nothing -- no cast for any FloatVec width
+vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags) -- IntVec and WordVec use the same mo_WordToN op at each width
+vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ IntVec W64 = Nothing -- no cast at W64
+vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ WordVec W64 = Nothing -- no cast at W64
+vecElemInjectCast _ _ _ = Nothing -- catch-all: any other category/width combination also yields no cast
+
+vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp -- optional cast passed to doVecUnpackOp by the VecUnpackOp equation
+vecElemProjectCast _ FloatVec _ = Nothing -- no cast for any FloatVec width
+vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags) -- IntVec uses the mo_s_*ToWord ops (presumably sign-extending - confirm)
+vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
+vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
+vecElemProjectCast _ IntVec W64 = Nothing -- no cast at W64
+vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags) -- WordVec uses the mo_u_*ToWord ops (presumably zero-extending - confirm)
+vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
+vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
+vecElemProjectCast _ WordVec W64 = Nothing -- no cast at W64
+vecElemProjectCast _ _ _ = Nothing -- catch-all: any other category/width combination also yields no cast
+
+------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
doVecPackOp :: Maybe MachOp -- Cast from element to vector component