From f792079cf981cdcbc3278532d423ac95eed32bf4 Mon Sep 17 00:00:00 2001 From: Geoffrey Mainland Date: Thu, 14 Feb 2013 18:52:00 +0000 Subject: Add support for 256-bit-wide vectors. --- compiler/cmm/CmmType.hs | 5 +++-- compiler/codeGen/StgCmmArgRep.hs | 11 ++++++++--- compiler/codeGen/StgCmmLayout.hs | 1 + compiler/ghci/ByteCodeAsm.lhs | 2 ++ includes/Cmm.h | 1 + includes/rts/storage/FunTypes.h | 35 ++++++++++++++++++----------------- includes/stg/MiscClosures.h | 2 ++ rts/Linker.c | 3 +++ utils/genapply/GenApply.hs | 6 ++++++ 9 files changed, 44 insertions(+), 22 deletions(-) diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 98e40534f8..76de02bdac 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -21,7 +21,7 @@ module CmmType , Length , vec, vec2, vec4, vec8, vec16 - , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 + , vec2f64, vec4f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 , cmmVec , vecLength, vecElemType , isVecType @@ -285,9 +285,10 @@ vec4 = vec 4 vec8 = vec 8 vec16 = vec 16 -vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType +vec2f64, vec4f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType vec2f64 = vec 2 f64 vec2b64 = vec 2 b64 +vec4f64 = vec 4 f64 vec4f32 = vec 4 f32 vec4b32 = vec 4 b32 vec8b16 = vec 8 b16 diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs index bd228d4617..26569cffdd 100644 --- a/compiler/codeGen/StgCmmArgRep.hs +++ b/compiler/codeGen/StgCmmArgRep.hs @@ -47,6 +47,7 @@ data ArgRep = P -- GC Ptr | F -- Float | D -- Double | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc. + | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc. instance Outputable ArgRep where ppr = text . argRepString argRepString :: ArgRep -> String @@ -57,6 +58,7 @@ argRepString V = "V" argRepString F = "F" argRepString D = "D" argRepString V16 = "V16" +argRepString V32 = "V32" toArgRep :: PrimRep -> ArgRep toArgRep VoidRep = V @@ -68,9 +70,10 @@ toArgRep Int64Rep = L toArgRep Word64Rep = L toArgRep FloatRep = F toArgRep DoubleRep = D -toArgRep (VecRep len elem) - | len*primElemRepSizeB elem == 16 = V16 - | otherwise = error "toArgRep: bad vector primrep" +toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of + 16 -> V16 + 32 -> V32 + _ -> error "toArgRep: bad vector primrep" isNonV :: ArgRep -> Bool isNonV V = False @@ -84,6 +87,7 @@ argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags argRepSizeW _ V = 0 argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags +argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags idArgRep :: Id -> ArgRep idArgRep = toArgRep . idPrimRep @@ -132,4 +136,5 @@ slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1) +slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1) slowCallPattern [] = (fsLit "stg_ap_0", 0) diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 06a47c151b..a74b62ce82 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -394,6 +394,7 @@ stdPattern reps [D] -> Just ARG_D [L] -> Just ARG_L [V16] -> Just ARG_V16 + [V32] -> Just ARG_V32 [N,N] -> Just ARG_NN [N,P] -> Just ARG_NP diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs index 9906467186..7579d7a064 100644 --- a/compiler/ghci/ByteCodeAsm.lhs +++ b/compiler/ghci/ByteCodeAsm.lhs @@ -446,6 +446,7 @@ push_alts L = bci_PUSH_ALTS_L push_alts F = bci_PUSH_ALTS_F push_alts D = bci_PUSH_ALTS_D push_alts V16 = error "push_alts: vector" +push_alts V32 = error "push_alts: vector" return_ubx :: ArgRep -> Word16 return_ubx V = bci_RETURN_V @@ -455,6 +456,7 @@ return_ubx L = bci_RETURN_L return_ubx F = bci_RETURN_F return_ubx D = bci_RETURN_D return_ubx V16 = error "return_ubx: vector" +return_ubx V32 = error "return_ubx: vector" -- Make lists of host-sized words for literals, so that when the -- words are placed in memory at increasing addresses, the diff --git a/includes/Cmm.h b/includes/Cmm.h index 89baaa0987..ae45fd4ded 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -99,6 +99,7 @@ #define D_ float64 #define L_ bits64 #define V16_ bits128 +#define V32_ bits256 #define SIZEOF_StgDouble 8 #define SIZEOF_StgWord64 8 diff --git a/includes/rts/storage/FunTypes.h b/includes/rts/storage/FunTypes.h index 0ba65bb79d..744e8241f2 100644 --- a/includes/rts/storage/FunTypes.h +++ b/includes/rts/storage/FunTypes.h @@ -34,22 +34,23 @@ #define ARG_D 7 #define ARG_L 8 #define ARG_V16 9 -#define ARG_NN 10 -#define ARG_NP 11 -#define ARG_PN 12 -#define ARG_PP 13 -#define ARG_NNN 14 -#define ARG_NNP 15 -#define ARG_NPN 16 -#define ARG_NPP 17 -#define ARG_PNN 18 -#define ARG_PNP 19 -#define ARG_PPN 20 -#define ARG_PPP 21 -#define ARG_PPPP 22 -#define ARG_PPPPP 23 -#define ARG_PPPPPP 24 -#define ARG_PPPPPPP 25 -#define ARG_PPPPPPPP 26 +#define ARG_V32 10 +#define ARG_NN 11 +#define ARG_NP 12 +#define ARG_PN 13 +#define ARG_PP 14 +#define ARG_NNN 15 +#define ARG_NNP 16 +#define ARG_NPN 17 +#define ARG_NPP 18 +#define ARG_PNN 19 +#define ARG_PNP 20 +#define ARG_PPN 21 +#define ARG_PPP 22 +#define ARG_PPPP 23 +#define ARG_PPPPP 24 +#define ARG_PPPPPP 25 +#define ARG_PPPPPPP 26 +#define ARG_PPPPPPPP 27 #endif /* RTS_STORAGE_FUNTYPES_H */ diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 8717687f3e..3ce8683a3e 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -225,6 +225,7 @@ RTS_RET(stg_ap_f); RTS_RET(stg_ap_d); RTS_RET(stg_ap_l); RTS_RET(stg_ap_v16); +RTS_RET(stg_ap_v32); RTS_RET(stg_ap_n); RTS_RET(stg_ap_p); RTS_RET(stg_ap_pv); @@ -242,6 +243,7 @@ RTS_FUN_DECL(stg_ap_f_fast); RTS_FUN_DECL(stg_ap_d_fast); RTS_FUN_DECL(stg_ap_l_fast); RTS_FUN_DECL(stg_ap_v16_fast); +RTS_FUN_DECL(stg_ap_v32_fast); RTS_FUN_DECL(stg_ap_n_fast); RTS_FUN_DECL(stg_ap_p_fast); RTS_FUN_DECL(stg_ap_pv_fast); diff --git a/rts/Linker.c b/rts/Linker.c index 43edde23f8..aa1d3d6ccb 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -882,6 +882,7 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_ap_d_ret) \ SymI_HasProto(stg_ap_l_ret) \ SymI_HasProto(stg_ap_v16_ret) \ + SymI_HasProto(stg_ap_v32_ret) \ SymI_HasProto(stg_ap_n_ret) \ SymI_HasProto(stg_ap_p_ret) \ SymI_HasProto(stg_ap_pv_ret) \ @@ -1244,6 +1245,7 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_ap_d_info) \ SymI_HasProto(stg_ap_l_info) \ SymI_HasProto(stg_ap_v16_info) \ + SymI_HasProto(stg_ap_v32_info) \ SymI_HasProto(stg_ap_n_info) \ SymI_HasProto(stg_ap_p_info) \ SymI_HasProto(stg_ap_pv_info) \ @@ -1260,6 +1262,7 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_ap_d_fast) \ SymI_HasProto(stg_ap_l_fast) \ SymI_HasProto(stg_ap_v16_fast) \ + SymI_HasProto(stg_ap_v32_fast) \ SymI_HasProto(stg_ap_n_fast) \ SymI_HasProto(stg_ap_p_fast) \ SymI_HasProto(stg_ap_pv_fast) \ diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index 2baf85896a..036a8479a4 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -33,6 +33,7 @@ data ArgRep | D -- double | L -- long (64-bit) | V16 -- 16-byte (128-bit) vectors + | V32 -- 32-byte (256-bit) vectors -- size of a value in *words* argSize :: ArgRep -> Int @@ -43,6 +44,7 @@ argSize F = 1 argSize D = (SIZEOF_DOUBLE `quot` SIZEOF_VOID_P :: Int) argSize L = (8 `quot` SIZEOF_VOID_P :: Int) argSize V16 = (16 `quot` SIZEOF_VOID_P :: Int) +argSize V32 = (32 `quot` SIZEOF_VOID_P :: Int) showArg :: ArgRep -> String showArg N = "n" @@ -52,6 +54,7 @@ showArg F = "f" showArg D = "d" showArg L = "l" showArg V16 = "v16" +showArg V32 = "v32" -- is a value a pointer? isPtr :: ArgRep -> Bool @@ -504,6 +507,7 @@ argRep D = text "D_" argRep L = text "L_" argRep P = text "gcptr" argRep V16 = text "V16_" +argRep V32 = text "V32_" argRep _ = text "W_" genApply regstatus args = @@ -854,6 +858,7 @@ applyTypes = [ [D], [L], [V16], + [V32], [N], [P], [P,V], @@ -882,6 +887,7 @@ stackApplyTypes = [ [D], [L], [V16], + [V32], [N,N], [N,P], [P,N], -- cgit v1.2.1