summary refs log tree commit diff
diff options
context:
space:
mode:
author Geoffrey Mainland <gmainlan@microsoft.com> 2013-02-14 18:52:00 +0000
committer Geoffrey Mainland <gmainlan@microsoft.com> 2013-07-08 10:28:37 +0100
commit f792079cf981cdcbc3278532d423ac95eed32bf4 (patch)
tree af04c025eb2c487a393d15fade49ffdc20c43bcc
parent 5401d5d136b6769b392c12d00dd1ea2a12a74e16 (diff)
download haskell-f792079cf981cdcbc3278532d423ac95eed32bf4.tar.gz
Add support for 256-bit-wide vectors.
-rw-r--r-- compiler/cmm/CmmType.hs 5
-rw-r--r-- compiler/codeGen/StgCmmArgRep.hs 11
-rw-r--r-- compiler/codeGen/StgCmmLayout.hs 1
-rw-r--r-- compiler/ghci/ByteCodeAsm.lhs 2
-rw-r--r-- includes/Cmm.h 1
-rw-r--r-- includes/rts/storage/FunTypes.h 35
-rw-r--r-- includes/stg/MiscClosures.h 2
-rw-r--r-- rts/Linker.c 3
-rw-r--r-- utils/genapply/GenApply.hs 6
9 files changed, 44 insertions, 22 deletions
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 98e40534f8..76de02bdac 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -21,7 +21,7 @@ module CmmType
, Length
, vec, vec2, vec4, vec8, vec16
- , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
+ , vec2f64, vec4f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
, cmmVec
, vecLength, vecElemType
, isVecType
@@ -285,9 +285,10 @@ vec4 = vec 4
vec8 = vec 8
vec16 = vec 16
-vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
+vec2f64, vec4f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
vec2f64 = vec 2 f64
vec2b64 = vec 2 b64
+vec4f64 = vec 4 f64
vec4f32 = vec 4 f32
vec4b32 = vec 4 b32
vec8b16 = vec 8 b16
diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs
index bd228d4617..26569cffdd 100644
--- a/compiler/codeGen/StgCmmArgRep.hs
+++ b/compiler/codeGen/StgCmmArgRep.hs
@@ -47,6 +47,7 @@ data ArgRep = P -- GC Ptr
| F -- Float
| D -- Double
| V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
+ | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
instance Outputable ArgRep where ppr = text . argRepString
argRepString :: ArgRep -> String
@@ -57,6 +58,7 @@ argRepString V = "V"
argRepString F = "F"
argRepString D = "D"
argRepString V16 = "V16"
+argRepString V32 = "V32"
toArgRep :: PrimRep -> ArgRep
toArgRep VoidRep = V
@@ -68,9 +70,10 @@ toArgRep Int64Rep = L
toArgRep Word64Rep = L
toArgRep FloatRep = F
toArgRep DoubleRep = D
-toArgRep (VecRep len elem)
- | len*primElemRepSizeB elem == 16 = V16
- | otherwise = error "toArgRep: bad vector primrep"
+toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
+ 16 -> V16
+ 32 -> V32
+ _ -> error "toArgRep: bad vector primrep"
isNonV :: ArgRep -> Bool
isNonV V = False
@@ -84,6 +87,7 @@ argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
argRepSizeW _ V = 0
argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
+argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags
idArgRep :: Id -> ArgRep
idArgRep = toArgRep . idPrimRep
@@ -132,4 +136,5 @@ slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
+slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1)
slowCallPattern [] = (fsLit "stg_ap_0", 0)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 06a47c151b..a74b62ce82 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -394,6 +394,7 @@ stdPattern reps
[D] -> Just ARG_D
[L] -> Just ARG_L
[V16] -> Just ARG_V16
+ [V32] -> Just ARG_V32
[N,N] -> Just ARG_NN
[N,P] -> Just ARG_NP
diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs
index 9906467186..7579d7a064 100644
--- a/compiler/ghci/ByteCodeAsm.lhs
+++ b/compiler/ghci/ByteCodeAsm.lhs
@@ -446,6 +446,7 @@ push_alts L = bci_PUSH_ALTS_L
push_alts F = bci_PUSH_ALTS_F
push_alts D = bci_PUSH_ALTS_D
push_alts V16 = error "push_alts: vector"
+push_alts V32 = error "push_alts: vector"
return_ubx :: ArgRep -> Word16
return_ubx V = bci_RETURN_V
@@ -455,6 +456,7 @@ return_ubx L = bci_RETURN_L
return_ubx F = bci_RETURN_F
return_ubx D = bci_RETURN_D
return_ubx V16 = error "return_ubx: vector"
+return_ubx V32 = error "return_ubx: vector"
-- Make lists of host-sized words for literals, so that when the
-- words are placed in memory at increasing addresses, the
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 89baaa0987..ae45fd4ded 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -99,6 +99,7 @@
#define D_ float64
#define L_ bits64
#define V16_ bits128
+#define V32_ bits256
#define SIZEOF_StgDouble 8
#define SIZEOF_StgWord64 8
diff --git a/includes/rts/storage/FunTypes.h b/includes/rts/storage/FunTypes.h
index 0ba65bb79d..744e8241f2 100644
--- a/includes/rts/storage/FunTypes.h
+++ b/includes/rts/storage/FunTypes.h
@@ -34,22 +34,23 @@
#define ARG_D 7
#define ARG_L 8
#define ARG_V16 9
-#define ARG_NN 10
-#define ARG_NP 11
-#define ARG_PN 12
-#define ARG_PP 13
-#define ARG_NNN 14
-#define ARG_NNP 15
-#define ARG_NPN 16
-#define ARG_NPP 17
-#define ARG_PNN 18
-#define ARG_PNP 19
-#define ARG_PPN 20
-#define ARG_PPP 21
-#define ARG_PPPP 22
-#define ARG_PPPPP 23
-#define ARG_PPPPPP 24
-#define ARG_PPPPPPP 25
-#define ARG_PPPPPPPP 26
+#define ARG_V32 10
+#define ARG_NN 11
+#define ARG_NP 12
+#define ARG_PN 13
+#define ARG_PP 14
+#define ARG_NNN 15
+#define ARG_NNP 16
+#define ARG_NPN 17
+#define ARG_NPP 18
+#define ARG_PNN 19
+#define ARG_PNP 20
+#define ARG_PPN 21
+#define ARG_PPP 22
+#define ARG_PPPP 23
+#define ARG_PPPPP 24
+#define ARG_PPPPPP 25
+#define ARG_PPPPPPP 26
+#define ARG_PPPPPPPP 27
#endif /* RTS_STORAGE_FUNTYPES_H */
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 8717687f3e..3ce8683a3e 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -225,6 +225,7 @@ RTS_RET(stg_ap_f);
RTS_RET(stg_ap_d);
RTS_RET(stg_ap_l);
RTS_RET(stg_ap_v16);
+RTS_RET(stg_ap_v32);
RTS_RET(stg_ap_n);
RTS_RET(stg_ap_p);
RTS_RET(stg_ap_pv);
@@ -242,6 +243,7 @@ RTS_FUN_DECL(stg_ap_f_fast);
RTS_FUN_DECL(stg_ap_d_fast);
RTS_FUN_DECL(stg_ap_l_fast);
RTS_FUN_DECL(stg_ap_v16_fast);
+RTS_FUN_DECL(stg_ap_v32_fast);
RTS_FUN_DECL(stg_ap_n_fast);
RTS_FUN_DECL(stg_ap_p_fast);
RTS_FUN_DECL(stg_ap_pv_fast);
diff --git a/rts/Linker.c b/rts/Linker.c
index 43edde23f8..aa1d3d6ccb 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -882,6 +882,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_ret) \
SymI_HasProto(stg_ap_l_ret) \
SymI_HasProto(stg_ap_v16_ret) \
+ SymI_HasProto(stg_ap_v32_ret) \
SymI_HasProto(stg_ap_n_ret) \
SymI_HasProto(stg_ap_p_ret) \
SymI_HasProto(stg_ap_pv_ret) \
@@ -1244,6 +1245,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_info) \
SymI_HasProto(stg_ap_l_info) \
SymI_HasProto(stg_ap_v16_info) \
+ SymI_HasProto(stg_ap_v32_info) \
SymI_HasProto(stg_ap_n_info) \
SymI_HasProto(stg_ap_p_info) \
SymI_HasProto(stg_ap_pv_info) \
@@ -1260,6 +1262,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_d_fast) \
SymI_HasProto(stg_ap_l_fast) \
SymI_HasProto(stg_ap_v16_fast) \
+ SymI_HasProto(stg_ap_v32_fast) \
SymI_HasProto(stg_ap_n_fast) \
SymI_HasProto(stg_ap_p_fast) \
SymI_HasProto(stg_ap_pv_fast) \
diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs
index 2baf85896a..036a8479a4 100644
--- a/utils/genapply/GenApply.hs
+++ b/utils/genapply/GenApply.hs
@@ -33,6 +33,7 @@ data ArgRep
| D -- double
| L -- long (64-bit)
| V16 -- 16-byte (128-bit) vectors
+ | V32 -- 32-byte (256-bit) vectors
-- size of a value in *words*
argSize :: ArgRep -> Int
@@ -43,6 +44,7 @@ argSize F = 1
argSize D = (SIZEOF_DOUBLE `quot` SIZEOF_VOID_P :: Int)
argSize L = (8 `quot` SIZEOF_VOID_P :: Int)
argSize V16 = (16 `quot` SIZEOF_VOID_P :: Int)
+argSize V32 = (32 `quot` SIZEOF_VOID_P :: Int)
showArg :: ArgRep -> String
showArg N = "n"
@@ -52,6 +54,7 @@ showArg F = "f"
showArg D = "d"
showArg L = "l"
showArg V16 = "v16"
+showArg V32 = "v32"
-- is a value a pointer?
isPtr :: ArgRep -> Bool
@@ -504,6 +507,7 @@ argRep D = text "D_"
argRep L = text "L_"
argRep P = text "gcptr"
argRep V16 = text "V16_"
+argRep V32 = text "V32_"
argRep _ = text "W_"
genApply regstatus args =
@@ -854,6 +858,7 @@ applyTypes = [
[D],
[L],
[V16],
+ [V32],
[N],
[P],
[P,V],
@@ -882,6 +887,7 @@ stackApplyTypes = [
[D],
[L],
[V16],
+ [V32],
[N,N],
[N,P],
[P,N],