summary | refs | log | tree | commit | diff
path: root/gcc/tree-vect-stmts.c
diff options
context:
space:
mode:
author kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-12-31 11:09:42 +0000
committer kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-12-31 11:09:42 +0000
commit 1706116db656978bf56667cdc36742be0a507a9b (patch)
tree c68ae3cee65d5606c9b4d515b06f89e789e4d701 /gcc/tree-vect-stmts.c
parent f750fe99449324d741dba8cce75c64961b80ee69 (diff)
download gcc-1706116db656978bf56667cdc36742be0a507a9b.tar.gz
gcc/
* config/i386/i386.c (MAX_CLASSES): Increase number of classes. (classify_argument): Extend for 512 bit vectors. (construct_container): Ditto. (function_arg_advance_32): Ditto. (function_arg_advance_64): Ditto. (function_arg_32): Ditto. (function_arg_64): Ditto. (function_value_32): Ditto. (return_in_memory_32): Ditto. (ix86_gimplify_va_arg): Ditto. (standard_sse_constant_p): Ditto. (standard_sse_constant_opcode): Ditto. (ix86_expand_vector_convert_uns_vsivsf): Ditto. (ix86_build_const_vector): Ditto. (ix86_build_signbit_mask): Ditto. (ix86_expand_sse_cmp): Extend for AVX512. (ix86_expand_sse_movcc): Ditto. (ix86_expand_int_vcond): Ditto. (ix86_expand_vec_perm): Ditto. (ix86_expand_sse_unpack): Ditto. (ix86_constant_alignment): Ditto. (ix86_builtin_vectorized_function): Ditto. (ix86_vectorize_builtin_gather): Ditto. (avx_vpermilp_parallel): Ditto. (ix86_rtx_costs): Ditto. (ix86_expand_vector_init_duplicate): Ditto. (ix86_expand_vector_init_concat): Ditto. (ix86_expand_vector_init_general): Ditto. (ix86_expand_vector_extract): Ditto. (emit_reduc_half): Ditto. (ix86_vector_mode_supported_p): Ditto. (ix86_emit_swdivsf): Ditto. (ix86_emit_swsqrtsf): Ditto. (expand_vec_perm_1): Ditto. (ix86_vectorize_vec_perm_const_ok): Ditto. (ix86_expand_mul_widen_evenodd): Ditto. (ix86_expand_sse2_mulvxdi3): Ditto. (ix86_preferred_simd_mode): Ditto. (ix86_autovectorize_vector_sizes): Ditto. (ix86_expand_vec_perm_vpermi2): New. (ix86_vector_duplicate_value): Ditto. 
(IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512, IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI, IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI, IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI, IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI, IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI, IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto. * config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless stores vectors > 128bit (AVX*). (<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto. (<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable SSE typeless stores vectors > 128bit (AVX*). (fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512. (vec_pack_ufix_trunc_<mode>): Ditto. (vec_unpacku_float_hi_v16si): New. * tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers. * tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit vectors. testsuite/ * gcc.target/i386/pr49002-2.c: allow vmovapd generation. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@206260 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- gcc/tree-vect-stmts.c 34
1 files changed, 27 insertions, 7 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index a07c14d153e..e4f04c44760 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5699,7 +5699,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
- tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
+ tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
@@ -5741,8 +5741,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
scaletype = TREE_VALUE (arglist);
- gcc_checking_assert (types_compatible_p (srctype, rettype)
- && types_compatible_p (srctype, masktype));
+ gcc_checking_assert (types_compatible_p (srctype, rettype));
vec_dest = vect_create_destination_var (scalar_dest, vectype);
@@ -5756,8 +5755,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* Currently we support only unconditional gather loads,
so mask should be all ones. */
- if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
- mask = build_int_cst (TREE_TYPE (masktype), -1);
+ if (TREE_CODE (masktype) == INTEGER_TYPE)
+ mask = build_int_cst (masktype, -1);
+ else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+ {
+ mask = build_int_cst (TREE_TYPE (masktype), -1);
+ mask = build_vector_from_val (masktype, mask);
+ }
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
{
REAL_VALUE_TYPE r;
@@ -5766,14 +5770,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tmp[j] = -1;
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
mask = build_real (TREE_TYPE (masktype), r);
+ mask = build_vector_from_val (masktype, mask);
}
else
gcc_unreachable ();
- mask = build_vector_from_val (masktype, mask);
mask = vect_init_vector (stmt, mask, masktype, NULL);
scale = build_int_cst (scaletype, gather_scale);
+ if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
+ merge = build_int_cst (TREE_TYPE (rettype), 0);
+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
+ {
+ REAL_VALUE_TYPE r;
+ long tmp[6];
+ for (j = 0; j < 6; ++j)
+ tmp[j] = 0;
+ real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
+ merge = build_real (TREE_TYPE (rettype), r);
+ }
+ else
+ gcc_unreachable ();
+ merge = build_vector_from_val (rettype, merge);
+ merge = vect_init_vector (stmt, merge, rettype, NULL);
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; ++j)
{
@@ -5802,7 +5822,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
new_stmt
- = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
+ = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
if (!useless_type_conversion_p (vectype, rettype))
{