diff options
author | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-12-31 11:09:42 +0000 |
---|---|---|
committer | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-12-31 11:09:42 +0000 |
commit | 1706116db656978bf56667cdc36742be0a507a9b (patch) | |
tree | c68ae3cee65d5606c9b4d515b06f89e789e4d701 /gcc/tree-vect-stmts.c | |
parent | f750fe99449324d741dba8cce75c64961b80ee69 (diff) | |
download | gcc-1706116db656978bf56667cdc36742be0a507a9b.tar.gz |
gcc/
* config/i386/i386.c (MAX_CLASSES): Increase number of classes.
(classify_argument): Extend for 512 bit vectors.
(construct_container): Ditto.
(function_arg_advance_32): Ditto.
(function_arg_advance_64): Ditto.
(function_arg_32): Ditto.
(function_arg_64): Ditto.
(function_value_32): Ditto.
(return_in_memory_32): Ditto.
(ix86_gimplify_va_arg): Ditto.
(standard_sse_constant_p): Ditto.
(standard_sse_constant_opcode): Ditto.
(ix86_expand_vector_convert_uns_vsivsf): Ditto.
(ix86_build_const_vector): Ditto.
(ix86_build_signbit_mask): Ditto.
(ix86_expand_sse_cmp): Extend for AVX512.
(ix86_expand_sse_movcc): Ditto.
(ix86_expand_int_vcond): Ditto.
(ix86_expand_vec_perm): Ditto.
(ix86_expand_sse_unpack): Ditto.
(ix86_constant_alignment): Ditto.
(ix86_builtin_vectorized_function): Ditto.
(ix86_vectorize_builtin_gather): Ditto.
(avx_vpermilp_parallel): Ditto.
(ix86_rtx_costs): Ditto.
(ix86_expand_vector_init_duplicate): Ditto.
(ix86_expand_vector_init_concat): Ditto.
(ix86_expand_vector_init_general): Ditto.
(ix86_expand_vector_extract): Ditto.
(emit_reduc_half): Ditto.
(ix86_vector_mode_supported_p): Ditto.
(ix86_emit_swdivsf): Ditto.
(ix86_emit_swsqrtsf): Ditto.
(expand_vec_perm_1): Ditto.
(ix86_vectorize_vec_perm_const_ok): Ditto.
(ix86_expand_mul_widen_evenodd): Ditto.
(ix86_expand_sse2_mulvxdi3): Ditto.
(ix86_preferred_simd_mode): Ditto.
(ix86_autovectorize_vector_sizes): Ditto.
(ix86_expand_vec_perm_vpermi2): New.
(ix86_vector_duplicate_value): Ditto.
(IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512,
IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512,
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto.
* config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless
stores for vectors > 128bit (AVX*).
(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto.
(<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable
SSE typeless stores for vectors > 128bit (AVX*).
(fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512.
(vec_pack_ufix_trunc_<mode>): Ditto.
(vec_unpacku_float_hi_v16si): New.
* tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers.
* tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit
vectors.
testsuite/
* gcc.target/i386/pr49002-2.c: Allow vmovapd generation.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@206260 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- | gcc/tree-vect-stmts.c | 34 |
1 files changed, 27 insertions, 7 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index a07c14d153e..e4f04c44760 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5699,7 +5699,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       tree vec_oprnd0 = NULL_TREE, op;
       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
-      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
+      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
       edge pe = loop_preheader_edge (loop);
       gimple_seq seq;
       basic_block new_bb;
@@ -5741,8 +5741,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
       scaletype = TREE_VALUE (arglist);
-      gcc_checking_assert (types_compatible_p (srctype, rettype)
-                           && types_compatible_p (srctype, masktype));
+      gcc_checking_assert (types_compatible_p (srctype, rettype));
 
       vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
@@ -5756,8 +5755,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
       /* Currently we support only unconditional gather loads,
          so mask should be all ones.  */
-      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
-        mask = build_int_cst (TREE_TYPE (masktype), -1);
+      if (TREE_CODE (masktype) == INTEGER_TYPE)
+        mask = build_int_cst (masktype, -1);
+      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+        {
+          mask = build_int_cst (TREE_TYPE (masktype), -1);
+          mask = build_vector_from_val (masktype, mask);
+        }
       else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
         {
           REAL_VALUE_TYPE r;
@@ -5766,14 +5770,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             tmp[j] = -1;
           real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
           mask = build_real (TREE_TYPE (masktype), r);
+          mask = build_vector_from_val (masktype, mask);
         }
       else
         gcc_unreachable ();
-      mask = build_vector_from_val (masktype, mask);
       mask = vect_init_vector (stmt, mask, masktype, NULL);
 
       scale = build_int_cst (scaletype, gather_scale);
 
+      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
+        merge = build_int_cst (TREE_TYPE (rettype), 0);
+      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
+        {
+          REAL_VALUE_TYPE r;
+          long tmp[6];
+          for (j = 0; j < 6; ++j)
+            tmp[j] = 0;
+          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
+          merge = build_real (TREE_TYPE (rettype), r);
+        }
+      else
+        gcc_unreachable ();
+      merge = build_vector_from_val (rettype, merge);
+      merge = vect_init_vector (stmt, merge, rettype, NULL);
+
       prev_stmt_info = NULL;
       for (j = 0; j < ncopies; ++j)
         {
@@ -5802,7 +5822,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             }
 
           new_stmt
-            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
+            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
 
           if (!useless_type_conversion_p (vectype, rettype))
             {