Diffstat (limited to 'gcc/tree-vect-stmts.c')
 gcc/tree-vect-stmts.c | 1227
 1 file changed, 1175 insertions(+), 52 deletions(-)
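This commit teaches the vectorizer to handle conditional (masked) loads and stores through the IFN_MASK_LOAD and IFN_MASK_STORE internal functions, to vectorize calls by dispatching them to OpenMP simd clones, and to vectorize stores with a negative step by reversing the vector elements. As a minimal sketch of what the masked-access path targets (names and bounds are illustrative, not from the patch), consider an if-converted loop such as:

void
copy_if (int *dst, const int *src, const int *cond, int n)
{
  int i;
  for (i = 0; i < n; i++)
    if (cond[i])        /* The guard becomes a vector mask.  */
      dst[i] = src[i];  /* The conditional load and store become
                           IFN_MASK_LOAD and IFN_MASK_STORE.  */
}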
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 72dfacd3151..eb615986c14 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1,5 +1,5 @@ /* Statement Analysis and Transformation for Vectorization - Copyright (C) 2003-2013 Free Software Foundation, Inc. + Copyright (C) 2003-2014 Free Software Foundation, Inc. Contributed by Dorit Naishlos <dorit@il.ibm.com> and Ira Rosen <irar@il.ibm.com> @@ -46,12 +46,15 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssanames.h" #include "tree-ssa-loop-manip.h" #include "cfgloop.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" #include "expr.h" #include "recog.h" /* FIXME: for insn_data */ #include "optabs.h" #include "diagnostic-core.h" #include "tree-vectorizer.h" #include "dumpfile.h" +#include "cgraph.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -232,7 +235,7 @@ vect_mark_relevant (vec<gimple> *worklist, gimple stmt, /* This use is out of pattern use, if LHS has other uses that are pattern uses, we should mark the stmt itself, and not the pattern stmt. */ - if (TREE_CODE (lhs) == SSA_NAME) + if (lhs && TREE_CODE (lhs) == SSA_NAME) FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) { if (is_gimple_debug (USE_STMT (use_p))) @@ -390,7 +393,27 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt) first case, and whether var corresponds to USE. */ if (!gimple_assign_copy_p (stmt)) - return false; + { + if (is_gimple_call (stmt) + && gimple_call_internal_p (stmt)) + switch (gimple_call_internal_fn (stmt)) + { + case IFN_MASK_STORE: + operand = gimple_call_arg (stmt, 3); + if (operand == use) + return true; + /* FALLTHRU */ + case IFN_MASK_LOAD: + operand = gimple_call_arg (stmt, 2); + if (operand == use) + return true; + break; + default: + break; + } + return false; + } + if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME) return false; operand = gimple_assign_rhs1 (stmt); @@ -606,7 +629,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) dump_printf_loc (MSG_NOTE, vect_location, "=== vect_mark_stmts_to_be_vectorized ===\n"); - stack_vec<gimple, 64> worklist; + auto_vec<gimple, 64> worklist; /* 1. Init worklist. */ for (i = 0; i < nbbs; i++) @@ -1693,6 +1716,425 @@ vectorizable_function (gimple call, tree vectype_out, tree vectype_in) vectype_in); } + +static tree permute_vec_elements (tree, tree, tree, gimple, + gimple_stmt_iterator *); + + +/* Function vectorizable_mask_load_store. + + Check if STMT performs a conditional load or store that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at GSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
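   As a hedged sketch (operand order taken from the internal calls built
   below, not from a normative definition), the scalar statements handled
   here look like

     lhs = IFN_MASK_LOAD (ptr, align, mask);
     IFN_MASK_STORE (ptr, align, mask, rhs);

   where only the lanes with a nonzero MASK element are accessed, ALIGN is
   carried over unchanged from the scalar call, and the vectorized form is
   the same internal call with vector data and mask operands.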
*/ + +static bool +vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi, + gimple *vec_stmt, slp_tree slp_node) +{ + tree vec_dest = NULL; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + stmt_vec_info prev_stmt_info; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree elem_type; + gimple new_stmt; + tree dummy; + tree dataref_ptr = NULL_TREE; + gimple ptr_incr; + int nunits = TYPE_VECTOR_SUBPARTS (vectype); + int ncopies; + int i, j; + bool inv_p; + tree gather_base = NULL_TREE, gather_off = NULL_TREE; + tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE; + int gather_scale = 1; + enum vect_def_type gather_dt = vect_unknown_def_type; + bool is_store; + tree mask; + gimple def_stmt; + tree def; + enum vect_def_type dt; + + if (slp_node != NULL) + return false; + + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + gcc_assert (ncopies >= 1); + + is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE; + mask = gimple_call_arg (stmt, 2); + if (TYPE_PRECISION (TREE_TYPE (mask)) + != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)))) + return false; + + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop && ncopies > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "multiple types in nested loop."); + return false; + } + + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + return false; + + if (!STMT_VINFO_DATA_REF (stmt_info)) + return false; + + elem_type = TREE_TYPE (vectype); + + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + return false; + + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + return false; + + if (STMT_VINFO_GATHER_P (stmt_info)) + { + gimple def_stmt; + tree def; + gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base, + &gather_off, &gather_scale); + gcc_assert (gather_decl); + if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL, + &def_stmt, &def, &gather_dt, + &gather_off_vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "gather index use not simple."); + return false; + } + } + else if (tree_int_cst_compare (nested_in_vect_loop + ? STMT_VINFO_DR_STEP (stmt_info) + : DR_STEP (dr), size_zero_node) <= 0) + return false; + else if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store)) + return false; + + if (TREE_CODE (mask) != SSA_NAME) + return false; + + if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL, + &def_stmt, &def, &dt)) + return false; + + if (is_store) + { + tree rhs = gimple_call_arg (stmt, 3); + if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL, + &def_stmt, &def, &dt)) + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + if (is_store) + vect_model_store_cost (stmt_info, ncopies, false, dt, + NULL, NULL, NULL); + else + vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL); + return true; + } + + /** Transform. 
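   Three paths follow: a gather path that feeds the vector mask through the
   target's gather builtin, a store path emitting IFN_MASK_STORE, and a load
   path emitting IFN_MASK_LOAD.  Each generates NCOPIES vector statements and
   chains them through STMT_VINFO_RELATED_STMT. 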
**/ + + if (STMT_VINFO_GATHER_P (stmt_info)) + { + tree vec_oprnd0 = NULL_TREE, op; + tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); + tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; + tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale; + tree perm_mask = NULL_TREE, prev_res = NULL_TREE; + tree mask_perm_mask = NULL_TREE; + edge pe = loop_preheader_edge (loop); + gimple_seq seq; + basic_block new_bb; + enum { NARROW, NONE, WIDEN } modifier; + int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); + + rettype = TREE_TYPE (TREE_TYPE (gather_decl)); + srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); + ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); + idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); + masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); + scaletype = TREE_VALUE (arglist); + gcc_checking_assert (types_compatible_p (srctype, rettype) + && types_compatible_p (srctype, masktype)); + + if (nunits == gather_off_nunits) + modifier = NONE; + else if (nunits == gather_off_nunits / 2) + { + unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); + modifier = WIDEN; + + for (i = 0; i < gather_off_nunits; ++i) + sel[i] = i | nunits; + + perm_mask = vect_gen_perm_mask (gather_off_vectype, sel); + gcc_assert (perm_mask != NULL_TREE); + } + else if (nunits == gather_off_nunits * 2) + { + unsigned char *sel = XALLOCAVEC (unsigned char, nunits); + modifier = NARROW; + + for (i = 0; i < nunits; ++i) + sel[i] = i < gather_off_nunits + ? i : i + nunits - gather_off_nunits; + + perm_mask = vect_gen_perm_mask (vectype, sel); + gcc_assert (perm_mask != NULL_TREE); + ncopies *= 2; + for (i = 0; i < nunits; ++i) + sel[i] = i | gather_off_nunits; + mask_perm_mask = vect_gen_perm_mask (masktype, sel); + gcc_assert (mask_perm_mask != NULL_TREE); + } + else + gcc_unreachable (); + + vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype); + + ptr = fold_convert (ptrtype, gather_base); + if (!is_gimple_min_invariant (ptr)) + { + ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); + gcc_assert (!new_bb); + } + + scale = build_int_cst (scaletype, gather_scale); + + prev_stmt_info = NULL; + for (j = 0; j < ncopies; ++j) + { + if (modifier == WIDEN && (j & 1)) + op = permute_vec_elements (vec_oprnd0, vec_oprnd0, + perm_mask, stmt, gsi); + else if (j == 0) + op = vec_oprnd0 + = vect_get_vec_def_for_operand (gather_off, stmt, NULL); + else + op = vec_oprnd0 + = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0); + + if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) + { + gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)) + == TYPE_VECTOR_SUBPARTS (idxtype)); + var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL); + var = make_ssa_name (var, NULL); + op = build1 (VIEW_CONVERT_EXPR, idxtype, op); + new_stmt + = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, + op, NULL_TREE); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + op = var; + } + + if (mask_perm_mask && (j & 1)) + mask_op = permute_vec_elements (mask_op, mask_op, + mask_perm_mask, stmt, gsi); + else + { + if (j == 0) + vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); + else + { + vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, + &def_stmt, &def, &dt); + vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); + } + + mask_op = vec_mask; + if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) + { + 
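/* The mask def may carry a different vector type than the builtin's MASKTYPE; reinterpret it lane for lane with a VIEW_CONVERT_EXPR (the TYPE_VECTOR_SUBPARTS assert below guards this).  */ +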
gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)) + == TYPE_VECTOR_SUBPARTS (masktype)); + var = vect_get_new_vect_var (masktype, vect_simple_var, + NULL); + var = make_ssa_name (var, NULL); + mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); + new_stmt + = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, + mask_op, NULL_TREE); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mask_op = var; + } + } + + new_stmt + = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op, + scale); + + if (!useless_type_conversion_p (vectype, rettype)) + { + gcc_assert (TYPE_VECTOR_SUBPARTS (vectype) + == TYPE_VECTOR_SUBPARTS (rettype)); + var = vect_get_new_vect_var (rettype, vect_simple_var, NULL); + op = make_ssa_name (var, new_stmt); + gimple_call_set_lhs (new_stmt, op); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + var = make_ssa_name (vec_dest, NULL); + op = build1 (VIEW_CONVERT_EXPR, vectype, op); + new_stmt + = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op, + NULL_TREE); + } + else + { + var = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, var); + } + + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + if (modifier == NARROW) + { + if ((j & 1) == 0) + { + prev_res = var; + continue; + } + var = permute_vec_elements (prev_res, var, + perm_mask, stmt, gsi); + new_stmt = SSA_NAME_DEF_STMT (var); + } + + if (prev_stmt_info == NULL) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + return true; + } + else if (is_store) + { + tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE; + prev_stmt_info = NULL; + for (i = 0; i < ncopies; i++) + { + unsigned align, misalign; + + if (i == 0) + { + tree rhs = gimple_call_arg (stmt, 3); + vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL); + vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); + /* We should have caught mismatched types earlier. 
*/ + gcc_assert (useless_type_conversion_p (vectype, + TREE_TYPE (vec_rhs))); + dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL, + NULL_TREE, &dummy, gsi, + &ptr_incr, false, &inv_p); + gcc_assert (!inv_p); + } + else + { + vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt, + &def, &dt); + vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs); + vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt, + &def, &dt); + vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, + TYPE_SIZE_UNIT (vectype)); + } + + align = TYPE_ALIGN_UNIT (vectype); + if (aligned_access_p (dr)) + misalign = 0; + else if (DR_MISALIGNMENT (dr) == -1) + { + align = TYPE_ALIGN_UNIT (elem_type); + misalign = 0; + } + else + misalign = DR_MISALIGNMENT (dr); + set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, + misalign); + new_stmt + = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr, + gimple_call_arg (stmt, 1), + vec_mask, vec_rhs); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (i == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + } + else + { + tree vec_mask = NULL_TREE; + prev_stmt_info = NULL; + vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype); + for (i = 0; i < ncopies; i++) + { + unsigned align, misalign; + + if (i == 0) + { + vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); + dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL, + NULL_TREE, &dummy, gsi, + &ptr_incr, false, &inv_p); + gcc_assert (!inv_p); + } + else + { + vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt, + &def, &dt); + vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, + TYPE_SIZE_UNIT (vectype)); + } + + align = TYPE_ALIGN_UNIT (vectype); + if (aligned_access_p (dr)) + misalign = 0; + else if (DR_MISALIGNMENT (dr) == -1) + { + align = TYPE_ALIGN_UNIT (elem_type); + misalign = 0; + } + else + misalign = DR_MISALIGNMENT (dr); + set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, + misalign); + new_stmt + = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr, + gimple_call_arg (stmt, 1), + vec_mask); + gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL)); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (i == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + } + + return true; +} + + /* Function vectorizable_call. Check if STMT performs a function call that can be vectorized. @@ -1735,12 +2177,18 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!is_gimple_call (stmt)) return false; - if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) - return false; + if (gimple_call_internal_p (stmt) + && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD + || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) + return vectorizable_mask_load_store (stmt, gsi, vec_stmt, + slp_node); - if (stmt_can_throw_internal (stmt)) + if (gimple_call_lhs (stmt) == NULL_TREE + || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) return false; + gcc_checking_assert (!stmt_can_throw_internal (stmt)); + vectype_out = STMT_VINFO_VECTYPE (stmt_info); /* Process function arguments. 
*/ @@ -2082,10 +2530,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, vargs.release (); - /* Update the exception handling table with the vector stmt if necessary. */ - if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt)) - gimple_purge_dead_eh_edges (gimple_bb (stmt)); - /* The call in STMT might prevent it from being removed in dce. We however cannot remove it here, due to the way the ssa name it defines is mapped to the new definition. So just replace @@ -2109,6 +2553,605 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } +struct simd_call_arg_info +{ + tree vectype; + tree op; + enum vect_def_type dt; + HOST_WIDE_INT linear_step; + unsigned int align; +}; + +/* Function vectorizable_simd_clone_call. + + Check if STMT performs a function call that can be vectorized + by calling a simd clone of the function. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +static bool +vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi, + gimple *vec_stmt, slp_tree slp_node) +{ + tree vec_dest; + tree scalar_dest; + tree op, type; + tree vec_oprnd0 = NULL_TREE; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; + tree vectype; + unsigned int nunits; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; + tree fndecl, new_temp, def; + gimple def_stmt; + gimple new_stmt = NULL; + int ncopies, j; + vec<simd_call_arg_info> arginfo = vNULL; + vec<tree> vargs = vNULL; + size_t i, nargs; + tree lhs, rtype, ratype; + vec<constructor_elt, va_gc> *ret_ctor_elts; + + /* Is STMT a vectorizable call? */ + if (!is_gimple_call (stmt)) + return false; + + fndecl = gimple_call_fndecl (stmt); + if (fndecl == NULL_TREE) + return false; + + struct cgraph_node *node = cgraph_get_node (fndecl); + if (node == NULL || node->simd_clones == NULL) + return false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + return false; + + if (gimple_call_lhs (stmt) + && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) + return false; + + gcc_checking_assert (!stmt_can_throw_internal (stmt)); + + vectype = STMT_VINFO_VECTYPE (stmt_info); + + if (loop_vinfo && nested_in_vect_loop_p (loop, stmt)) + return false; + + /* FORNOW */ + if (slp_node || PURE_SLP_STMT (stmt_info)) + return false; + + /* Process function arguments. */ + nargs = gimple_call_num_args (stmt); + + /* Bail out if the function has zero arguments. 
*/ + if (nargs == 0) + return false; + + arginfo.create (nargs); + + for (i = 0; i < nargs; i++) + { + simd_call_arg_info thisarginfo; + affine_iv iv; + + thisarginfo.linear_step = 0; + thisarginfo.align = 0; + thisarginfo.op = NULL_TREE; + + op = gimple_call_arg (stmt, i); + if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, + &def_stmt, &def, &thisarginfo.dt, + &thisarginfo.vectype) + || thisarginfo.dt == vect_uninitialized_def) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "use not simple.\n"); + arginfo.release (); + return false; + } + + if (thisarginfo.dt == vect_constant_def + || thisarginfo.dt == vect_external_def) + gcc_assert (thisarginfo.vectype == NULL_TREE); + else + gcc_assert (thisarginfo.vectype != NULL_TREE); + + if (thisarginfo.dt != vect_constant_def + && thisarginfo.dt != vect_external_def + && loop_vinfo + && TREE_CODE (op) == SSA_NAME + && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false) + && tree_fits_shwi_p (iv.step)) + { + thisarginfo.linear_step = tree_to_shwi (iv.step); + thisarginfo.op = iv.base; + } + else if ((thisarginfo.dt == vect_constant_def + || thisarginfo.dt == vect_external_def) + && POINTER_TYPE_P (TREE_TYPE (op))) + thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; + + arginfo.quick_push (thisarginfo); + } + + unsigned int badness = 0; + struct cgraph_node *bestn = NULL; + if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)) + bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)); + else + for (struct cgraph_node *n = node->simd_clones; n != NULL; + n = n->simdclone->next_clone) + { + unsigned int this_badness = 0; + if (n->simdclone->simdlen + > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) + || n->simdclone->nargs != nargs) + continue; + if (n->simdclone->simdlen + < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + - exact_log2 (n->simdclone->simdlen)) * 1024; + if (n->simdclone->inbranch) + this_badness += 2048; + int target_badness = targetm.simd_clone.usable (n); + if (target_badness < 0) + continue; + this_badness += target_badness * 512; + /* FORNOW: Have to add code to add the mask argument. 
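   (An inbranch clone takes a trailing mask argument that selects the
   active lanes, cf. SIMD_CLONE_ARG_TYPE_MASK below; until code is added
   to construct that argument, inbranch clones are rejected here outright,
   on top of the badness penalty above.) 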
*/ + if (n->simdclone->inbranch) + continue; + for (i = 0; i < nargs; i++) + { + switch (n->simdclone->args[i].arg_type) + { + case SIMD_CLONE_ARG_TYPE_VECTOR: + if (!useless_type_conversion_p + (n->simdclone->args[i].orig_type, + TREE_TYPE (gimple_call_arg (stmt, i)))) + i = -1; + else if (arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def + || arginfo[i].linear_step) + this_badness += 64; + break; + case SIMD_CLONE_ARG_TYPE_UNIFORM: + if (arginfo[i].dt != vect_constant_def + && arginfo[i].dt != vect_external_def) + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: + if (arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def + || (arginfo[i].linear_step + != n->simdclone->args[i].linear_step)) + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: + /* FORNOW */ + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_MASK: + gcc_unreachable (); + } + if (i == (size_t) -1) + break; + if (n->simdclone->args[i].alignment > arginfo[i].align) + { + i = -1; + break; + } + if (arginfo[i].align) + this_badness += (exact_log2 (arginfo[i].align) + - exact_log2 (n->simdclone->args[i].alignment)); + } + if (i == (size_t) -1) + continue; + if (bestn == NULL || this_badness < badness) + { + bestn = n; + badness = this_badness; + } + } + + if (bestn == NULL) + { + arginfo.release (); + return false; + } + + for (i = 0; i < nargs; i++) + if ((arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + arginfo[i].vectype + = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt, + i))); + if (arginfo[i].vectype == NULL + || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) + > bestn->simdclone->simdlen)) + { + arginfo.release (); + return false; + } + } + + fndecl = bestn->decl; + nunits = bestn->simdclone->simdlen; + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + /* If the function isn't const, only allow it in simd loops where user + has asserted that at least nunits consecutive iterations can be + performed using SIMD instructions. */ + if ((loop == NULL || (unsigned) loop->safelen < nunits) + && gimple_vuse (stmt)) + { + arginfo.release (); + return false; + } + + /* Sanity check: make sure that at least one copy of the vectorized stmt + needs to be generated. */ + gcc_assert (ncopies >= 1); + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl; + STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vectorizable_simd_clone_call ===\n"); +/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */ + arginfo.release (); + return true; + } + + /** Transform. **/ + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); + + /* Handle def. */ + scalar_dest = gimple_call_lhs (stmt); + vec_dest = NULL_TREE; + rtype = NULL_TREE; + ratype = NULL_TREE; + if (scalar_dest) + { + vec_dest = vect_create_destination_var (scalar_dest, vectype); + rtype = TREE_TYPE (TREE_TYPE (fndecl)); + if (TREE_CODE (rtype) == ARRAY_TYPE) + { + ratype = rtype; + rtype = TREE_TYPE (ratype); + } + } + + prev_stmt_info = NULL; + for (j = 0; j < ncopies; ++j) + { + /* Build argument list for the vectorized call. 
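   Each argument is emitted according to the clone's classification:
   VECTOR arguments get vector defs, split via BIT_FIELD_REF or glued
   together with a CONSTRUCTOR when the clone's vector width differs from
   the loop's; UNIFORM arguments are passed through unchanged; and
   LINEAR_CONSTANT_STEP arguments get base-plus-step adjustments, with a
   loop-header PHI built on the first copy. 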
*/ + if (j == 0) + vargs.create (nargs); + else + vargs.truncate (0); + + for (i = 0; i < nargs; i++) + { + unsigned int k, l, m, o; + tree atype; + op = gimple_call_arg (stmt, i); + switch (bestn->simdclone->args[i].arg_type) + { + case SIMD_CLONE_ARG_TYPE_VECTOR: + atype = bestn->simdclone->args[i].vector_type; + o = nunits / TYPE_VECTOR_SUBPARTS (atype); + for (m = j * o; m < (j + 1) * o; m++) + { + if (TYPE_VECTOR_SUBPARTS (atype) + < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)) + { + unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); + k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) + / TYPE_VECTOR_SUBPARTS (atype)); + gcc_assert ((k & (k - 1)) == 0); + if (m == 0) + vec_oprnd0 + = vect_get_vec_def_for_operand (op, stmt, NULL); + else + { + vec_oprnd0 = arginfo[i].op; + if ((m & (k - 1)) == 0) + vec_oprnd0 + = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, + vec_oprnd0); + } + arginfo[i].op = vec_oprnd0; + vec_oprnd0 + = build3 (BIT_FIELD_REF, atype, vec_oprnd0, + size_int (prec), + bitsize_int ((m & (k - 1)) * prec)); + new_stmt + = gimple_build_assign (make_ssa_name (atype, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + else + { + k = (TYPE_VECTOR_SUBPARTS (atype) + / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)); + gcc_assert ((k & (k - 1)) == 0); + vec<constructor_elt, va_gc> *ctor_elts; + if (k != 1) + vec_alloc (ctor_elts, k); + else + ctor_elts = NULL; + for (l = 0; l < k; l++) + { + if (m == 0 && l == 0) + vec_oprnd0 + = vect_get_vec_def_for_operand (op, stmt, NULL); + else + vec_oprnd0 + = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, + arginfo[i].op); + arginfo[i].op = vec_oprnd0; + if (k == 1) + break; + CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, + vec_oprnd0); + } + if (k == 1) + vargs.safe_push (vec_oprnd0); + else + { + vec_oprnd0 = build_constructor (atype, ctor_elts); + new_stmt + = gimple_build_assign (make_ssa_name (atype, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + } + } + break; + case SIMD_CLONE_ARG_TYPE_UNIFORM: + vargs.safe_push (op); + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: + if (j == 0) + { + gimple_seq stmts; + arginfo[i].op + = force_gimple_operand (arginfo[i].op, &stmts, true, + NULL_TREE); + if (stmts != NULL) + { + basic_block new_bb; + edge pe = loop_preheader_edge (loop); + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + tree phi_res = copy_ssa_name (op, NULL); + gimple new_phi = create_phi_node (phi_res, loop->header); + set_vinfo_for_stmt (new_phi, + new_stmt_vec_info (new_phi, loop_vinfo, + NULL)); + add_phi_arg (new_phi, arginfo[i].op, + loop_preheader_edge (loop), UNKNOWN_LOCATION); + enum tree_code code + = POINTER_TYPE_P (TREE_TYPE (op)) + ? POINTER_PLUS_EXPR : PLUS_EXPR; + tree type = POINTER_TYPE_P (TREE_TYPE (op)) + ? 
sizetype : TREE_TYPE (op); + double_int cst + = double_int::from_shwi + (bestn->simdclone->args[i].linear_step); + cst *= double_int::from_uhwi (ncopies * nunits); + tree tcst = double_int_to_tree (type, cst); + tree phi_arg = copy_ssa_name (op, NULL); + new_stmt = gimple_build_assign_with_ops (code, phi_arg, + phi_res, tcst); + gimple_stmt_iterator si = gsi_after_labels (loop->header); + gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); + set_vinfo_for_stmt (new_stmt, + new_stmt_vec_info (new_stmt, loop_vinfo, + NULL)); + add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), + UNKNOWN_LOCATION); + arginfo[i].op = phi_res; + vargs.safe_push (phi_res); + } + else + { + enum tree_code code + = POINTER_TYPE_P (TREE_TYPE (op)) + ? POINTER_PLUS_EXPR : PLUS_EXPR; + tree type = POINTER_TYPE_P (TREE_TYPE (op)) + ? sizetype : TREE_TYPE (op); + double_int cst + = double_int::from_shwi + (bestn->simdclone->args[i].linear_step); + cst *= double_int::from_uhwi (j * nunits); + tree tcst = double_int_to_tree (type, cst); + new_temp = make_ssa_name (TREE_TYPE (op), NULL); + new_stmt + = gimple_build_assign_with_ops (code, new_temp, + arginfo[i].op, tcst); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (new_temp); + } + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: + default: + gcc_unreachable (); + } + } + + new_stmt = gimple_build_call_vec (fndecl, vargs); + if (vec_dest) + { + gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits); + if (ratype) + new_temp = create_tmp_var (ratype, NULL); + else if (TYPE_VECTOR_SUBPARTS (vectype) + == TYPE_VECTOR_SUBPARTS (rtype)) + new_temp = make_ssa_name (vec_dest, new_stmt); + else + new_temp = make_ssa_name (rtype, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + } + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + if (vec_dest) + { + if (TYPE_VECTOR_SUBPARTS (vectype) < nunits) + { + unsigned int k, l; + unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); + k = nunits / TYPE_VECTOR_SUBPARTS (vectype); + gcc_assert ((k & (k - 1)) == 0); + for (l = 0; l < k; l++) + { + tree t; + if (ratype) + { + t = build_fold_addr_expr (new_temp); + t = build2 (MEM_REF, vectype, t, + build_int_cst (TREE_TYPE (t), + l * prec / BITS_PER_UNIT)); + } + else + t = build3 (BIT_FIELD_REF, vectype, new_temp, + size_int (prec), bitsize_int (l * prec)); + new_stmt + = gimple_build_assign (make_ssa_name (vectype, NULL), t); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (j == 0 && l == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + if (ratype) + { + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + new_stmt = gimple_build_assign (new_temp, clobber); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + continue; + } + else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits) + { + unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype) + / TYPE_VECTOR_SUBPARTS (rtype)); + gcc_assert ((k & (k - 1)) == 0); + if ((j & (k - 1)) == 0) + vec_alloc (ret_ctor_elts, k); + if (ratype) + { + unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype); + for (m = 0; m < o; m++) + { + tree tem = build4 (ARRAY_REF, rtype, new_temp, + size_int (m), NULL_TREE, NULL_TREE); + new_stmt + = gimple_build_assign (make_ssa_name (rtype, NULL), + tem); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, + gimple_assign_lhs (new_stmt)); + 
} + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + new_stmt = gimple_build_assign (new_temp, clobber); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + else + CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); + if ((j & (k - 1)) != k - 1) + continue; + vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); + new_stmt + = gimple_build_assign (make_ssa_name (vec_dest, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + if ((unsigned) j == k - 1) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + continue; + } + else if (ratype) + { + tree t = build_fold_addr_expr (new_temp); + t = build2 (MEM_REF, vectype, t, + build_int_cst (TREE_TYPE (t), 0)); + new_stmt + = gimple_build_assign (make_ssa_name (vec_dest, NULL), t); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + vect_finish_stmt_generation (stmt, + gimple_build_assign (new_temp, + clobber), gsi); + } + } + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + vargs.release (); + + /* The call in STMT might prevent it from being removed in dce. + We however cannot remove it here, due to the way the ssa name + it defines is mapped to the new definition. So just replace + rhs of the statement with something harmless. */ + + if (slp_node) + return true; + + if (scalar_dest) + { + type = TREE_TYPE (scalar_dest); + if (is_pattern_stmt_p (stmt_info)) + lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); + else + lhs = gimple_call_lhs (stmt); + new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + } + else + new_stmt = gimple_build_nop (); + set_vinfo_for_stmt (new_stmt, stmt_info); + set_vinfo_for_stmt (stmt, NULL); + STMT_VINFO_STMT (stmt_info) = new_stmt; + gsi_replace (gsi, new_stmt, false); + unlink_stmt_vdef (stmt); + + return true; +} + + /* Function vect_gen_widened_results_half Create a vector stmt whose code, type, number of arguments, and result @@ -3453,10 +4496,6 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, } -static tree permute_vec_elements (tree, tree, tree, gimple, - gimple_stmt_iterator *); - - /* Function vectorizable_operation. Check if STMT performs a binary, unary or ternary operation that can @@ -3832,6 +4871,25 @@ ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr) } +/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements + reversal of the vector elements. If that is impossible to do, + returns NULL. */ + +static tree +perm_mask_for_reverse (tree vectype) +{ + int i, nunits; + unsigned char *sel; + + nunits = TYPE_VECTOR_SUBPARTS (vectype); + sel = XALLOCAVEC (unsigned char, nunits); + + for (i = 0; i < nunits; ++i) + sel[i] = nunits - 1 - i; + + return vect_gen_perm_mask (vectype, sel); +} + /* Function vectorizable_store. 
Check if STMT defines a non scalar data-ref (array/pointer/structure) that @@ -3875,6 +4933,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, vec<tree> oprnds = vNULL; vec<tree> result_chain = vNULL; bool inv_p; + bool negative = false; + tree offset = NULL_TREE; vec<tree> vec_oprnds = vNULL; bool slp = (slp_node != NULL); unsigned int vec_num; @@ -3949,16 +5009,41 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) - ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), - size_zero_node) < 0) + negative = + tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) + ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), + size_zero_node) < 0; + if (negative && ncopies > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "negative step for store.\n"); + "multiple types with negative step.\n"); return false; } + if (negative) + { + gcc_assert (!grouped_store); + alignment_support_scheme = vect_supportable_dr_alignment (dr, false); + if (alignment_support_scheme != dr_aligned + && alignment_support_scheme != dr_unaligned_supported) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step but alignment required.\n"); + return false; + } + if (dt != vect_constant_def + && dt != vect_external_def + && !perm_mask_for_reverse (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step and reversing not supported.\n"); + return false; + } + } + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { grouped_store = true; @@ -4063,6 +5148,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); + if (negative) + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); + if (store_lanes_p) aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); else @@ -4173,7 +5261,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, simd_lane_access_p ? loop : NULL, - NULL_TREE, &dummy, gsi, &ptr_incr, + offset, &dummy, gsi, &ptr_incr, simd_lane_access_p, &inv_p); gcc_assert (bb_vinfo || !inv_p); } @@ -4279,6 +5367,27 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); + if (negative + && dt != vect_constant_def + && dt != vect_external_def) + { + tree perm_mask = perm_mask_for_reverse (vectype); + tree perm_dest + = vect_create_destination_var (gimple_assign_rhs1 (stmt), + vectype); + tree new_temp = make_ssa_name (perm_dest, NULL); + + /* Generate the permute statement. */ + gimple perm_stmt + = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp, + vec_oprnd, vec_oprnd, + perm_mask); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + + perm_stmt = SSA_NAME_DEF_STMT (new_temp); + vec_oprnd = new_temp; + } + /* Arguments are ready. Create the new vector stmt. */ new_stmt = gimple_build_assign (data_ref, vec_oprnd); vect_finish_stmt_generation (stmt, new_stmt, gsi); @@ -4336,25 +5445,6 @@ vect_gen_perm_mask (tree vectype, unsigned char *sel) return mask_vec; } -/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements - reversal of the vector elements. 
If that is impossible to do, - returns NULL. */ - -static tree -perm_mask_for_reverse (tree vectype) -{ - int i, nunits; - unsigned char *sel; - - nunits = TYPE_VECTOR_SUBPARTS (vectype); - sel = XALLOCAVEC (unsigned char, nunits); - - for (i = 0; i < nunits; ++i) - sel[i] = nunits - 1 - i; - - return vect_gen_perm_mask (vectype, sel); -} - /* Given a vector variable X and Y, that was generated for the scalar STMT, generate instructions to permute the vector elements of X and Y using permutation mask MASK_VEC, insert them at *GSI and return the @@ -4368,7 +5458,7 @@ permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt, tree perm_dest, data_ref; gimple perm_stmt; - perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); + perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype); data_ref = make_ssa_name (perm_dest, NULL); /* Generate the permute statement. */ @@ -4609,7 +5699,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, tree vec_oprnd0 = NULL_TREE, op; tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; - tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE; + tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE; edge pe = loop_preheader_edge (loop); gimple_seq seq; basic_block new_bb; @@ -4651,8 +5741,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); scaletype = TREE_VALUE (arglist); - gcc_checking_assert (types_compatible_p (srctype, rettype) - && types_compatible_p (srctype, masktype)); + gcc_checking_assert (types_compatible_p (srctype, rettype)); vec_dest = vect_create_destination_var (scalar_dest, vectype); @@ -4666,8 +5755,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, /* Currently we support only unconditional gather loads, so mask should be all ones. 
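   The all-ones mask is a literal integer constant for integer mask
   elements, or the all-ones bit pattern reinterpreted through
   real_from_target for floating-point mask elements.  The patch also
   builds an all-zeros MERGE vector of the gather's return type and passes
   it as the first argument, where the mask used to be duplicated. 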
*/ - if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) - mask = build_int_cst (TREE_TYPE (masktype), -1); + if (TREE_CODE (masktype) == INTEGER_TYPE) + mask = build_int_cst (masktype, -1); + else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) + { + mask = build_int_cst (TREE_TYPE (masktype), -1); + mask = build_vector_from_val (masktype, mask); + } else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) { REAL_VALUE_TYPE r; @@ -4676,14 +5770,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, tmp[j] = -1; real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); mask = build_real (TREE_TYPE (masktype), r); + mask = build_vector_from_val (masktype, mask); } else gcc_unreachable (); - mask = build_vector_from_val (masktype, mask); mask = vect_init_vector (stmt, mask, masktype, NULL); scale = build_int_cst (scaletype, gather_scale); + if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE) + merge = build_int_cst (TREE_TYPE (rettype), 0); + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype))) + { + REAL_VALUE_TYPE r; + long tmp[6]; + for (j = 0; j < 6; ++j) + tmp[j] = 0; + real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype))); + merge = build_real (TREE_TYPE (rettype), r); + } + else + gcc_unreachable (); + merge = build_vector_from_val (rettype, merge); + merge = vect_init_vector (stmt, merge, rettype, NULL); + prev_stmt_info = NULL; for (j = 0; j < ncopies; ++j) { @@ -4712,7 +5822,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } new_stmt - = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale); + = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale); if (!useless_type_conversion_p (vectype, rettype)) { @@ -5531,8 +6641,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, { if (slp_node) { - stack_vec<tree, 4> ops; - stack_vec<vec<tree>, 4> vec_defs; + auto_vec<tree, 4> ops; + auto_vec<vec<tree>, 4> vec_defs; ops.safe_push (TREE_OPERAND (cond_expr, 0)); ops.safe_push (TREE_OPERAND (cond_expr, 1)); @@ -5819,7 +6929,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) if (STMT_VINFO_RELEVANT_P (stmt_info)) { gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); - gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + gcc_assert (STMT_VINFO_VECTYPE (stmt_info) + || (is_gimple_call (stmt) + && gimple_call_lhs (stmt) == NULL_TREE)); *need_to_vectorize = true; } @@ -5827,7 +6939,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) if (!bb_vinfo && (STMT_VINFO_RELEVANT_P (stmt_info) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) - ok = (vectorizable_conversion (stmt, NULL, NULL, NULL) + ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL) + || vectorizable_conversion (stmt, NULL, NULL, NULL) || vectorizable_shift (stmt, NULL, NULL, NULL) || vectorizable_operation (stmt, NULL, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL, NULL) @@ -5839,7 +6952,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) else { if (bb_vinfo) - ok = (vectorizable_conversion (stmt, NULL, NULL, node) + ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node) + || vectorizable_conversion (stmt, NULL, NULL, node) || vectorizable_shift (stmt, NULL, NULL, node) || vectorizable_operation (stmt, NULL, NULL, node) || vectorizable_assignment (stmt, NULL, NULL, node) @@ -5965,6 +7079,15 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi, case call_vec_info_type: done = vectorizable_call (stmt, gsi, 
&vec_stmt, slp_node); stmt = gsi_stmt (*gsi); + if (is_gimple_call (stmt) + && gimple_call_internal_p (stmt) + && gimple_call_internal_fn (stmt) == IFN_MASK_STORE) + is_store = true; + break; + + case call_simd_clone_vec_info_type: + done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node); + stmt = gsi_stmt (*gsi); + break; case reduc_vec_info_type:
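For context, the simd-clone path corresponds to source of roughly this shape (a hedged sketch; the function and clauses are illustrative, but the clause-to-classification mapping matches the SIMD_CLONE_ARG_TYPE_* cases above):

#pragma omp declare simd uniform(scale) linear(i:1) notinbranch
extern float fmadd1 (float x, float scale, int i);

void
apply (float *out, const float *in, float scale, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* X is a VECTOR argument, SCALE is UNIFORM, I is
       LINEAR_CONSTANT_STEP with step 1; notinbranch avoids the
       FORNOW rejection of inbranch clones.  */
    out[i] = fmadd1 (in[i], scale, i);
}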