Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r--  gcc/tree-vect-data-refs.c  |  369
1 file changed, 258 insertions, 111 deletions
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index b472e2ee49..aa504b6a1c 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1,5 +1,5 @@
/* Data References Analysis and Manipulation Utilities for Vectorization.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
+ Copyright (C) 2003-2017 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "gimple.h"
#include "predict.h"
+#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
@@ -227,6 +228,12 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|| (DR_IS_READ (dra) && DR_IS_READ (drb)))
return false;
+ /* We do not have to consider dependences between accesses that belong
+ to the same group. */
+ if (GROUP_FIRST_ELEMENT (stmtinfo_a)
+ && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b))
+ return false;
+
/* Even if we have an anti-dependence then, as the vectorized loop covers at
least two scalar iterations, there is always also a true dependence.
As the vectorizer does not re-order loads and stores we can ignore
@@ -467,14 +474,21 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
.create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
* LOOP_VINFO_DATAREFS (loop_vinfo).length ());
LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
+ /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS. */
if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
&LOOP_VINFO_DDRS (loop_vinfo),
LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
return false;
- FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
- if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
- return false;
+ /* For epilogues we either have no aliases or alias versioning
+ was applied to the original loop. Therefore we may just get max_vf
+ using the VF of the original loop. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ *max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo);
+ else
+ FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
+ if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
+ return false;
return true;
}
@@ -575,6 +589,7 @@ vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
if (!dr_b)
return false;
+ bool dependent = false;
/* If we run into a store of this same instance (we've just
marked those) then delay dependence checking until we run
into the last store because this is where it will have
@@ -591,22 +606,21 @@ vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
= STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
ddr_p ddr = initialize_data_dependence_relation
(dr_a, store_dr, vNULL);
- if (vect_slp_analyze_data_ref_dependence (ddr))
- {
- free_dependence_relation (ddr);
- return false;
- }
+ dependent = vect_slp_analyze_data_ref_dependence (ddr);
free_dependence_relation (ddr);
+ if (dependent)
+ break;
}
}
-
- ddr_p ddr = initialize_data_dependence_relation (dr_a, dr_b, vNULL);
- if (vect_slp_analyze_data_ref_dependence (ddr))
+ else
{
+ ddr_p ddr = initialize_data_dependence_relation (dr_a,
+ dr_b, vNULL);
+ dependent = vect_slp_analyze_data_ref_dependence (ddr);
free_dependence_relation (ddr);
- return false;
}
- free_dependence_relation (ddr);
+ if (dependent)
+ return false;
}
}
return true;
@@ -765,10 +779,34 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
base = ref;
while (handled_component_p (base))
base = TREE_OPERAND (base, 0);
+ unsigned int base_alignment = 0;
+ unsigned HOST_WIDE_INT base_bitpos;
+ get_object_alignment_1 (base, &base_alignment, &base_bitpos);
+ /* As data-ref analysis strips the MEM_REF down to its base operand
+ to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
+ adjust things to make base_alignment valid as the alignment of
+ DR_BASE_ADDRESS. */
if (TREE_CODE (base) == MEM_REF)
- base = build2 (MEM_REF, TREE_TYPE (base), base_addr,
- build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0));
- unsigned int base_alignment = get_object_alignment (base);
+ {
+ /* Note all this only works if DR_BASE_ADDRESS is the same as
+ MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
+ in other offsets. We need to rework DR to compute the alignment
+ of DR_BASE_ADDRESS as long as all information is still available. */
+ if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
+ {
+ base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
+ base_bitpos &= (base_alignment - 1);
+ }
+ else
+ base_bitpos = BITS_PER_UNIT;
+ }
+ if (base_bitpos != 0)
+ base_alignment = base_bitpos & -base_bitpos;
+ /* Also look at the alignment of the base address DR analysis
+ computed. */
+ unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
+ if (base_addr_alignment > base_alignment)
+ base_alignment = base_addr_alignment;
if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
DR_VECT_AUX (dr)->base_element_aligned = true;
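The base_bitpos & -base_bitpos step above recovers the largest power of two that still divides the remaining bit offset, which is the strongest alignment the access address can be assumed to keep. A minimal standalone sketch of that arithmetic, using a hypothetical helper name and plain unsigned integers rather than the GCC trees in the hunk:

#include <cstdio>

/* Alignment (in bits) that survives accessing an object of alignment
   OBJECT_ALIGN at bit offset BITPOS: the lowest set bit of the offset
   caps the guarantee, the same "bitpos & -bitpos" trick used above.  */
static unsigned
effective_alignment (unsigned object_align, unsigned bitpos)
{
  if (bitpos == 0)
    return object_align;
  unsigned lowbit = bitpos & -bitpos;
  return lowbit < object_align ? lowbit : object_align;
}

int
main ()
{
  /* 256-bit aligned object, access 12 bytes (96 bits) in: only 32-bit
     alignment remains, since 96 = 64 + 32 has 32 as its lowest set bit.  */
  printf ("%u\n", effective_alignment (256, 96));   /* prints 32 */
  printf ("%u\n", effective_alignment (256, 0));    /* prints 256 */
  return 0;
}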
@@ -790,12 +828,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
if (base_alignment < TYPE_ALIGN (vectype))
{
- /* Strip an inner MEM_REF to a bare decl if possible. */
- if (TREE_CODE (base) == MEM_REF
- && integer_zerop (TREE_OPERAND (base, 1))
- && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
- base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
-
+ base = base_addr;
+ if (TREE_CODE (base) == ADDR_EXPR)
+ base = TREE_OPERAND (base, 0);
if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
{
if (dump_enabled_p ())
@@ -808,6 +843,19 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
return true;
}
+ if (DECL_USER_ALIGN (base))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not forcing alignment of user-aligned "
+ "variable: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, base);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return true;
+ }
+
/* Force the alignment of the decl.
NOTE: This is the only change to the code we make during
the analysis phase, before deciding to vectorize the loop. */
@@ -1059,12 +1107,9 @@ vector_alignment_reachable_p (struct data_reference *dr)
bool is_packed = not_size_aligned (DR_REF (dr));
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Unknown misalignment, is_packed = %d\n",is_packed);
- if ((TYPE_USER_ALIGN (type) && !is_packed)
- || targetm.vectorize.vector_alignment_reachable (type, is_packed))
- return true;
- else
- return false;
+ "Unknown misalignment, %snaturally aligned\n",
+ is_packed ? "not " : "");
+ return targetm.vectorize.vector_alignment_reachable (type, is_packed);
}
return true;
@@ -1101,8 +1146,8 @@ vect_get_data_access_cost (struct data_reference *dr,
typedef struct _vect_peel_info
{
- int npeel;
struct data_reference *dr;
+ int npeel;
unsigned int count;
} *vect_peel_info;
@@ -1505,7 +1550,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
Hence, except for the immediate peeling amount, we also want
to try to add full vector size, while we don't exceed
vectorization factor.
- We do this automtically for cost model, since we calculate cost
+ We do this automatically for cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
@@ -2234,10 +2279,11 @@ vect_analyze_group_access_1 (struct data_reference *dr)
if (DR_IS_READ (dr)
&& (dr_step % type_size) == 0
&& groupsize > 0
- && exact_log2 (groupsize) != -1)
+ && pow2p_hwi (groupsize))
{
GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
+ GROUP_GAP (stmt_info) = groupsize - 1;
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
@@ -2350,7 +2396,9 @@ vect_analyze_group_access_1 (struct data_reference *dr)
if (groupsize == 0)
groupsize = count + gaps;
- if (groupsize > UINT_MAX)
+ /* This could be UINT_MAX but as we are generating code in a very
+ inefficient way we have to cap earlier. See PR78699 for example. */
+ if (groupsize > 4096)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2703,10 +2751,17 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
data_reference_p dra = datarefs_copy[i];
stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
stmt_vec_info lastinfo = NULL;
+ if (! STMT_VINFO_VECTORIZABLE (stmtinfo_a))
+ {
+ ++i;
+ continue;
+ }
for (i = i + 1; i < datarefs_copy.length (); ++i)
{
data_reference_p drb = datarefs_copy[i];
stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
+ if (! STMT_VINFO_VECTORIZABLE (stmtinfo_b))
+ break;
/* ??? Imperfect sorting (non-compatible types, non-modulo
accesses, same accesses) can lead to a group to be artificially
@@ -2849,7 +2904,8 @@ operator == (const dr_with_seg_len& d1,
{
return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
DR_BASE_ADDRESS (d2.dr), 0)
- && compare_tree (d1.offset, d2.offset) == 0
+ && compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
+ && compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
&& compare_tree (d1.seg_len, d2.seg_len) == 0;
}
@@ -2859,15 +2915,12 @@ operator == (const dr_with_seg_len& d1,
so that we can combine aliasing checks in one scan. */
static int
-comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
+comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
- const dr_with_seg_len_pair_t* p1 = (const dr_with_seg_len_pair_t *) p1_;
- const dr_with_seg_len_pair_t* p2 = (const dr_with_seg_len_pair_t *) p2_;
-
- const dr_with_seg_len &p11 = p1->first,
- &p12 = p1->second,
- &p21 = p2->first,
- &p22 = p2->second;
+ const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
+ const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
+ const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
+ const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
/* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
if a and c have the same basic address and step, and b and d have the same
@@ -2875,19 +2928,23 @@ comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
and step, we don't care about the order of those two pairs after sorting. */
int comp_res;
- if ((comp_res = compare_tree (DR_BASE_ADDRESS (p11.dr),
- DR_BASE_ADDRESS (p21.dr))) != 0)
+ if ((comp_res = compare_tree (DR_BASE_ADDRESS (a1.dr),
+ DR_BASE_ADDRESS (b1.dr))) != 0)
+ return comp_res;
+ if ((comp_res = compare_tree (DR_BASE_ADDRESS (a2.dr),
+ DR_BASE_ADDRESS (b2.dr))) != 0)
return comp_res;
- if ((comp_res = compare_tree (DR_BASE_ADDRESS (p12.dr),
- DR_BASE_ADDRESS (p22.dr))) != 0)
+ if ((comp_res = compare_tree (DR_STEP (a1.dr), DR_STEP (b1.dr))) != 0)
return comp_res;
- if ((comp_res = compare_tree (DR_STEP (p11.dr), DR_STEP (p21.dr))) != 0)
+ if ((comp_res = compare_tree (DR_STEP (a2.dr), DR_STEP (b2.dr))) != 0)
return comp_res;
- if ((comp_res = compare_tree (DR_STEP (p12.dr), DR_STEP (p22.dr))) != 0)
+ if ((comp_res = compare_tree (DR_OFFSET (a1.dr), DR_OFFSET (b1.dr))) != 0)
return comp_res;
- if ((comp_res = compare_tree (p11.offset, p21.offset)) != 0)
+ if ((comp_res = compare_tree (DR_INIT (a1.dr), DR_INIT (b1.dr))) != 0)
return comp_res;
- if ((comp_res = compare_tree (p12.offset, p22.offset)) != 0)
+ if ((comp_res = compare_tree (DR_OFFSET (a2.dr), DR_OFFSET (b2.dr))) != 0)
+ return comp_res;
+ if ((comp_res = compare_tree (DR_INIT (a2.dr), DR_INIT (b2.dr))) != 0)
return comp_res;
return 0;
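The comparator above only establishes a total order so that pairs sharing DR_BASE_ADDRESS, DR_STEP, DR_OFFSET and DR_INIT become adjacent and can then be merged in one linear scan. A minimal standalone sketch of the same chained lexicographic comparison, using hypothetical field names on plain integers instead of tree operands:

#include <algorithm>
#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>

struct seg { int base, step, offset, init; };   /* stand-ins for DR_* */

static bool
seg_pair_less (const std::pair<seg, seg> &a, const std::pair<seg, seg> &b)
{
  /* Same key order as the comparator above: both bases, both steps,
     then offset/init of each member.  */
  return std::tie (a.first.base, a.second.base,
                   a.first.step, a.second.step,
                   a.first.offset, a.first.init,
                   a.second.offset, a.second.init)
       < std::tie (b.first.base, b.second.base,
                   b.first.step, b.second.step,
                   b.first.offset, b.first.init,
                   b.second.offset, b.second.init);
}

int
main ()
{
  std::vector<std::pair<seg, seg>> pairs
    = { { {1, 4, 0, 32}, {2, 4, 0, 0} },
        { {1, 4, 0, 0},  {2, 4, 0, 0} } };
  std::sort (pairs.begin (), pairs.end (), seg_pair_less);
  /* The two pairs share bases, steps and offsets, so after sorting they
     are adjacent and differ only in init, which is what the later
     merging pass relies on.  */
  printf ("%d %d\n", pairs[0].first.init, pairs[1].first.init);  /* 0 32 */
  return 0;
}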
@@ -2929,6 +2986,56 @@ vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
return segment_length;
}
+/* Function vect_no_alias_p.
+
+ Given data references A and B with equal base and offset, the alias
+ relation can be decided at compilation time, return TRUE if they do
+ not alias to each other; return FALSE otherwise. SEGMENT_LENGTH_A
+ and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
+ respectively. */
+
+static bool
+vect_no_alias_p (struct data_reference *a, struct data_reference *b,
+ tree segment_length_a, tree segment_length_b)
+{
+ gcc_assert (TREE_CODE (DR_INIT (a)) == INTEGER_CST
+ && TREE_CODE (DR_INIT (b)) == INTEGER_CST);
+ if (tree_int_cst_equal (DR_INIT (a), DR_INIT (b)))
+ return false;
+
+ tree seg_a_min = DR_INIT (a);
+ tree seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_min),
+ seg_a_min, segment_length_a);
+ /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
+ bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
+ [a, a+12) */
+ if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
+ {
+ tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
+ seg_a_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_max),
+ seg_a_max, unit_size);
+ seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (a)),
+ DR_INIT (a), unit_size);
+ }
+ tree seg_b_min = DR_INIT (b);
+ tree seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_min),
+ seg_b_min, segment_length_b);
+ if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
+ {
+ tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
+ seg_b_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_max),
+ seg_b_max, unit_size);
+ seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (b)),
+ DR_INIT (b), unit_size);
+ }
+
+ if (tree_int_cst_le (seg_a_max, seg_b_min)
+ || tree_int_cst_le (seg_b_max, seg_a_min))
+ return true;
+
+ return false;
+}
+
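For accesses whose DR_BASE_ADDRESS and DR_OFFSET compare equal, the new vect_no_alias_p reduces each reference to a constant byte interval relative to the shared base and tests the two half-open intervals for overlap, shifting an interval by the element size when its step is negative. A minimal standalone sketch of that interval test with plain integers (the struct below is a hypothetical stand-in for DR_INIT, segment length, DR_STEP and TYPE_SIZE_UNIT, not a GCC type):

#include <cstdio>

struct seg_t { long init, len, step, unit; };

/* True iff the two constant byte ranges provably do not overlap.  */
static bool
no_alias_p (seg_t a, seg_t b)
{
  long a_min = a.init, a_max = a.init + a.len;
  if (a.step < 0)                 /* negative step: len is negative too */
    { a_min = a_max + a.unit; a_max = a.init + a.unit; }
  long b_min = b.init, b_max = b.init + b.len;
  if (b.step < 0)
    { b_min = b_max + b.unit; b_max = b.init + b.unit; }
  return a_max <= b_min || b_max <= a_min;  /* disjoint half-open ranges */
}

int
main ()
{
  /* Bytes [0,16) vs [16,32): independent, so the runtime check can be
     dropped.  Bytes [0,16) vs [8,24): overlap, vectorization is given up.  */
  printf ("%d\n", no_alias_p ({0, 16, 4, 4}, {16, 16, 4, 4}));   /* 1 */
  printf ("%d\n", no_alias_p ({0, 16, 4, 4}, {8, 16, 4, 4}));    /* 0 */
  /* Negative step, matching the "int a[3]" example in the comment above:
     init 12, segment length -12, element size 4 yields the byte interval
     [4, 16) rather than [0, 12).  */
  printf ("%d\n", no_alias_p ({12, -12, -4, 4}, {16, 16, 4, 4}));  /* 1 */
  return 0;
}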
/* Function vect_prune_runtime_alias_test_list.
Prune a list of ddrs to be tested at run-time by versioning for alias.
@@ -2990,6 +3097,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
/* First, we collect all data ref pairs for aliasing checks. */
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
{
+ int comp_res;
struct data_reference *dr_a, *dr_b;
gimple *dr_group_first_a, *dr_group_first_b;
tree segment_length_a, segment_length_b;
@@ -3020,11 +3128,33 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+ comp_res = compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b));
+ if (comp_res == 0)
+ comp_res = compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
+
+ /* Alias is known at compilation time. */
+ if (comp_res == 0
+ && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
+ && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
+ && TREE_CODE (segment_length_a) == INTEGER_CST
+ && TREE_CODE (segment_length_b) == INTEGER_CST)
+ {
+ if (vect_no_alias_p (dr_a, dr_b, segment_length_a, segment_length_b))
+ continue;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not vectorized: compilation time alias.\n");
+
+ return false;
+ }
+
dr_with_seg_len_pair_t dr_with_seg_len_pair
(dr_with_seg_len (dr_a, segment_length_a),
dr_with_seg_len (dr_b, segment_length_b));
- if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
+ /* Canonicalize pairs by sorting the two DR members. */
+ if (comp_res > 0)
std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
@@ -3080,21 +3210,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
}
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
- DR_BASE_ADDRESS (dr_a2->dr),
- 0)
- || !tree_fits_shwi_p (dr_a1->offset)
- || !tree_fits_shwi_p (dr_a2->offset))
+ DR_BASE_ADDRESS (dr_a2->dr), 0)
+ || !operand_equal_p (DR_OFFSET (dr_a1->dr),
+ DR_OFFSET (dr_a2->dr), 0)
+ || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
+ || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
continue;
/* Make sure dr_a1 starts left of dr_a2. */
- if (tree_int_cst_lt (dr_a2->offset, dr_a1->offset))
+ if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
std::swap (*dr_a1, *dr_a2);
- unsigned HOST_WIDE_INT diff
- = tree_to_shwi (dr_a2->offset) - tree_to_shwi (dr_a1->offset);
-
-
bool do_remove = false;
+ unsigned HOST_WIDE_INT diff
+ = (tree_to_shwi (DR_INIT (dr_a2->dr))
+ - tree_to_shwi (DR_INIT (dr_a1->dr)));
/* If the left segment does not extend beyond the start of the
right segment the new segment length is that of the right
@@ -3122,7 +3252,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
- where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
+ where DIFF = DR_A2_INIT - DR_A1_INIT. However,
SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
have to make a best estimation. We can get the minimum value
of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
@@ -3173,17 +3303,29 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
may_alias_ddrs.length (), comp_alias_ddrs.length ());
if ((int) comp_alias_ddrs.length () >
PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
- return false;
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "number of versioning for alias "
+ "run-time tests exceeds %d "
+ "(--param vect-max-version-for-alias-checks)\n",
+ PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
+ return false;
+ }
+
+ /* All alias checks have been resolved at compilation time. */
+ if (!comp_alias_ddrs.length ())
+ LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
return true;
}
-/* Check whether a non-affine read or write in stmt is suitable for gather load
- or scatter store and if so, return a builtin decl for that operation. */
+/* Return true if a non-affine read or write in STMT is suitable for a
+ gather load or scatter store. Describe the operation in *INFO if so. */
-tree
-vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
- tree *offp, int *scalep)
+bool
+vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
+ gather_scatter_info *info)
{
HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3225,7 +3367,7 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
SSA_NAME OFF and put the loop invariants into a tree BASE
that can be gimplified before the loop. */
base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
- &punsignedp, &reversep, &pvolatilep, false);
+ &punsignedp, &reversep, &pvolatilep);
gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);
if (TREE_CODE (base) == MEM_REF)
@@ -3257,7 +3399,7 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
if (!expr_invariant_in_loop_p (loop, base))
{
if (!integer_zerop (off))
- return NULL_TREE;
+ return false;
off = base;
base = size_int (pbitpos / BITS_PER_UNIT);
}
@@ -3283,7 +3425,7 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
gimple *def_stmt = SSA_NAME_DEF_STMT (off);
if (expr_invariant_in_loop_p (loop, off))
- return NULL_TREE;
+ return false;
if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
break;
@@ -3295,7 +3437,7 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
else
{
if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
- return NULL_TREE;
+ return false;
code = TREE_CODE (off);
extract_ops_from_tree (off, &code, &op0, &op1);
}
@@ -3370,7 +3512,7 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
defined in the loop, punt. */
if (TREE_CODE (off) != SSA_NAME
|| expr_invariant_in_loop_p (loop, off))
- return NULL_TREE;
+ return false;
if (offtype == NULL_TREE)
offtype = TREE_TYPE (off);
@@ -3383,15 +3525,15 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
offtype, scale);
if (decl == NULL_TREE)
- return NULL_TREE;
-
- if (basep)
- *basep = base;
- if (offp)
- *offp = off;
- if (scalep)
- *scalep = scale;
- return decl;
+ return false;
+
+ info->decl = decl;
+ info->base = base;
+ info->offset = off;
+ info->offset_dt = vect_unknown_def_type;
+ info->offset_vectype = NULL_TREE;
+ info->scale = scale;
+ return true;
}
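The new interface returns a bool and fills a single gather_scatter_info record instead of returning a builtin decl and writing through three optional out-parameters. A minimal sketch of that shape with hypothetical names (the struct below is not the real gather_scatter_info layout):

#include <cstdio>

struct gather_info { long base, offset; int scale; };

/* Analyse ADDR; on success fill every field of *INFO and return true,
   on failure return false and leave *INFO untouched.  */
static bool
check_gather (long addr, gather_info *info)
{
  if (addr < 0)
    return false;
  info->base = addr & ~0xfL;
  info->offset = addr & 0xfL;
  info->scale = 1;
  return true;
}

int
main ()
{
  gather_info gs;
  if (check_gather (0x123, &gs))
    printf ("%ld %ld %d\n", gs.base, gs.offset, gs.scale);  /* 288 3 1 */
  return 0;
}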
/* Function vect_analyze_data_refs.
@@ -3561,7 +3703,6 @@ again:
"not vectorized: data ref analysis "
"failed ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3593,7 +3734,6 @@ again:
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: volatile type ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3610,7 +3750,6 @@ again:
"not vectorized: statement can throw an "
"exception ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3630,7 +3769,6 @@ again:
"not vectorized: statement is bitfield "
"access ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3655,7 +3793,6 @@ again:
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: dr in a call ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3700,7 +3837,7 @@ again:
outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
&poffset, &pmode, &punsignedp,
- &preversep, &pvolatilep, false);
+ &preversep, &pvolatilep);
gcc_assert (outer_base != NULL_TREE);
if (pbitpos % BITS_PER_UNIT != 0)
@@ -3802,7 +3939,6 @@ again:
"not vectorized: more than one data ref "
"in stmt: ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (is_a <bb_vec_info> (vinfo))
@@ -3875,10 +4011,10 @@ again:
if (gatherscatter != SG_NONE)
{
- tree off;
+ gather_scatter_info gs_info;
if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
- NULL, &off, NULL)
- || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
+ &gs_info)
+ || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
free_data_ref (dr);
@@ -3891,7 +4027,6 @@ again:
"not vectorized: not suitable for scatter "
"store ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
@@ -3912,7 +4047,6 @@ again:
"not vectorized: not suitable for strided "
"load ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
@@ -4649,7 +4783,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
else
{
/* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (exact_log2 (count) != -1);
+ gcc_assert (pow2p_hwi (count));
for (i = 0; i < nelt / 2; i++)
{
@@ -4827,7 +4961,7 @@ vect_permute_store_chain (vec<tree> dr_chain,
else
{
/* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (exact_log2 (length) != -1);
+ gcc_assert (pow2p_hwi (length));
for (i = 0, n = nelt / 2; i < n; i++)
{
@@ -5143,14 +5277,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
/* Function vect_grouped_load_supported.
- Returns TRUE if even and odd permutations are supported,
- and FALSE otherwise. */
+ COUNT is the size of the load group (the number of statements plus the
+ number of gaps). SINGLE_ELEMENT_P is true if there is actually
+ only one statement, with a gap of COUNT - 1.
+
+ Returns true if a suitable permute exists. */
bool
-vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
+vect_grouped_load_supported (tree vectype, bool single_element_p,
+ unsigned HOST_WIDE_INT count)
{
machine_mode mode = TYPE_MODE (vectype);
+ /* If this is single-element interleaving with an element distance
+ that leaves unused vector loads around punt - we at least create
+ very sub-optimal code in that case (and blow up memory,
+ see PR65518). */
+ if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "single-element interleaving not supported "
+ "for not adjacent vector loads\n");
+ return false;
+ }
+
/* vect_permute_load_chain requires the group size to be equal to 3 or
be a power of two. */
if (count != 3 && exact_log2 (count) == -1)
@@ -5205,7 +5356,7 @@ vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
else
{
/* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (exact_log2 (count) != -1);
+ gcc_assert (pow2p_hwi (count));
for (i = 0; i < nelt; i++)
sel[i] = i * 2;
if (can_vec_perm_p (mode, false, sel))
@@ -5379,7 +5530,7 @@ vect_permute_load_chain (vec<tree> dr_chain,
else
{
/* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (exact_log2 (length) != -1);
+ gcc_assert (pow2p_hwi (length));
for (i = 0; i < nelt; ++i)
sel[i] = i * 2;
@@ -5528,7 +5679,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
- if (exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
+ if (pow2p_hwi (length) && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
{
unsigned int j, log_length = exact_log2 (length);
for (i = 0; i < nelt / 2; ++i)
@@ -5776,7 +5927,7 @@ vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
get chain for loads group using vect_shift_permute_load_chain. */
mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
- || exact_log2 (size) != -1
+ || pow2p_hwi (size)
|| !vect_shift_permute_load_chain (dr_chain, size, stmt,
gsi, &result_chain))
vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
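The several exact_log2 (x) != -1 tests replaced throughout this patch are plain power-of-two checks; pow2p_hwi states that directly. A short sketch of the underlying bit trick, written as a hypothetical helper rather than the real hwint.h implementation:

#include <cstdio>

/* True iff exactly one bit of X is set, i.e. X is a power of two.  */
static bool
is_pow2 (unsigned long x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

int
main ()
{
  printf ("%d %d %d\n", is_pow2 (4), is_pow2 (3), is_pow2 (0));  /* 1 0 0 */
  return 0;
}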
@@ -5866,7 +6017,7 @@ vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
- if (TREE_CODE (decl) != VAR_DECL)
+ if (!VAR_P (decl))
return false;
if (decl_in_symtab_p (decl)
@@ -6008,10 +6159,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr));
- if ((TYPE_USER_ALIGN (type) && !is_packed)
- || targetm.vectorize.
- support_vector_misalignment (mode, type,
- DR_MISALIGNMENT (dr), is_packed))
+ if (targetm.vectorize.support_vector_misalignment
+ (mode, type, DR_MISALIGNMENT (dr), is_packed))
/* Can't software pipeline the loads, but can at least do them. */
return dr_unaligned_supported;
}
@@ -6023,10 +6172,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr));
- if ((TYPE_USER_ALIGN (type) && !is_packed)
- || targetm.vectorize.
- support_vector_misalignment (mode, type,
- DR_MISALIGNMENT (dr), is_packed))
+ if (targetm.vectorize.support_vector_misalignment
+ (mode, type, DR_MISALIGNMENT (dr), is_packed))
return dr_unaligned_supported;
}