Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r--  gcc/tree-vect-data-refs.c | 180
 1 file changed, 111 insertions(+), 69 deletions(-)
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 5e3b5209e68..ae9189cf476 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1,5 +1,5 @@
/* Data References Analysis and Manipulation Utilities for Vectorization.
- Copyright (C) 2003-2013 Free Software Foundation, Inc.
+ Copyright (C) 2003-2014 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
+#include "cgraph.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
#include "optabs.h"
@@ -496,31 +497,17 @@ vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
/* Unknown data dependence. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
- gimple earlier_stmt;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine dependence between ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't determine dependence between ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
+ dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
-
- /* We do not vectorize basic blocks with write-write dependencies. */
- if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
- return true;
-
- /* Check that it's not a load-after-store dependence. */
- earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
- return true;
-
- return false;
+ }
}
-
- if (dump_enabled_p ())
+ else if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"determined dependence between ");
@@ -530,49 +517,23 @@ vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
dump_printf (MSG_NOTE, "\n");
}
- /* Do not vectorize basic blocks with write-write dependences. */
+ /* We do not vectorize basic blocks with write-write dependencies. */
if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
return true;
- /* Check dependence between DRA and DRB for basic block vectorization.
- If the accesses share same bases and offsets, we can compare their initial
- constant offsets to decide whether they differ or not. In case of a read-
- write dependence we check that the load is before the store to ensure that
- vectorization will not change the order of the accesses. */
-
- HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
- gimple earlier_stmt;
-
- /* Check that the data-refs have same bases and offsets. If not, we can't
- determine if they are dependent. */
- if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
- || !dr_equal_offsets_p (dra, drb))
- return true;
-
- /* Check the types. */
- type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
- type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
-
- if (type_size_a != type_size_b
- || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
- TREE_TYPE (DR_REF (drb))))
- return true;
-
- init_a = TREE_INT_CST_LOW (DR_INIT (dra));
- init_b = TREE_INT_CST_LOW (DR_INIT (drb));
-
- /* Two different locations - no dependence. */
- if (init_a != init_b)
- return false;
-
- /* We have a read-write dependence. Check that the load is before the store.
+ /* If we have a read-write dependence, check that the load is before the store.
When we vectorize basic blocks, vector load can be only before
corresponding scalar load, and vector store can be only after its
corresponding scalar store. So the order of the accesses is preserved in
case the load is before the store. */
- earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
+ gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
- return false;
+ {
+ /* That only holds for load-store pairs taking part in vectorization. */
+ if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
+ && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
+ return false;
+ }
return true;
}
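
The comment in this hunk describes the read-before-write case in prose; the following minimal C fragment (illustrative only, not part of the patch) shows a basic block where every load from the array precedes the store to the same location, so SLP vectorization cannot change the scalar order:

/* Illustrative example, not from the patch: both reads from a[] occur
   before the writes, so the vector load can stay before the scalar
   loads and the vector store after the scalar stores, preserving the
   scalar order the comment above requires.  */
void
bb_read_then_write (int *a)
{
  int t0 = a[0];
  int t1 = a[1];
  a[0] = t0 + 1;
  a[1] = t1 + 1;
}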
@@ -1096,7 +1057,8 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
*new_slot = slot;
}
- if (!supportable_dr_alignment && unlimited_cost_model ())
+ if (!supportable_dr_alignment
+ && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
slot->count += VECT_MAX_COST;
}
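
The cost-model query now receives the loop so it can consult per-loop state rather than only the global setting. A self-contained sketch of the idea, with invented names (this is not GCC's actual implementation in tree-vectorizer.h):

/* Hypothetical sketch: a loop that is forced to be vectorized (for
   example by #pragma omp simd) may be governed by a different
   cost-model setting than ordinary loops, which is why the query
   needs the loop.  All names here are illustrative.  */
enum cm { CM_CHEAP, CM_DYNAMIC, CM_UNLIMITED };

struct loop_sketch
{
  int force_vectorize;   /* set for explicitly vectorized loops  */
};

static enum cm global_cost_model = CM_DYNAMIC;
static enum cm simd_cost_model = CM_UNLIMITED;

static int
unlimited_cost_model_sketch (const struct loop_sketch *loop)
{
  if (loop && loop->force_vectorize)
    return simd_cost_model == CM_UNLIMITED;
  return global_cost_model == CM_UNLIMITED;
}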
@@ -1206,7 +1168,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost ();
- if (!unlimited_cost_model ())
+ if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
@@ -1435,7 +1397,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
vectorization factor.
We do this automatically for the cost model, since we calculate cost
for every peeling option. */
- if (unlimited_cost_model ())
+ if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
possible_npeel_number = vf / nelements;
/* Handle the aligned case. We may decide to align some other
@@ -1443,7 +1405,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
- if (unlimited_cost_model ())
+ if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
possible_npeel_number++;
}
@@ -2957,6 +2919,24 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
enum machine_mode pmode;
int punsignedp, pvolatilep;
+ base = DR_REF (dr);
+ /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
+ see if we can use the def stmt of the address. */
+ if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
+ && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+ || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
+ && TREE_CODE (base) == MEM_REF
+ && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
+ && integer_zerop (TREE_OPERAND (base, 1))
+ && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
+ {
+ gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
+ if (is_gimple_assign (def_stmt)
+ && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
+ base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+ }
+
/* The gather builtins need address of the form
loop_invariant + vector * {1, 2, 4, 8}
or
@@ -2969,7 +2949,7 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
vectorized. The following code attempts to find such a preexisting
SSA_NAME OFF and put the loop invariants into a tree BASE
that can be gimplified before the loop. */
- base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
+ base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
&pmode, &punsignedp, &pvolatilep, false);
gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
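
The comment in this hunk describes the address shape the gather builtins accept; here is a small C loop (illustrative only) whose load has exactly that decomposition, a loop-invariant base plus a per-iteration index scaled by the element size:

/* Illustrative example of a gather-style access: the address of
   base[idx[i]] is loop_invariant (base) + sign_extend (idx[i]) * 4,
   the form vect_check_gather tries to recognize.  */
float
gather_sum (const float *base, const int *idx, int n)
{
  float s = 0.0f;
  for (int i = 0; i < n; i++)
    s += base[idx[i]];
  return s;
}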
@@ -3167,10 +3147,11 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
if (loop_vinfo)
{
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+
loop = LOOP_VINFO_LOOP (loop_vinfo);
- if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
- || find_data_references_in_loop
- (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
+ datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3179,7 +3160,57 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
return false;
}
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!find_data_references_in_stmt (loop, stmt, &datarefs))
+ {
+ if (is_gimple_call (stmt) && loop->safelen)
+ {
+ tree fndecl = gimple_call_fndecl (stmt), op;
+ if (fndecl != NULL_TREE)
+ {
+ struct cgraph_node *node = cgraph_get_node (fndecl);
+ if (node != NULL && node->simd_clones != NULL)
+ {
+ unsigned int j, n = gimple_call_num_args (stmt);
+ for (j = 0; j < n; j++)
+ {
+ op = gimple_call_arg (stmt, j);
+ if (DECL_P (op)
+ || (REFERENCE_CLASS_P (op)
+ && get_base_address (op)))
+ break;
+ }
+ op = gimple_call_lhs (stmt);
+ /* Ignore #pragma omp declare simd functions
+ if they don't have data references in the
+ call stmt itself. */
+ if (j == n
+ && !(op
+ && (DECL_P (op)
+ || (REFERENCE_CLASS_P (op)
+ && get_base_address (op)))))
+ continue;
+ }
+ }
+ }
+ LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: loop contains function "
+ "calls or data references that cannot "
+ "be analyzed\n");
+ return false;
+ }
+ }
+ }
+
+ LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
}
else
{
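
The new per-statement walk makes one exception: in a loop with safelen set (for example an OpenMP simd loop), a call to a #pragma omp declare simd function whose arguments and result are plain scalars carries no data references of its own and no longer makes the whole loop unanalyzable. An illustrative source-level example of the case now allowed:

/* Illustrative only: the call to fn carries no data references in the
   call statement itself, so with loop->safelen set the data-ref walk
   above skips it instead of rejecting the loop.  */
#pragma omp declare simd
extern float fn (float x);

void
apply (float *restrict out, const float *restrict in, int n)
{
#pragma omp simd
  for (int i = 0; i < n; i++)
    out[i] = fn (in[i]);
}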
@@ -3415,7 +3446,10 @@ again:
offset = unshare_expr (DR_OFFSET (dr));
init = unshare_expr (DR_INIT (dr));
- if (is_gimple_call (stmt))
+ if (is_gimple_call (stmt)
+ && (!gimple_call_internal_p (stmt)
+ || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
+ && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
{
if (dump_enabled_p ())
{
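
This hunk stops treating the internal IFN_MASK_LOAD / IFN_MASK_STORE calls as ordinary calls that defeat data-reference analysis. Such calls typically come from if-conversion of conditional accesses; an illustrative loop that may be represented this way on targets with masked vector memory operations:

/* Illustrative only: after if-conversion the conditional store can be
   represented as an IFN_MASK_STORE internal call, which the check
   above now lets through instead of bailing out.  */
void
cond_store (int *restrict a, const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    if (b[i] > 0)
      a[i] = b[i];
}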
@@ -5066,6 +5100,14 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (aligned_access_p (dr) && !check_aligned_accesses)
return dr_aligned;
+ /* For now assume all conditional loads/stores support unaligned
+ access without any special code. */
+ if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
+ && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+ || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+ return dr_unaligned_supported;
+
if (loop_vinfo)
{
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);