path: root/gcc/tree-vect-data-refs.c
author     Sam Thursfield <sam.thursfield@codethink.co.uk>    2017-11-13 16:28:05 +0000
committer  Sam Thursfield <sam.thursfield@codethink.co.uk>    2017-11-13 16:29:09 +0000
commit     03ac50856c9fc8c96b7a17239ee40a10397750a7 (patch)
tree       a648c6d3428e4757e003f6ed1748adb9613065db /gcc/tree-vect-data-refs.c
parent     34efdaf078b01a7387007c4e6bde6db86384c4b7 (diff)
download   gcc-tarball-03ac50856c9fc8c96b7a17239ee40a10397750a7.tar.gz
gcc 7.2.0
This is imported manually due to a bug in the tarball import script. See the baserock-dev mailing list archives (November 2017) for a more detailed explanation of the issue.
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r--    gcc/tree-vect-data-refs.c    6182
1 file changed, 0 insertions, 6182 deletions
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
deleted file mode 100644
index aa504b6a1c..0000000000
--- a/gcc/tree-vect-data-refs.c
+++ /dev/null
@@ -1,6182 +0,0 @@
-/* Data References Analysis and Manipulation Utilities for Vectorization.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
- Contributed by Dorit Naishlos <dorit@il.ibm.com>
- and Ira Rosen <irar@il.ibm.com>
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "target.h"
-#include "rtl.h"
-#include "tree.h"
-#include "gimple.h"
-#include "predict.h"
-#include "memmodel.h"
-#include "tm_p.h"
-#include "ssa.h"
-#include "optabs-tree.h"
-#include "cgraph.h"
-#include "dumpfile.h"
-#include "alias.h"
-#include "fold-const.h"
-#include "stor-layout.h"
-#include "tree-eh.h"
-#include "gimplify.h"
-#include "gimple-iterator.h"
-#include "gimplify-me.h"
-#include "tree-ssa-loop-ivopts.h"
-#include "tree-ssa-loop-manip.h"
-#include "tree-ssa-loop.h"
-#include "cfgloop.h"
-#include "tree-scalar-evolution.h"
-#include "tree-vectorizer.h"
-#include "expr.h"
-#include "builtins.h"
-#include "params.h"
-
-/* Return true if load- or store-lanes optab OPTAB is implemented for
- COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
-
-static bool
-vect_lanes_optab_supported_p (const char *name, convert_optab optab,
- tree vectype, unsigned HOST_WIDE_INT count)
-{
- machine_mode mode, array_mode;
- bool limit_p;
-
- mode = TYPE_MODE (vectype);
- limit_p = !targetm.array_mode_supported_p (mode, count);
- array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
- MODE_INT, limit_p);
-
- if (array_mode == BLKmode)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
- GET_MODE_NAME (mode), count);
- return false;
- }
-
- if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "cannot use %s<%s><%s>\n", name,
- GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
- return false;
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
- GET_MODE_NAME (mode));
-
- return true;
-}
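
For intuition, this query is what decides whether an interleaved access group can be implemented with a single load-lanes/store-lanes instruction. A minimal source-level sketch (hypothetical function and names, not part of this file) of a count-2 load group for which the vec_load_lanes optab would be queried:

    void
    deinterleave (int *out0, int *out1, const int *in, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        {
          out0[i] = in[2 * i];      /* two interleaved loads form a group of two;   */
          out1[i] = in[2 * i + 1];  /* load-lanes support is queried with count 2.  */
        }
    }
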
-
-
-/* Return the smallest scalar part of STMT.
- This is used to determine the vectype of the stmt. We generally set the
- vectype according to the type of the result (lhs). For stmts whose
- result-type is different than the type of the arguments (e.g., demotion,
- promotion), vectype will be reset appropriately (later). Note that we have
- to visit the smallest datatype in this function, because that determines the
- VF. If the smallest datatype in the loop is present only as the rhs of a
- promotion operation - we'd miss it.
- Such a case, where a variable of this datatype does not appear in the lhs
- anywhere in the loop, can only occur if it's an invariant: e.g.:
- 'int_x = (int) short_inv', which we'd expect to have been optimized away by
- invariant motion. However, we cannot rely on invariant motion to always
- take invariants out of the loop, and so in the case of promotion we also
- have to check the rhs.
- LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
- types. */
-
-tree
-vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
- HOST_WIDE_INT *rhs_size_unit)
-{
- tree scalar_type = gimple_expr_type (stmt);
- HOST_WIDE_INT lhs, rhs;
-
- lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
-
- if (is_gimple_assign (stmt)
- && (gimple_assign_cast_p (stmt)
- || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
- || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
- || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
- {
- tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
-
- rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
- if (rhs < lhs)
- scalar_type = rhs_type;
- }
-
- *lhs_size_unit = lhs;
- *rhs_size_unit = rhs;
- return scalar_type;
-}
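
A minimal scalar loop (hypothetical, not part of this file) matching the 'int_x = (int) short_inv' case described above: the short type occurs only on the rhs of a promotion, yet it is the type that must determine the vectorization factor:

    void
    widen_invariant (int *out, short short_inv, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        out[i] = (int) short_inv;   /* smallest type (short) appears only as a promoted rhs */
    }
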
-
-
-/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
- tested at run-time. Return TRUE if DDR was successfully inserted.
- Return false if versioning is not supported. */
-
-static bool
-vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
-{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
- return false;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "mark for run-time aliasing test between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
- dump_printf (MSG_NOTE, "\n");
- }
-
- if (optimize_loop_nest_for_size_p (loop))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning not supported when optimizing"
- " for size.\n");
- return false;
- }
-
- /* FORNOW: We don't support versioning with outer-loop vectorization. */
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning not yet supported for outer-loops.\n");
- return false;
- }
-
- /* FORNOW: We don't support creating runtime alias tests for non-constant
- step. */
- if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
- || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning not yet supported for non-constant "
- "step\n");
- return false;
- }
-
- LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
- return true;
-}
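
As a sketch of the loops this enables (hypothetical names), the two pointers below cannot be disambiguated at compile time; recording the DDR here lets the vectorizer emit a run-time overlap check and version the loop:

    void
    add_arrays (int *a, int *b, int n)    /* a and b may overlap */
    {
      int i;
      for (i = 0; i < n; i++)
        a[i] += b[i];
      /* Conceptual versioned form:
           if (a + n <= b || b + n <= a)    run-time alias test
             ... vectorized loop ...
           else
             ... original scalar loop ...  */
    }
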
-
-
-/* Function vect_analyze_data_ref_dependence.
-
-   Return TRUE if there (might) exist a dependence between a memory-reference
-   DRA and a memory-reference DRB.  If the dependence can instead be checked
-   at run time by versioning for alias, return FALSE.  Adjust *MAX_VF
-   according to the data dependence.  */
-
-static bool
-vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
- loop_vec_info loop_vinfo, int *max_vf)
-{
- unsigned int i;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- struct data_reference *dra = DDR_A (ddr);
- struct data_reference *drb = DDR_B (ddr);
- stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
- stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
- lambda_vector dist_v;
- unsigned int loop_depth;
-
- /* In loop analysis all data references should be vectorizable. */
- if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
- || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
- gcc_unreachable ();
-
- /* Independent data accesses. */
- if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
- return false;
-
- if (dra == drb
- || (DR_IS_READ (dra) && DR_IS_READ (drb)))
- return false;
-
- /* We do not have to consider dependences between accesses that belong
- to the same group. */
- if (GROUP_FIRST_ELEMENT (stmtinfo_a)
- && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b))
- return false;
-
- /* Even if we have an anti-dependence then, as the vectorized loop covers at
- least two scalar iterations, there is always also a true dependence.
- As the vectorizer does not re-order loads and stores we can ignore
- the anti-dependence if TBAA can disambiguate both DRs similar to the
- case with known negative distance anti-dependences (positive
- distance anti-dependences would violate TBAA constraints). */
- if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
- || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
- && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
- get_alias_set (DR_REF (drb))))
- return false;
-
- /* Unknown data dependence. */
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- {
- /* If user asserted safelen consecutive iterations can be
- executed concurrently, assume independence. */
- if (loop->safelen >= 2)
- {
- if (loop->safelen < *max_vf)
- *max_vf = loop->safelen;
- LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
- return false;
- }
-
- if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning for alias not supported for: "
- "can't determine dependence between ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (drb));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return true;
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning for alias required: "
- "can't determine dependence between ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (drb));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
-
- /* Add to list of ddrs that need to be tested at run-time. */
- return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
- }
-
- /* Known data dependence. */
- if (DDR_NUM_DIST_VECTS (ddr) == 0)
- {
- /* If user asserted safelen consecutive iterations can be
- executed concurrently, assume independence. */
- if (loop->safelen >= 2)
- {
- if (loop->safelen < *max_vf)
- *max_vf = loop->safelen;
- LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
- return false;
- }
-
- if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning for alias not supported for: "
- "bad dist vector for ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (drb));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return true;
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "versioning for alias required: "
- "bad dist vector for ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Add to list of ddrs that need to be tested at run-time. */
- return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
- }
-
- loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
- FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
- {
- int dist = dist_v[loop_depth];
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "dependence distance = %d.\n", dist);
-
- if (dist == 0)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "dependence distance == 0 between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
-	      dump_printf (MSG_NOTE, "\n");
- }
-
- /* When we perform grouped accesses and perform implicit CSE
- by detecting equal accesses and doing disambiguation with
- runtime alias tests like for
- .. = a[i];
- .. = a[i+1];
- a[i] = ..;
- a[i+1] = ..;
- *p = ..;
- .. = a[i];
- .. = a[i+1];
- where we will end up loading { a[i], a[i+1] } once, make
- sure that inserting group loads before the first load and
- stores after the last store will do the right thing.
- Similar for groups like
- a[i] = ...;
- ... = a[i];
- a[i+1] = ...;
- where loads from the group interleave with the store. */
- if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
- || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
- {
- gimple *earlier_stmt;
- earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_WRITE
- (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "READ_WRITE dependence in interleaving."
- "\n");
- return true;
- }
- }
-
- continue;
- }
-
- if (dist > 0 && DDR_REVERSED_P (ddr))
- {
- /* If DDR_REVERSED_P the order of the data-refs in DDR was
- reversed (to make distance vector positive), and the actual
- distance is negative. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "dependence distance negative.\n");
- /* Record a negative dependence distance to later limit the
- amount of stmt copying / unrolling we can perform.
- Only need to handle read-after-write dependence. */
- if (DR_IS_READ (drb)
- && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
- || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
- STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
- continue;
- }
-
- if (abs (dist) >= 2
- && abs (dist) < *max_vf)
- {
- /* The dependence distance requires reduction of the maximal
- vectorization factor. */
- *max_vf = abs (dist);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "adjusting maximal vectorization factor to %i\n",
- *max_vf);
- }
-
- if (abs (dist) >= *max_vf)
- {
- /* Dependence distance does not create dependence, as far as
- vectorization is concerned, in this case. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "dependence distance >= VF.\n");
- continue;
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized, possible dependence "
- "between data-refs ");
-	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
-	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
-	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
-	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
-
- return true;
- }
-
- return false;
-}
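
A hypothetical loop illustrating the distance handling above: the write to a[i + 3] depends on the read of a[i] with distance 3, so instead of rejecting vectorization the code above clamps *max_vf to 3:

    void
    carry_distance_3 (int *a, int n)
    {
      int i;
      for (i = 0; i < n - 3; i++)
        a[i + 3] = a[i] + 1;   /* loop-carried dependence of distance 3 */
    }
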
-
-/* Function vect_analyze_data_ref_dependences.
-
- Examine all the data references in the loop, and make sure there do not
- exist any data dependences between them. Set *MAX_VF according to
- the maximum vectorization factor the data dependences allow. */
-
-bool
-vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
-{
- unsigned int i;
- struct data_dependence_relation *ddr;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_ref_dependences ===\n");
-
- LOOP_VINFO_DDRS (loop_vinfo)
- .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
- * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
- LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
- /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS. */
- if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
- &LOOP_VINFO_DDRS (loop_vinfo),
- LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
- return false;
-
- /* For epilogues we either have no aliases or alias versioning
- was applied to original loop. Therefore we may just get max_vf
- using VF of original loop. */
- if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
- *max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo);
- else
- FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
- if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
- return false;
-
- return true;
-}
-
-
-/* Function vect_slp_analyze_data_ref_dependence.
-
-   Return TRUE if there (might) exist a dependence between a memory-reference
-   DRA and a memory-reference DRB in the SLP instance being analyzed.  Unlike
-   the loop variant above, no run-time alias versioning and no *MAX_VF
-   adjustment are done here.  */
-
-static bool
-vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
-{
- struct data_reference *dra = DDR_A (ddr);
- struct data_reference *drb = DDR_B (ddr);
-
- /* We need to check dependences of statements marked as unvectorizable
- as well, they still can prohibit vectorization. */
-
- /* Independent data accesses. */
- if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
- return false;
-
- if (dra == drb)
- return false;
-
- /* Read-read is OK. */
- if (DR_IS_READ (dra) && DR_IS_READ (drb))
- return false;
-
- /* If dra and drb are part of the same interleaving chain consider
- them independent. */
- if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
- && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
- == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
- return false;
-
- /* Unknown data dependence. */
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine dependence between ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- }
- else if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "determined dependence between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
- dump_printf (MSG_NOTE, "\n");
- }
-
- return true;
-}
-
-
-/* Analyze dependences involved in the transform of SLP NODE. STORES
- contain the vector of scalar stores of this instance if we are
- disambiguating the loads. */
-
-static bool
-vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
- vec<gimple *> stores, gimple *last_store)
-{
- /* This walks over all stmts involved in the SLP load/store done
- in NODE verifying we can sink them up to the last stmt in the
- group. */
- gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
- for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
- {
- gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
- if (access == last_access)
- continue;
- data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
- for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
- gsi_stmt (gsi) != last_access; gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
- if (! gimple_vuse (stmt)
- || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
- continue;
-
- /* If we couldn't record a (single) data reference for this
- stmt we have to give up. */
- /* ??? Here and below if dependence analysis fails we can resort
- to the alias oracle which can handle more kinds of stmts. */
- data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (!dr_b)
- return false;
-
- bool dependent = false;
- /* If we run into a store of this same instance (we've just
- marked those) then delay dependence checking until we run
- into the last store because this is where it will have
- been sunk to (and we verify if we can do that as well). */
- if (gimple_visited_p (stmt))
- {
- if (stmt != last_store)
- continue;
- unsigned i;
- gimple *store;
- FOR_EACH_VEC_ELT (stores, i, store)
- {
- data_reference *store_dr
- = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
- ddr_p ddr = initialize_data_dependence_relation
- (dr_a, store_dr, vNULL);
- dependent = vect_slp_analyze_data_ref_dependence (ddr);
- free_dependence_relation (ddr);
- if (dependent)
- break;
- }
- }
- else
- {
- ddr_p ddr = initialize_data_dependence_relation (dr_a,
- dr_b, vNULL);
- dependent = vect_slp_analyze_data_ref_dependence (ddr);
- free_dependence_relation (ddr);
- }
- if (dependent)
- return false;
- }
- }
- return true;
-}
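
A small basic-block example (hypothetical) of what this walk checks: to vectorize the store group to a[0..1] as one vector store at the position of the last group member, every group member must be sinkable past the intervening statements, so *p must be provably independent of the group:

    void
    slp_store_group (int *a, int *p, int x, int y)
    {
      a[0] = x;
      *p = 0;      /* every group member must be sinkable past this store */
      a[1] = y;    /* last store of the group: the vector store is inserted here */
    }
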
-
-
-/* Function vect_slp_analyze_instance_dependence.
-
-   Examine all the data references in the SLP instance, and make sure there
-   do not exist any data dependences that would prevent sinking the
-   instance's loads and stores to their vectorized insert locations.  */
-
-bool
-vect_slp_analyze_instance_dependence (slp_instance instance)
-{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_slp_analyze_instance_dependence ===\n");
-
- /* The stores of this instance are at the root of the SLP tree. */
- slp_tree store = SLP_INSTANCE_TREE (instance);
- if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
- store = NULL;
-
- /* Verify we can sink stores to the vectorized stmt insert location. */
- gimple *last_store = NULL;
- if (store)
- {
- if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
- return false;
-
- /* Mark stores in this instance and remember the last one. */
- last_store = vect_find_last_scalar_stmt_in_slp (store);
- for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
- gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
- }
-
- bool res = true;
-
- /* Verify we can sink loads to the vectorized stmt insert location,
- special-casing stores of this instance. */
- slp_tree load;
- unsigned int i;
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
- if (! vect_slp_analyze_node_dependences (instance, load,
- store
- ? SLP_TREE_SCALAR_STMTS (store)
- : vNULL, last_store))
- {
- res = false;
- break;
- }
-
- /* Unset the visited flag. */
- if (store)
- for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
- gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
-
- return res;
-}
-
-/* Function vect_compute_data_ref_alignment
-
- Compute the misalignment of the data reference DR.
-
- Output:
- 1. If during the misalignment computation it is found that the data reference
- cannot be vectorized then false is returned.
- 2. DR_MISALIGNMENT (DR) is defined.
-
- FOR NOW: No analysis is actually performed. Misalignment is calculated
- only for trivial cases. TODO. */
-
-bool
-vect_compute_data_ref_alignment (struct data_reference *dr)
-{
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = NULL;
- tree ref = DR_REF (dr);
- tree vectype;
- tree base, base_addr;
- tree misalign = NULL_TREE;
- tree aligned_to;
- tree step;
- unsigned HOST_WIDE_INT alignment;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_compute_data_ref_alignment:\n");
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- /* Initialize misalignment to unknown. */
- SET_DR_MISALIGNMENT (dr, -1);
-
- if (tree_fits_shwi_p (DR_STEP (dr)))
- misalign = DR_INIT (dr);
- aligned_to = DR_ALIGNED_TO (dr);
- base_addr = DR_BASE_ADDRESS (dr);
- vectype = STMT_VINFO_VECTYPE (stmt_info);
-
- /* In case the dataref is in an inner-loop of the loop that is being
- vectorized (LOOP), we use the base and misalignment information
- relative to the outer-loop (LOOP). This is ok only if the misalignment
- stays the same throughout the execution of the inner-loop, which is why
-   we have to check that the stride of the dataref in the inner-loop is a
-   multiple of the vector size.  */
- if (loop && nested_in_vect_loop_p (loop, stmt))
- {
- tree step = DR_STEP (dr);
-
- if (tree_fits_shwi_p (step)
- && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "inner step divides the vector-size.\n");
- misalign = STMT_VINFO_DR_INIT (stmt_info);
- aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
- base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "inner step doesn't divide the vector-size.\n");
- misalign = NULL_TREE;
- }
- }
-
- /* Similarly we can only use base and misalignment information relative to
- an innermost loop if the misalignment stays the same throughout the
- execution of the loop. As above, this is the case if the stride of
-     the dataref is a multiple of the vector size.  */
- else
- {
- tree step = DR_STEP (dr);
- unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
-
- if (tree_fits_shwi_p (step)
- && ((tree_to_shwi (step) * vf)
- % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "step doesn't divide the vector-size.\n");
- misalign = NULL_TREE;
- }
- }
-
- /* To look at alignment of the base we have to preserve an inner MEM_REF
- as that carries alignment information of the actual access. */
- base = ref;
- while (handled_component_p (base))
- base = TREE_OPERAND (base, 0);
- unsigned int base_alignment = 0;
- unsigned HOST_WIDE_INT base_bitpos;
- get_object_alignment_1 (base, &base_alignment, &base_bitpos);
- /* As data-ref analysis strips the MEM_REF down to its base operand
- to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
- adjust things to make base_alignment valid as the alignment of
- DR_BASE_ADDRESS. */
- if (TREE_CODE (base) == MEM_REF)
- {
- /* Note all this only works if DR_BASE_ADDRESS is the same as
- MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
-	 in other offsets.  We need to rework DR to compute the alignment
- of DR_BASE_ADDRESS as long as all information is still available. */
- if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
- {
- base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
- base_bitpos &= (base_alignment - 1);
- }
- else
- base_bitpos = BITS_PER_UNIT;
- }
- if (base_bitpos != 0)
- base_alignment = base_bitpos & -base_bitpos;
- /* Also look at the alignment of the base address DR analysis
- computed. */
- unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
- if (base_addr_alignment > base_alignment)
- base_alignment = base_addr_alignment;
-
- if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
- DR_VECT_AUX (dr)->base_element_aligned = true;
-
- alignment = TYPE_ALIGN_UNIT (vectype);
-
- if ((compare_tree_int (aligned_to, alignment) < 0)
- || !misalign)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Unknown alignment for access: ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return true;
- }
-
- if (base_alignment < TYPE_ALIGN (vectype))
- {
- base = base_addr;
- if (TREE_CODE (base) == ADDR_EXPR)
- base = TREE_OPERAND (base, 0);
- if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "can't force alignment of ref: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
- dump_printf (MSG_NOTE, "\n");
- }
- return true;
- }
-
- if (DECL_USER_ALIGN (base))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "not forcing alignment of user-aligned "
- "variable: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, base);
- dump_printf (MSG_NOTE, "\n");
- }
- return true;
- }
-
- /* Force the alignment of the decl.
- NOTE: This is the only change to the code we make during
- the analysis phase, before deciding to vectorize the loop. */
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
- dump_printf (MSG_NOTE, "\n");
- }
-
- DR_VECT_AUX (dr)->base_decl = base;
- DR_VECT_AUX (dr)->base_misaligned = true;
- DR_VECT_AUX (dr)->base_element_aligned = true;
- }
-
- if (loop && nested_in_vect_loop_p (loop, stmt))
- step = STMT_VINFO_DR_STEP (stmt_info);
- else
- step = DR_STEP (dr);
-  /* If this is a backward running DR then the first access in the larger
-     vectype is actually N-1 elements before the address in the DR.
-     Adjust misalign accordingly.  */
- if (tree_int_cst_sgn (step) < 0)
- {
- tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
- /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
- otherwise we wouldn't be here. */
- offset = fold_build2 (MULT_EXPR, ssizetype, offset, step);
- /* PLUS because STEP was negative. */
- misalign = size_binop (PLUS_EXPR, misalign, offset);
- }
-
- SET_DR_MISALIGNMENT (dr,
- wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
-
- return true;
-}
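
A concrete (hypothetical) instance of the misalignment bookkeeping above: with 16-byte vectors of float and a 16-byte-aligned base, the access below starts one 4-byte element into the array, so DR_INIT is 4 and the recorded misalignment is 4 mod 16 = 4 bytes:

    float a[1024] __attribute__ ((aligned (16)));

    void
    shifted_access (int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i + 1] *= 2.0f;   /* known misalignment of 4 bytes w.r.t. the vector type */
    }
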
-
-
-/* Function vect_update_misalignment_for_peel
-
- DR - the data reference whose misalignment is to be adjusted.
- DR_PEEL - the data reference whose misalignment is being made
- zero in the vector loop by the peel.
- NPEEL - the number of iterations in the peel loop if the misalignment
- of DR_PEEL is known at compile time. */
-
-static void
-vect_update_misalignment_for_peel (struct data_reference *dr,
- struct data_reference *dr_peel, int npeel)
-{
- unsigned int i;
- vec<dr_p> same_align_drs;
- struct data_reference *current_dr;
- int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
- int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
- stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
- stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
-
- /* For interleaved data accesses the step in the loop must be multiplied by
- the size of the interleaving group. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
- if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
- dr_peel_size *= GROUP_SIZE (peel_stmt_info);
-
- /* It can be assumed that the data refs with the same alignment as dr_peel
- are aligned in the vector loop. */
- same_align_drs
- = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
- FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
- {
- if (current_dr != dr)
- continue;
- gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
- DR_MISALIGNMENT (dr_peel) / dr_peel_size);
- SET_DR_MISALIGNMENT (dr, 0);
- return;
- }
-
- if (known_alignment_for_access_p (dr)
- && known_alignment_for_access_p (dr_peel))
- {
- bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
- int misal = DR_MISALIGNMENT (dr);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- misal += negative ? -npeel * dr_size : npeel * dr_size;
- misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
- SET_DR_MISALIGNMENT (dr, misal);
- return;
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
- SET_DR_MISALIGNMENT (dr, -1);
-}
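
The known-alignment branch above is plain modular arithmetic; a hypothetical worked example with a forward 4-byte access, 16-byte vector alignment, and a peel of two iterations:

    static int
    misalignment_after_peel (void)
    {
      int misal = 8;               /* DR_MISALIGNMENT before peeling, in bytes    */
      int npeel = 2, dr_size = 4;  /* peel count and scalar element size          */
      misal += npeel * dr_size;    /* forward access: step > 0                    */
      misal &= 16 - 1;             /* modulo the vector alignment in bytes        */
      return misal;                /* 0: the reference is aligned after the peel  */
    }
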
-
-
-/* Function verify_data_ref_alignment
-
- Return TRUE if DR can be handled with respect to alignment. */
-
-static bool
-verify_data_ref_alignment (data_reference_p dr)
-{
- enum dr_alignment_support supportable_dr_alignment
- = vect_supportable_dr_alignment (dr, false);
- if (!supportable_dr_alignment)
- {
- if (dump_enabled_p ())
- {
- if (DR_IS_READ (dr))
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned load.");
- else
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned "
- "store.");
-
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dr));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
-
- if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Vectorizing an unaligned access.\n");
-
- return true;
-}
-
-/* Function vect_verify_datarefs_alignment
-
- Return TRUE if all data references in the loop can be
- handled with respect to alignment. */
-
-bool
-vect_verify_datarefs_alignment (loop_vec_info vinfo)
-{
- vec<data_reference_p> datarefs = vinfo->datarefs;
- struct data_reference *dr;
- unsigned int i;
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
- continue;
-
- /* For interleaving, only the alignment of the first access matters. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- continue;
-
- /* Strided accesses perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- if (! verify_data_ref_alignment (dr))
- return false;
- }
-
- return true;
-}
-
-/* Given a memory reference EXP return whether its alignment is less
- than its size. */
-
-static bool
-not_size_aligned (tree exp)
-{
- if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
- return true;
-
- return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
- > get_object_alignment (exp));
-}
-
-/* Function vector_alignment_reachable_p
-
- Return true if vector alignment for DR is reachable by peeling
- a few loop iterations. Return false otherwise. */
-
-static bool
-vector_alignment_reachable_p (struct data_reference *dr)
-{
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
-      /* For interleaved accesses we peel only if the number of iterations in
-	 the prolog loop (VF - misalignment) is a multiple of the number of
-	 interleaved accesses.  */
- int elem_size, mis_in_elements;
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- /* FORNOW: handle only known alignment. */
- if (!known_alignment_for_access_p (dr))
- return false;
-
- elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
- mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
-
- if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
- return false;
- }
-
-  /* If misalignment is known at compile time then allow peeling
- only if natural alignment is reachable through peeling. */
- if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
- {
- HOST_WIDE_INT elmsize =
- int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
- dump_printf (MSG_NOTE,
- ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
- }
- if (DR_MISALIGNMENT (dr) % elmsize)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "data size does not divide the misalignment.\n");
- return false;
- }
- }
-
- if (!known_alignment_for_access_p (dr))
- {
- tree type = TREE_TYPE (DR_REF (dr));
- bool is_packed = not_size_aligned (DR_REF (dr));
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Unknown misalignment, %snaturally aligned\n",
- is_packed ? "not " : "");
- return targetm.vectorize.vector_alignment_reachable (type, is_packed);
- }
-
- return true;
-}
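
A hypothetical case the known-misalignment check above rejects: the array below sits at a constant 1-byte offset inside a packed object, so its misalignment is not a multiple of the 8-byte element size and no number of peeled scalar iterations can reach vector alignment:

    struct __attribute__ ((packed)) rec
    {
      char tag;
      double val[64];   /* constant 1-byte offset: 1 % 8 != 0, so peeling cannot help */
    };

    struct rec r;

    void
    scale (int n)
    {
      int i;
      for (i = 0; i < n; i++)
        r.val[i] *= 2.0;
    }
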
-
-
-/* Calculate the cost of the memory access represented by DR. */
-
-static void
-vect_get_data_access_cost (struct data_reference *dr,
- unsigned int *inside_cost,
- unsigned int *outside_cost,
- stmt_vector_for_cost *body_cost_vec)
-{
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- int ncopies = vf / nunits;
-
- if (DR_IS_READ (dr))
- vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
- NULL, body_cost_vec, false);
- else
- vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_get_data_access_cost: inside_cost = %d, "
- "outside_cost = %d.\n", *inside_cost, *outside_cost);
-}
-
-
-typedef struct _vect_peel_info
-{
- struct data_reference *dr;
- int npeel;
- unsigned int count;
-} *vect_peel_info;
-
-typedef struct _vect_peel_extended_info
-{
- struct _vect_peel_info peel_info;
- unsigned int inside_cost;
- unsigned int outside_cost;
- stmt_vector_for_cost body_cost_vec;
-} *vect_peel_extended_info;
-
-
-/* Peeling hashtable helpers. */
-
-struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
-{
- static inline hashval_t hash (const _vect_peel_info *);
- static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
-};
-
-inline hashval_t
-peel_info_hasher::hash (const _vect_peel_info *peel_info)
-{
- return (hashval_t) peel_info->npeel;
-}
-
-inline bool
-peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
-{
- return (a->npeel == b->npeel);
-}
-
-
-/* Insert DR into peeling hash table with NPEEL as key. */
-
-static void
-vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
- loop_vec_info loop_vinfo, struct data_reference *dr,
- int npeel)
-{
- struct _vect_peel_info elem, *slot;
- _vect_peel_info **new_slot;
- bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
-
- elem.npeel = npeel;
- slot = peeling_htab->find (&elem);
- if (slot)
- slot->count++;
- else
- {
- slot = XNEW (struct _vect_peel_info);
- slot->npeel = npeel;
- slot->dr = dr;
- slot->count = 1;
- new_slot = peeling_htab->find_slot (slot, INSERT);
- *new_slot = slot;
- }
-
- if (!supportable_dr_alignment
- && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- slot->count += VECT_MAX_COST;
-}
-
-
-/* Traverse peeling hash table to find peeling option that aligns maximum
- number of data accesses. */
-
-int
-vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
- _vect_peel_extended_info *max)
-{
- vect_peel_info elem = *slot;
-
- if (elem->count > max->peel_info.count
- || (elem->count == max->peel_info.count
- && max->peel_info.npeel > elem->npeel))
- {
- max->peel_info.npeel = elem->npeel;
- max->peel_info.count = elem->count;
- max->peel_info.dr = elem->dr;
- }
-
- return 1;
-}
-
-
-/* Traverse peeling hash table and calculate cost for each peeling option.
- Find the one with the lowest cost. */
-
-int
-vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
- _vect_peel_extended_info *min)
-{
- vect_peel_info elem = *slot;
- int save_misalignment, dummy;
- unsigned int inside_cost = 0, outside_cost = 0, i;
- gimple *stmt = DR_STMT (elem->dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- struct data_reference *dr;
- stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
-
- prologue_cost_vec.create (2);
- body_cost_vec.create (2);
- epilogue_cost_vec.create (2);
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- stmt = DR_STMT (dr);
- stmt_info = vinfo_for_stmt (stmt);
- /* For interleaving, only the alignment of the first access
- matters. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- continue;
-
- /* Strided accesses perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- save_misalignment = DR_MISALIGNMENT (dr);
- vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
- vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
- &body_cost_vec);
- SET_DR_MISALIGNMENT (dr, save_misalignment);
- }
-
- outside_cost += vect_get_known_peeling_cost
- (loop_vinfo, elem->npeel, &dummy,
- &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
- &prologue_cost_vec, &epilogue_cost_vec);
-
- /* Prologue and epilogue costs are added to the target model later.
- These costs depend only on the scalar iteration cost, the
- number of peeling iterations finally chosen, and the number of
- misaligned statements. So discard the information found here. */
- prologue_cost_vec.release ();
- epilogue_cost_vec.release ();
-
- if (inside_cost < min->inside_cost
- || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
- {
- min->inside_cost = inside_cost;
- min->outside_cost = outside_cost;
- min->body_cost_vec.release ();
- min->body_cost_vec = body_cost_vec;
- min->peel_info.dr = elem->dr;
- min->peel_info.npeel = elem->npeel;
- }
- else
- body_cost_vec.release ();
-
- return 1;
-}
-
-
-/* Choose best peeling option by traversing peeling hash table and either
- choosing an option with the lowest cost (if cost model is enabled) or the
- option that aligns as many accesses as possible. */
-
-static struct data_reference *
-vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
- loop_vec_info loop_vinfo,
- unsigned int *npeel,
- stmt_vector_for_cost *body_cost_vec)
-{
- struct _vect_peel_extended_info res;
-
- res.peel_info.dr = NULL;
- res.body_cost_vec = stmt_vector_for_cost ();
-
- if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- {
- res.inside_cost = INT_MAX;
- res.outside_cost = INT_MAX;
- peeling_htab->traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_lowest_cost> (&res);
- }
- else
- {
- res.peel_info.count = 0;
- peeling_htab->traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_most_frequent> (&res);
- }
-
- *npeel = res.peel_info.npeel;
- *body_cost_vec = res.body_cost_vec;
- return res.peel_info.dr;
-}
-
-
-/* Function vect_enhance_data_refs_alignment
-
- This pass will use loop versioning and loop peeling in order to enhance
- the alignment of data references in the loop.
-
- FOR NOW: we assume that whatever versioning/peeling takes place, only the
- original loop is to be vectorized. Any other loops that are created by
-   the transformations performed in this pass are not supposed to be
- vectorized. This restriction will be relaxed.
-
- This pass will require a cost model to guide it whether to apply peeling
- or versioning or a combination of the two. For example, the scheme that
-   Intel uses when given a loop with several memory accesses is as follows:
-   choose one memory access ('p') whose alignment you want to force by
-   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
- other accesses are not necessarily aligned, or (2) use loop versioning to
- generate one loop in which all accesses are aligned, and another loop in
- which only 'p' is necessarily aligned.
-
- ("Automatic Intra-Register Vectorization for the Intel Architecture",
- Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
- Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
-
- Devising a cost model is the most critical aspect of this work. It will
- guide us on which access to peel for, whether to use loop versioning, how
- many versions to create, etc. The cost model will probably consist of
- generic considerations as well as target specific considerations (on
- powerpc for example, misaligned stores are more painful than misaligned
- loads).
-
- Here are the general steps involved in alignment enhancements:
-
- -- original loop, before alignment analysis:
- for (i=0; i<N; i++){
- x = q[i]; # DR_MISALIGNMENT(q) = unknown
- p[i] = y; # DR_MISALIGNMENT(p) = unknown
- }
-
- -- After vect_compute_data_refs_alignment:
- for (i=0; i<N; i++){
- x = q[i]; # DR_MISALIGNMENT(q) = 3
- p[i] = y; # DR_MISALIGNMENT(p) = unknown
- }
-
- -- Possibility 1: we do loop versioning:
- if (p is aligned) {
- for (i=0; i<N; i++){ # loop 1A
- x = q[i]; # DR_MISALIGNMENT(q) = 3
- p[i] = y; # DR_MISALIGNMENT(p) = 0
- }
- }
- else {
- for (i=0; i<N; i++){ # loop 1B
- x = q[i]; # DR_MISALIGNMENT(q) = 3
- p[i] = y; # DR_MISALIGNMENT(p) = unaligned
- }
- }
-
- -- Possibility 2: we do loop peeling:
- for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
- x = q[i];
- p[i] = y;
- }
- for (i = 3; i < N; i++){ # loop 2A
- x = q[i]; # DR_MISALIGNMENT(q) = 0
- p[i] = y; # DR_MISALIGNMENT(p) = unknown
- }
-
- -- Possibility 3: combination of loop peeling and versioning:
- for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
- x = q[i];
- p[i] = y;
- }
- if (p is aligned) {
- for (i = 3; i<N; i++){ # loop 3A
- x = q[i]; # DR_MISALIGNMENT(q) = 0
- p[i] = y; # DR_MISALIGNMENT(p) = 0
- }
- }
- else {
- for (i = 3; i<N; i++){ # loop 3B
- x = q[i]; # DR_MISALIGNMENT(q) = 0
- p[i] = y; # DR_MISALIGNMENT(p) = unaligned
- }
- }
-
- These loops are later passed to loop_transform to be vectorized. The
- vectorizer will use the alignment information to guide the transformation
- (whether to generate regular loads/stores, or with special handling for
- misalignment). */
-
-bool
-vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
-{
- vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- enum dr_alignment_support supportable_dr_alignment;
- struct data_reference *dr0 = NULL, *first_store = NULL;
- struct data_reference *dr;
- unsigned int i, j;
- bool do_peeling = false;
- bool do_versioning = false;
- bool stat;
- gimple *stmt;
- stmt_vec_info stmt_info;
- unsigned int npeel = 0;
- bool all_misalignments_unknown = true;
- unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- unsigned possible_npeel_number = 1;
- tree vectype;
- unsigned int nelements, mis, same_align_drs_max = 0;
- stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
- hash_table<peel_info_hasher> peeling_htab (1);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_enhance_data_refs_alignment ===\n");
-
- /* Reset data so we can safely be called multiple times. */
- LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
-
- /* While cost model enhancements are expected in the future, the high level
- view of the code at this time is as follows:
-
- A) If there is a misaligned access then see if peeling to align
- this access can make all data references satisfy
- vect_supportable_dr_alignment. If so, update data structures
- as needed and return true.
-
- B) If peeling wasn't possible and there is a data reference with an
- unknown misalignment that does not satisfy vect_supportable_dr_alignment
- then see if loop versioning checks can be used to make all data
- references satisfy vect_supportable_dr_alignment. If so, update
- data structures as needed and return true.
-
- C) If neither peeling nor versioning were successful then return false if
- any data reference does not satisfy vect_supportable_dr_alignment.
-
- D) Return true (all data references satisfy vect_supportable_dr_alignment).
-
- Note, Possibility 3 above (which is peeling and versioning together) is not
- being done at this time. */
-
- /* (1) Peeling to force alignment. */
-
- /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
- Considerations:
- + How many accesses will become aligned due to the peeling
- - How many accesses will become unaligned due to the peeling,
- and the cost of misaligned accesses.
- - The cost of peeling (the extra runtime checks, the increase
- in code size). */
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- stmt = DR_STMT (dr);
- stmt_info = vinfo_for_stmt (stmt);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
- continue;
-
- /* For interleaving, only the alignment of the first access
- matters. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- continue;
-
- /* For invariant accesses there is nothing to enhance. */
- if (integer_zerop (DR_STEP (dr)))
- continue;
-
- /* Strided accesses perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
- do_peeling = vector_alignment_reachable_p (dr);
- if (do_peeling)
- {
- if (known_alignment_for_access_p (dr))
- {
- unsigned int npeel_tmp;
- bool negative = tree_int_cst_compare (DR_STEP (dr),
- size_zero_node) < 0;
-
- /* Save info about DR in the hash table. */
- vectype = STMT_VINFO_VECTYPE (stmt_info);
- nelements = TYPE_VECTOR_SUBPARTS (vectype);
- mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
- TREE_TYPE (DR_REF (dr))));
- npeel_tmp = (negative
- ? (mis - nelements) : (nelements - mis))
- & (nelements - 1);
-
- /* For multiple types, it is possible that the bigger type access
- will have more than one peeling option. E.g., a loop with two
- types: one of size (vector size / 4), and the other one of
-	     size (vector size / 8).  Vectorization factor will be 8.  If both
-	     accesses are misaligned by 3, the first one needs one scalar
- iteration to be aligned, and the second one needs 5. But the
- first one will be aligned also by peeling 5 scalar
- iterations, and in that case both accesses will be aligned.
- Hence, except for the immediate peeling amount, we also want
- to try to add full vector size, while we don't exceed
- vectorization factor.
- We do this automatically for cost model, since we calculate cost
- for every peeling option. */
- if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- {
- if (STMT_SLP_TYPE (stmt_info))
- possible_npeel_number
- = (vf * GROUP_SIZE (stmt_info)) / nelements;
- else
- possible_npeel_number = vf / nelements;
- }
-
- /* Handle the aligned case. We may decide to align some other
- access, making DR unaligned. */
- if (DR_MISALIGNMENT (dr) == 0)
- {
- npeel_tmp = 0;
- if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- possible_npeel_number++;
- }
-
- for (j = 0; j < possible_npeel_number; j++)
- {
- vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
- dr, npeel_tmp);
- npeel_tmp += nelements;
- }
-
- all_misalignments_unknown = false;
- /* Data-ref that was chosen for the case that all the
- misalignments are unknown is not relevant anymore, since we
- have a data-ref with known alignment. */
- dr0 = NULL;
- }
- else
- {
-	      /* If we don't know any misalignment values, we prefer
-		 peeling for the data-ref that has the maximum number of
-		 data-refs with the same alignment, unless the target prefers
-		 to align stores over loads.  */
- if (all_misalignments_unknown)
- {
- unsigned same_align_drs
- = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
- if (!dr0
- || same_align_drs_max < same_align_drs)
- {
- same_align_drs_max = same_align_drs;
- dr0 = dr;
- }
- /* For data-refs with the same number of related
- accesses prefer the one where the misalign
- computation will be invariant in the outermost loop. */
- else if (same_align_drs_max == same_align_drs)
- {
- struct loop *ivloop0, *ivloop;
- ivloop0 = outermost_invariant_loop_for_expr
- (loop, DR_BASE_ADDRESS (dr0));
- ivloop = outermost_invariant_loop_for_expr
- (loop, DR_BASE_ADDRESS (dr));
- if ((ivloop && !ivloop0)
- || (ivloop && ivloop0
- && flow_loop_nested_p (ivloop, ivloop0)))
- dr0 = dr;
- }
-
- if (!first_store && DR_IS_WRITE (dr))
- first_store = dr;
- }
-
- /* If there are both known and unknown misaligned accesses in the
- loop, we choose peeling amount according to the known
- accesses. */
- if (!supportable_dr_alignment)
- {
- dr0 = dr;
- if (!first_store && DR_IS_WRITE (dr))
- first_store = dr;
- }
- }
- }
- else
- {
- if (!aligned_access_p (dr))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "vector alignment may not be reachable\n");
- break;
- }
- }
- }
-
- /* Check if we can possibly peel the loop. */
- if (!vect_can_advance_ivs_p (loop_vinfo)
- || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
- || loop->inner)
- do_peeling = false;
-
- if (do_peeling
- && all_misalignments_unknown
- && vect_supportable_dr_alignment (dr0, false))
- {
-      /* Check whether the target prefers to align stores over loads, i.e., if
- misaligned stores are more expensive than misaligned loads (taking
- drs with same alignment into account). */
- if (first_store && DR_IS_READ (dr0))
- {
- unsigned int load_inside_cost = 0, load_outside_cost = 0;
- unsigned int store_inside_cost = 0, store_outside_cost = 0;
- unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
- unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
- stmt_vector_for_cost dummy;
- dummy.create (2);
-
- vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
- &dummy);
- vect_get_data_access_cost (first_store, &store_inside_cost,
- &store_outside_cost, &dummy);
-
- dummy.release ();
-
- /* Calculate the penalty for leaving FIRST_STORE unaligned (by
- aligning the load DR0). */
- load_inside_penalty = store_inside_cost;
- load_outside_penalty = store_outside_cost;
- for (i = 0;
- STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
- DR_STMT (first_store))).iterate (i, &dr);
- i++)
- if (DR_IS_READ (dr))
- {
- load_inside_penalty += load_inside_cost;
- load_outside_penalty += load_outside_cost;
- }
- else
- {
- load_inside_penalty += store_inside_cost;
- load_outside_penalty += store_outside_cost;
- }
-
- /* Calculate the penalty for leaving DR0 unaligned (by
- aligning the FIRST_STORE). */
- store_inside_penalty = load_inside_cost;
- store_outside_penalty = load_outside_cost;
- for (i = 0;
- STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
- DR_STMT (dr0))).iterate (i, &dr);
- i++)
- if (DR_IS_READ (dr))
- {
- store_inside_penalty += load_inside_cost;
- store_outside_penalty += load_outside_cost;
- }
- else
- {
- store_inside_penalty += store_inside_cost;
- store_outside_penalty += store_outside_cost;
- }
-
- if (load_inside_penalty > store_inside_penalty
- || (load_inside_penalty == store_inside_penalty
- && load_outside_penalty > store_outside_penalty))
- dr0 = first_store;
- }
-
- /* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop or
-	 if it may help improve load bandwidth when we'd end up using
- unaligned loads. */
- tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
- if (!first_store
- && !STMT_VINFO_SAME_ALIGN_REFS (
- vinfo_for_stmt (DR_STMT (dr0))).length ()
- && (vect_supportable_dr_alignment (dr0, false)
- != dr_unaligned_supported
- || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
- == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
- do_peeling = false;
- }
-
- if (do_peeling && !dr0)
- {
- /* Peeling is possible, but there is no data access that is not supported
- unless aligned. So we try to choose the best possible peeling. */
-
- /* We should get here only if there are drs with known misalignment. */
- gcc_assert (!all_misalignments_unknown);
-
- /* Choose the best peeling from the hash table. */
- dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab,
- loop_vinfo, &npeel,
- &body_cost_vec);
- if (!dr0 || !npeel)
- do_peeling = false;
- }
-
- if (do_peeling)
- {
- stmt = DR_STMT (dr0);
- stmt_info = vinfo_for_stmt (stmt);
- vectype = STMT_VINFO_VECTYPE (stmt_info);
- nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- if (known_alignment_for_access_p (dr0))
- {
- bool negative = tree_int_cst_compare (DR_STEP (dr0),
- size_zero_node) < 0;
- if (!npeel)
- {
- /* Since it's known at compile time, compute the number of
- iterations in the peeled loop (the peeling factor) for use in
- updating DR_MISALIGNMENT values. The peeling factor is the
- vectorization factor minus the misalignment as an element
- count. */
- mis = DR_MISALIGNMENT (dr0);
- mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
- npeel = ((negative ? mis - nelements : nelements - mis)
- & (nelements - 1));
- }
-
- /* For interleaved data access every iteration accesses all the
- members of the group, therefore we divide the number of iterations
- by the group size. */
- stmt_info = vinfo_for_stmt (DR_STMT (dr0));
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- npeel /= GROUP_SIZE (stmt_info);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Try peeling by %d\n", npeel);
- }
-
- /* Ensure that all data refs can be vectorized after the peel. */
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- int save_misalignment;
-
- if (dr == dr0)
- continue;
-
- stmt = DR_STMT (dr);
- stmt_info = vinfo_for_stmt (stmt);
- /* For interleaving, only the alignment of the first access
- matters. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- continue;
-
- /* Strided accesses perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- save_misalignment = DR_MISALIGNMENT (dr);
- vect_update_misalignment_for_peel (dr, dr0, npeel);
- supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
- SET_DR_MISALIGNMENT (dr, save_misalignment);
-
- if (!supportable_dr_alignment)
- {
- do_peeling = false;
- break;
- }
- }
-
- if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
- {
- stat = vect_verify_datarefs_alignment (loop_vinfo);
- if (!stat)
- do_peeling = false;
- else
- {
- body_cost_vec.release ();
- return stat;
- }
- }
-
- /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
- if (do_peeling)
- {
- unsigned max_allowed_peel
- = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
- if (max_allowed_peel != (unsigned)-1)
- {
- unsigned max_peel = npeel;
- if (max_peel == 0)
- {
- gimple *dr_stmt = DR_STMT (dr0);
- stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
- tree vtype = STMT_VINFO_VECTYPE (vinfo);
- max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
- }
- if (max_peel > max_allowed_peel)
- {
- do_peeling = false;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Disable peeling, max peels reached: %d\n", max_peel);
- }
- }
- }
-
- /* Cost model #2 - if peeling may result in a remaining loop not
- iterating enough to be vectorized then do not peel. */
- if (do_peeling
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- {
- unsigned max_peel
- = npeel == 0 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 : npeel;
- if (LOOP_VINFO_INT_NITERS (loop_vinfo)
- < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + max_peel)
- do_peeling = false;
- }
-
- if (do_peeling)
- {
- /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
- If the misalignment of DR_i is identical to that of dr0 then set
- DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
- dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
- by the peeling factor times the element size of DR_i (MOD the
- vectorization factor times the size). Otherwise, the
- misalignment of DR_i must be set to unknown. */
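-      /* For instance (hypothetical values): with npeel = 2, an element
-	 size of 4 bytes and a vectorization factor of 4, a DR_i whose
-	 misalignment was 12 becomes (12 + 2 * 4) mod 16 = 4.  */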
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- if (dr != dr0)
- {
- /* Strided accesses perform only component accesses, alignment
- is irrelevant for them. */
- stmt_info = vinfo_for_stmt (DR_STMT (dr));
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- vect_update_misalignment_for_peel (dr, dr0, npeel);
- }
-
- LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
- if (npeel)
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
- else
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- = DR_MISALIGNMENT (dr0);
- SET_DR_MISALIGNMENT (dr0, 0);
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Alignment of access forced using peeling.\n");
- dump_printf_loc (MSG_NOTE, vect_location,
- "Peeling for alignment will be applied.\n");
- }
- /* The inside-loop cost will be accounted for in vectorizable_load
- and vectorizable_store correctly with adjusted alignments.
-	 Drop the body_cost_vec on the floor here.  */
- body_cost_vec.release ();
-
- stat = vect_verify_datarefs_alignment (loop_vinfo);
- gcc_assert (stat);
- return stat;
- }
- }
-
- body_cost_vec.release ();
-
- /* (2) Versioning to force alignment. */
-
- /* Try versioning if:
- 1) optimize loop for speed
- 2) there is at least one unsupported misaligned data ref with an unknown
- misalignment, and
- 3) all misaligned data refs with a known misalignment are supported, and
- 4) the number of runtime alignment checks is within reason. */
-
- do_versioning =
- optimize_loop_nest_for_speed_p (loop)
- && (!loop->inner); /* FORNOW */
-
- if (do_versioning)
- {
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- stmt = DR_STMT (dr);
- stmt_info = vinfo_for_stmt (stmt);
-
- /* For interleaving, only the alignment of the first access
- matters. */
- if (aligned_access_p (dr)
- || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
- continue;
-
- if (STMT_VINFO_STRIDED_P (stmt_info))
- {
- /* Strided loads perform only component accesses, alignment is
- irrelevant for them. */
- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
- do_versioning = false;
- break;
- }
-
- supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
-
- if (!supportable_dr_alignment)
- {
- gimple *stmt;
- int mask;
- tree vectype;
-
- if (known_alignment_for_access_p (dr)
- || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
- >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
- {
- do_versioning = false;
- break;
- }
-
- stmt = DR_STMT (dr);
- vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- gcc_assert (vectype);
-
- /* The rightmost bits of an aligned address must be zeros.
- Construct the mask needed for this test. For example,
- GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
- mask must be 15 = 0xf. */
- mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
-
- /* FORNOW: use the same mask to test all potentially unaligned
- references in the loop. The vectorizer currently supports
- a single vector size, see the reference to
- GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
- vectorization factor is computed. */
- gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
- || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
- LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
- LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
- DR_STMT (dr));
- }
- }
-
- /* Versioning requires at least one misaligned data reference. */
- if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
- do_versioning = false;
- else if (!do_versioning)
- LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
- }
-
- if (do_versioning)
- {
- vec<gimple *> may_misalign_stmts
- = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
- gimple *stmt;
-
- /* It can now be assumed that the data references in the statements
- in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
- of the loop being vectorized. */
- FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
- {
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- dr = STMT_VINFO_DATA_REF (stmt_info);
- SET_DR_MISALIGNMENT (dr, 0);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Alignment of access forced using versioning.\n");
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Versioning for alignment will be applied.\n");
-
- /* Peeling and versioning can't be done together at this time. */
- gcc_assert (! (do_peeling && do_versioning));
-
- stat = vect_verify_datarefs_alignment (loop_vinfo);
- gcc_assert (stat);
- return stat;
- }
-
- /* This point is reached if neither peeling nor versioning is being done. */
- gcc_assert (! (do_peeling || do_versioning));
-
- stat = vect_verify_datarefs_alignment (loop_vinfo);
- return stat;
-}
-
-
-/* Function vect_find_same_alignment_drs.
-
- Update group and alignment relations according to the chosen
- vectorization factor. */
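-
-/* For example (hypothetical accesses): with a vectorization factor of 4
-   and 4-byte elements, references a[i] and a[i+4] have a dependence
-   distance of 4, which is a multiple of the vectorization factor, so the
-   two statements are recorded as having the same alignment.  */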
-
-static void
-vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
- loop_vec_info loop_vinfo)
-{
- unsigned int i;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- struct data_reference *dra = DDR_A (ddr);
- struct data_reference *drb = DDR_B (ddr);
- stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
- stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
- int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
- int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
- lambda_vector dist_v;
- unsigned int loop_depth;
-
- if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
- return;
-
- if (dra == drb)
- return;
-
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- return;
-
- /* Loop-based vectorization and known data dependence. */
- if (DDR_NUM_DIST_VECTS (ddr) == 0)
- return;
-
-  /* Data-dependence analysis reports a distance vector of zero
-     for data-references that overlap only in the first iteration
-     but have steps of different sign (see PR45764).
-     So, as a sanity check, require equal DR_STEP.  */
- if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
- return;
-
- loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
- FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
- {
- int dist = dist_v[loop_depth];
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "dependence distance = %d.\n", dist);
-
- /* Same loop iteration. */
- if (dist == 0
- || (dist % vectorization_factor == 0 && dra_size == drb_size))
- {
- /* Two references with distance zero have the same alignment. */
- STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
- STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "accesses have the same alignment.\n");
- dump_printf (MSG_NOTE,
- "dependence distance modulo vf == 0 between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
- dump_printf (MSG_NOTE, "\n");
- }
- }
- }
-}
-
-
-/* Function vect_analyze_data_refs_alignment
-
- Analyze the alignment of the data-references in the loop.
- Return FALSE if a data reference is found that cannot be vectorized. */
-
-bool
-vect_analyze_data_refs_alignment (loop_vec_info vinfo)
-{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_refs_alignment ===\n");
-
- /* Mark groups of data references with same alignment using
- data dependence information. */
- vec<ddr_p> ddrs = vinfo->ddrs;
- struct data_dependence_relation *ddr;
- unsigned int i;
-
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
- vect_find_same_alignment_drs (ddr, vinfo);
-
- vec<data_reference_p> datarefs = vinfo->datarefs;
- struct data_reference *dr;
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
- if (STMT_VINFO_VECTORIZABLE (stmt_info)
- && !vect_compute_data_ref_alignment (dr))
- {
- /* Strided accesses perform only component accesses, misalignment
- information is irrelevant for them. */
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- continue;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't calculate alignment "
- "for data ref.\n");
-
- return false;
- }
- }
-
- return true;
-}
-
-
-/* Analyze alignment of DRs of stmts in NODE. */
-
-static bool
-vect_slp_analyze_and_verify_node_alignment (slp_tree node)
-{
-  /* We vectorize from the first scalar stmt in the node unless
-     the node is permuted, in which case we start from the first
-     element in the group.  */
- gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
- if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
- first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
-
- data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
- if (! vect_compute_data_ref_alignment (dr)
- /* For creating the data-ref pointer we need alignment of the
- first element anyway. */
- || (dr != first_dr
- && ! vect_compute_data_ref_alignment (first_dr))
- || ! verify_data_ref_alignment (dr))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: bad data alignment in basic "
- "block.\n");
- return false;
- }
-
- return true;
-}
-
-/* Function vect_slp_analyze_and_verify_instance_alignment
-
- Analyze the alignment of the data-references in the SLP instance.
- Return FALSE if a data reference is found that cannot be vectorized. */
-
-bool
-vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
-{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
-
- slp_tree node;
- unsigned i;
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
- if (! vect_slp_analyze_and_verify_node_alignment (node))
- return false;
-
- node = SLP_INSTANCE_TREE (instance);
- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
- && ! vect_slp_analyze_and_verify_node_alignment
- (SLP_INSTANCE_TREE (instance)))
- return false;
-
- return true;
-}
-
-
-/* Analyze groups of accesses: check that DR belongs to a group of
- accesses of legal size, step, etc. Detect gaps, single element
- interleaving, and other special cases. Set grouped access info.
- Collect groups of strided stores for further use in SLP analysis.
- Worker for vect_analyze_group_access. */
-
-static bool
-vect_analyze_group_access_1 (struct data_reference *dr)
-{
- tree step = DR_STEP (dr);
- tree scalar_type = TREE_TYPE (DR_REF (dr));
- HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- HOST_WIDE_INT dr_step = -1;
- HOST_WIDE_INT groupsize, last_accessed_element = 1;
- bool slp_impossible = false;
-
- /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
- size of the interleaving group (including gaps). */
- if (tree_fits_shwi_p (step))
- {
- dr_step = tree_to_shwi (step);
- /* Check that STEP is a multiple of type size. Otherwise there is
- a non-element-sized gap at the end of the group which we
- cannot represent in GROUP_GAP or GROUP_SIZE.
- ??? As we can handle non-constant step fine here we should
- simply remove uses of GROUP_GAP between the last and first
- element and instead rely on DR_STEP. GROUP_SIZE then would
- simply not include that gap. */
- if ((dr_step % type_size) != 0)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Step ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
- dump_printf (MSG_NOTE,
- " is not a multiple of the element size for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
- dump_printf (MSG_NOTE, "\n");
- }
- return false;
- }
- groupsize = absu_hwi (dr_step) / type_size;
- }
- else
- groupsize = 0;
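-
-  /* For example (hypothetical access): a load of 4-byte ints with a
-     constant step of 16 bytes gives groupsize = 16 / 4 = 4, i.e. an
-     interleaving group of four elements per iteration, gaps included.  */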
-
-  /* A non-consecutive access is possible only if it is part of an
-     interleaving group.  */
- if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
- {
-      /* Check if this DR is a part of interleaving, and is a single
- element of the group that is accessed in the loop. */
-
- /* Gaps are supported only for loads. STEP must be a multiple of the type
- size. The size of the group must be a power of 2. */
- if (DR_IS_READ (dr)
- && (dr_step % type_size) == 0
- && groupsize > 0
- && pow2p_hwi (groupsize))
- {
- GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
- GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
- GROUP_GAP (stmt_info) = groupsize - 1;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Detected single element interleaving ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
- dump_printf (MSG_NOTE, " step ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
- dump_printf (MSG_NOTE, "\n");
- }
-
- return true;
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not consecutive access ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (bb_vinfo)
- {
- /* Mark the statement as unvectorizable. */
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
- return true;
- }
-
- dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
- STMT_VINFO_STRIDED_P (stmt_info) = true;
- return true;
- }
-
- if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
- {
- /* First stmt in the interleaving chain. Check the chain. */
- gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
- struct data_reference *data_ref = dr;
- unsigned int count = 1;
- tree prev_init = DR_INIT (data_ref);
- gimple *prev = stmt;
- HOST_WIDE_INT diff, gaps = 0;
-
- while (next)
- {
-	  /* Skip same data-refs.  In case two or more stmts share a
-	     data-ref (supported only for loads), we vectorize only the first
- stmt, and the rest get their vectorized loads from the first
- one. */
- if (!tree_int_cst_compare (DR_INIT (data_ref),
- DR_INIT (STMT_VINFO_DATA_REF (
- vinfo_for_stmt (next)))))
- {
- if (DR_IS_WRITE (data_ref))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Two store stmts share the same dr.\n");
- return false;
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Two or more load stmts share the same dr.\n");
-
- /* For load use the same data-ref load. */
- GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
-
- prev = next;
- next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
- continue;
- }
-
- prev = next;
- data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
-
- /* All group members have the same STEP by construction. */
- gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
-
- /* Check that the distance between two accesses is equal to the type
- size. Otherwise, we have gaps. */
- diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
- - TREE_INT_CST_LOW (prev_init)) / type_size;
- if (diff != 1)
- {
- /* FORNOW: SLP of accesses with gaps is not supported. */
- slp_impossible = true;
- if (DR_IS_WRITE (data_ref))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaved store with gaps\n");
- return false;
- }
-
- gaps += diff - 1;
- }
-
- last_accessed_element += diff;
-
- /* Store the gap from the previous member of the group. If there is no
- gap in the access, GROUP_GAP is always 1. */
- GROUP_GAP (vinfo_for_stmt (next)) = diff;
-
- prev_init = DR_INIT (data_ref);
- next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
- /* Count the number of data-refs in the chain. */
- count++;
- }
-
- if (groupsize == 0)
- groupsize = count + gaps;
-
- /* This could be UINT_MAX but as we are generating code in a very
- inefficient way we have to cap earlier. See PR78699 for example. */
- if (groupsize > 4096)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "group is too large\n");
- return false;
- }
-
- /* Check that the size of the interleaving is equal to count for stores,
- i.e., that there are no gaps. */
- if (groupsize != count
- && !DR_IS_READ (dr))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaved store with gaps\n");
- return false;
- }
-
- /* If there is a gap after the last load in the group it is the
- difference between the groupsize and the last accessed
- element.
- When there is no gap, this difference should be 0. */
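-	 For instance (hypothetical group): a load group that accesses only
-	 the first two members of a group of size 4 ends with
-	 last_accessed_element = 2, so GROUP_GAP of the first statement
-	 records the trailing gap of 4 - 2 = 2 elements.  */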
- GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
-
- GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Detected interleaving ");
- if (DR_IS_READ (dr))
- dump_printf (MSG_NOTE, "load ");
- else
- dump_printf (MSG_NOTE, "store ");
- dump_printf (MSG_NOTE, "of size %u starting with ",
- (unsigned)groupsize);
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
- dump_printf_loc (MSG_NOTE, vect_location,
- "There is a gap of %u elements after the group\n",
- GROUP_GAP (vinfo_for_stmt (stmt)));
- }
-
- /* SLP: create an SLP data structure for every interleaving group of
-	 stores for further analysis in vect_analyze_slp.  */
- if (DR_IS_WRITE (dr) && !slp_impossible)
- {
- if (loop_vinfo)
- LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
- if (bb_vinfo)
- BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
- }
- }
-
- return true;
-}
-
-/* Analyze groups of accesses: check that DR belongs to a group of
- accesses of legal size, step, etc. Detect gaps, single element
- interleaving, and other special cases. Set grouped access info.
- Collect groups of strided stores for further use in SLP analysis. */
-
-static bool
-vect_analyze_group_access (struct data_reference *dr)
-{
- if (!vect_analyze_group_access_1 (dr))
- {
- /* Dissolve the group if present. */
- gimple *next;
- gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
- while (stmt)
- {
- stmt_vec_info vinfo = vinfo_for_stmt (stmt);
- next = GROUP_NEXT_ELEMENT (vinfo);
- GROUP_FIRST_ELEMENT (vinfo) = NULL;
- GROUP_NEXT_ELEMENT (vinfo) = NULL;
- stmt = next;
- }
- return false;
- }
- return true;
-}
-
-/* Analyze the access pattern of the data-reference DR.
- In case of non-consecutive accesses call vect_analyze_group_access() to
- analyze groups of accesses. */
-
-static bool
-vect_analyze_data_ref_access (struct data_reference *dr)
-{
- tree step = DR_STEP (dr);
- tree scalar_type = TREE_TYPE (DR_REF (dr));
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = NULL;
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- if (loop_vinfo && !step)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data-ref access in loop\n");
- return false;
- }
-
- /* Allow loads with zero step in inner-loop vectorization. */
- if (loop_vinfo && integer_zerop (step))
- {
- GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
- if (!nested_in_vect_loop_p (loop, stmt))
- return DR_IS_READ (dr);
-      /* Allow references with zero step only for outer loops marked
-	 with pragma omp simd - it guarantees absence of
-	 loop-carried dependencies between inner loop iterations.  */
- if (!loop->force_vectorize)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "zero step in inner loop of nest\n");
- return false;
- }
- }
-
- if (loop && nested_in_vect_loop_p (loop, stmt))
- {
- /* Interleaved accesses are not yet supported within outer-loop
- vectorization for references in the inner-loop. */
- GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
-
- /* For the rest of the analysis we use the outer-loop step. */
- step = STMT_VINFO_DR_STEP (stmt_info);
- if (integer_zerop (step))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "zero step in outer loop.\n");
- return DR_IS_READ (dr);
- }
- }
-
- /* Consecutive? */
- if (TREE_CODE (step) == INTEGER_CST)
- {
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
- || (dr_step < 0
- && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
- {
- /* Mark that it is not interleaving. */
- GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
- return true;
- }
- }
-
- if (loop && nested_in_vect_loop_p (loop, stmt))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "grouped access in outer loop.\n");
- return false;
- }
-
- /* Assume this is a DR handled by non-constant strided load case. */
- if (TREE_CODE (step) != INTEGER_CST)
- return (STMT_VINFO_STRIDED_P (stmt_info)
- && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
- || vect_analyze_group_access (dr)));
-
- /* Not consecutive access - check if it's a part of interleaving group. */
- return vect_analyze_group_access (dr);
-}
-
-
-
-/* A helper function used in the comparator function to sort data
-   references.  T1 and T2 are the two trees being compared.
-   The function returns -1, 0, or 1.  */
-
-static int
-compare_tree (tree t1, tree t2)
-{
- int i, cmp;
- enum tree_code code;
- char tclass;
-
- if (t1 == t2)
- return 0;
- if (t1 == NULL)
- return -1;
- if (t2 == NULL)
- return 1;
-
- STRIP_NOPS (t1);
- STRIP_NOPS (t2);
-
- if (TREE_CODE (t1) != TREE_CODE (t2))
- return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
-
- code = TREE_CODE (t1);
- switch (code)
- {
- /* For const values, we can just use hash values for comparisons. */
- case INTEGER_CST:
- case REAL_CST:
- case FIXED_CST:
- case STRING_CST:
- case COMPLEX_CST:
- case VECTOR_CST:
- {
- hashval_t h1 = iterative_hash_expr (t1, 0);
- hashval_t h2 = iterative_hash_expr (t2, 0);
- if (h1 != h2)
- return h1 < h2 ? -1 : 1;
- break;
- }
-
- case SSA_NAME:
- cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
- if (cmp != 0)
- return cmp;
-
- if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
- return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
- break;
-
- default:
- tclass = TREE_CODE_CLASS (code);
-
- /* For var-decl, we could compare their UIDs. */
- if (tclass == tcc_declaration)
- {
- if (DECL_UID (t1) != DECL_UID (t2))
- return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
- break;
- }
-
- /* For expressions with operands, compare their operands recursively. */
- for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
- {
- cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
- if (cmp != 0)
- return cmp;
- }
- }
-
- return 0;
-}
-
-
-/* Compare two data-references DRA and DRB so that they can be sorted
-   into chunks suitable for grouping.  */
-
-static int
-dr_group_sort_cmp (const void *dra_, const void *drb_)
-{
- data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
- data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
- int cmp;
-
- /* Stabilize sort. */
- if (dra == drb)
- return 0;
-
- /* DRs in different loops never belong to the same group. */
- loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
- loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
- if (loopa != loopb)
- return loopa->num < loopb->num ? -1 : 1;
-
- /* Ordering of DRs according to base. */
- if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
- {
- cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
- if (cmp != 0)
- return cmp;
- }
-
- /* And according to DR_OFFSET. */
- if (!dr_equal_offsets_p (dra, drb))
- {
- cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
- if (cmp != 0)
- return cmp;
- }
-
- /* Put reads before writes. */
- if (DR_IS_READ (dra) != DR_IS_READ (drb))
- return DR_IS_READ (dra) ? -1 : 1;
-
-  /* Then sort by access size.  */
- if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
- TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
- {
- cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
- TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
- if (cmp != 0)
- return cmp;
- }
-
-  /* And then by step.  */
- if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
- {
- cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
- if (cmp != 0)
- return cmp;
- }
-
-  /* Then sort by DR_INIT.  In case of identical DRs sort by stmt UID.  */
- cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
- if (cmp == 0)
- return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
- return cmp;
-}
-
-/* Function vect_analyze_data_ref_accesses.
-
- Analyze the access pattern of all the data references in the loop.
-
- FORNOW: the only access pattern that is considered vectorizable is a
- simple step 1 (consecutive) access.
-
- FORNOW: handle only arrays and pointer accesses. */
-
-bool
-vect_analyze_data_ref_accesses (vec_info *vinfo)
-{
- unsigned int i;
- vec<data_reference_p> datarefs = vinfo->datarefs;
- struct data_reference *dr;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_ref_accesses ===\n");
-
- if (datarefs.is_empty ())
- return true;
-
- /* Sort the array of datarefs to make building the interleaving chains
- linear. Don't modify the original vector's order, it is needed for
- determining what dependencies are reversed. */
- vec<data_reference_p> datarefs_copy = datarefs.copy ();
- datarefs_copy.qsort (dr_group_sort_cmp);
-
- /* Build the interleaving chains. */
- for (i = 0; i < datarefs_copy.length () - 1;)
- {
- data_reference_p dra = datarefs_copy[i];
- stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
- stmt_vec_info lastinfo = NULL;
- if (! STMT_VINFO_VECTORIZABLE (stmtinfo_a))
- {
- ++i;
- continue;
- }
- for (i = i + 1; i < datarefs_copy.length (); ++i)
- {
- data_reference_p drb = datarefs_copy[i];
- stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
- if (! STMT_VINFO_VECTORIZABLE (stmtinfo_b))
- break;
-
-	  /* ??? Imperfect sorting (non-compatible types, non-modulo
-	     accesses, same accesses) can lead to a group being artificially
-	     split here as we don't just skip over those.  If it really
-	     matters we can push those to a worklist and re-iterate
-	     over them.  Then we can just skip ahead to the next DR here.  */
-
- /* DRs in a different loop should not be put into the same
- interleaving group. */
- if (gimple_bb (DR_STMT (dra))->loop_father
- != gimple_bb (DR_STMT (drb))->loop_father)
- break;
-
- /* Check that the data-refs have same first location (except init)
- and they are both either store or load (not load and store,
- not masked loads or stores). */
- if (DR_IS_READ (dra) != DR_IS_READ (drb)
- || !operand_equal_p (DR_BASE_ADDRESS (dra),
- DR_BASE_ADDRESS (drb), 0)
- || !dr_equal_offsets_p (dra, drb)
- || !gimple_assign_single_p (DR_STMT (dra))
- || !gimple_assign_single_p (DR_STMT (drb)))
- break;
-
- /* Check that the data-refs have the same constant size. */
- tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
- tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
- if (!tree_fits_uhwi_p (sza)
- || !tree_fits_uhwi_p (szb)
- || !tree_int_cst_equal (sza, szb))
- break;
-
- /* Check that the data-refs have the same step. */
- if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
- break;
-
- /* Do not place the same access in the interleaving chain twice. */
- if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
- break;
-
- /* Check the types are compatible.
- ??? We don't distinguish this during sorting. */
- if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
- TREE_TYPE (DR_REF (drb))))
- break;
-
- /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
- HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
- HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
- gcc_assert (init_a <= init_b);
-
- /* If init_b == init_a + the size of the type * k, we have an
- interleaving, and DRA is accessed before DRB. */
- HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
- if (type_size_a == 0
- || (init_b - init_a) % type_size_a != 0)
- break;
-
- /* If we have a store, the accesses are adjacent. This splits
- groups into chunks we support (we don't support vectorization
- of stores with gaps). */
- if (!DR_IS_READ (dra)
- && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
- (DR_INIT (datarefs_copy[i-1]))
- != type_size_a))
- break;
-
-	  /* If the step (when non-zero and constant) is greater than the
-	     difference between the data-refs' inits, this splits groups into
-	     suitable sizes.  */
- if (tree_fits_shwi_p (DR_STEP (dra)))
- {
- HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
- if (step != 0 && step <= (init_b - init_a))
- break;
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Detected interleaving ");
- if (DR_IS_READ (dra))
- dump_printf (MSG_NOTE, "load ");
- else
- dump_printf (MSG_NOTE, "store ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
- dump_printf (MSG_NOTE, "\n");
- }
-
- /* Link the found element into the group list. */
- if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
- {
- GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
- lastinfo = stmtinfo_a;
- }
- GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
- GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
- lastinfo = stmtinfo_b;
- }
- }
-
- FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
- if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
- && !vect_analyze_data_ref_access (dr))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: complicated access pattern.\n");
-
- if (is_a <bb_vec_info> (vinfo))
- {
- /* Mark the statement as not vectorizable. */
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
- continue;
- }
- else
- {
- datarefs_copy.release ();
- return false;
- }
- }
-
- datarefs_copy.release ();
- return true;
-}
-
-
-/* Operator == between two dr_with_seg_len objects.
-
-   This equality operator is used to make sure two data refs
-   are the same, so that we can consider combining the
-   aliasing checks of those two pairs of data-dependent data
-   refs.  */
-
-static bool
-operator == (const dr_with_seg_len& d1,
- const dr_with_seg_len& d2)
-{
- return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
- DR_BASE_ADDRESS (d2.dr), 0)
- && compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
- && compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
- && compare_tree (d1.seg_len, d2.seg_len) == 0;
-}
-
-/* Function comp_dr_with_seg_len_pair.
-
- Comparison function for sorting objects of dr_with_seg_len_pair_t
- so that we can combine aliasing checks in one scan. */
-
-static int
-comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
-{
- const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
- const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
- const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
- const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
-
-  /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
-     if a and c have the same base address and step, and b and d have the same
-     address and step.  Therefore, if either a&c or b&d don't have the same
-     address and step, we don't care about the order of those two pairs after
-     sorting.  */
- int comp_res;
-
- if ((comp_res = compare_tree (DR_BASE_ADDRESS (a1.dr),
- DR_BASE_ADDRESS (b1.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_BASE_ADDRESS (a2.dr),
- DR_BASE_ADDRESS (b2.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_STEP (a1.dr), DR_STEP (b1.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_STEP (a2.dr), DR_STEP (b2.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_OFFSET (a1.dr), DR_OFFSET (b1.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_INIT (a1.dr), DR_INIT (b1.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_OFFSET (a2.dr), DR_OFFSET (b2.dr))) != 0)
- return comp_res;
- if ((comp_res = compare_tree (DR_INIT (a2.dr), DR_INIT (b2.dr))) != 0)
- return comp_res;
-
- return 0;
-}
-
-/* Function vect_vfa_segment_size.
-
-   Create an expression that computes the size of the segment
-   that will be accessed for a data reference.  The function takes into
-   account that realignment loads may access one more vector.
-
- Input:
- DR: The data reference.
- LENGTH_FACTOR: segment length to consider.
-
- Return an expression whose value is the size of segment which will be
- accessed by DR. */
-
-static tree
-vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
-{
- tree segment_length;
-
- if (integer_zerop (DR_STEP (dr)))
- segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- else
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, length_factor));
-
- if (vect_supportable_dr_alignment (dr, false)
- == dr_explicit_realign_optimized)
- {
- tree vector_size = TYPE_SIZE_UNIT
- (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
-
- segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
- }
- return segment_length;
-}
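-
-/* For example (hypothetical reference): for a non-zero 4-byte step and a
-   length factor equal to a vectorization factor of 4, the segment length
-   evaluates to 4 * 4 = 16 bytes; a dr_explicit_realign_optimized access
-   would additionally add one vector size on top of that.  */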
-
-/* Function vect_no_alias_p.
-
- Given data references A and B with equal base and offset, the alias
-   relation can be decided at compilation time; return TRUE if they do
-   not alias each other, and FALSE otherwise.  SEGMENT_LENGTH_A
- and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
- respectively. */
-
-static bool
-vect_no_alias_p (struct data_reference *a, struct data_reference *b,
- tree segment_length_a, tree segment_length_b)
-{
- gcc_assert (TREE_CODE (DR_INIT (a)) == INTEGER_CST
- && TREE_CODE (DR_INIT (b)) == INTEGER_CST);
- if (tree_int_cst_equal (DR_INIT (a), DR_INIT (b)))
- return false;
-
- tree seg_a_min = DR_INIT (a);
- tree seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_min),
- seg_a_min, segment_length_a);
- /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
- bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
- [a, a+12) */
- if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
- {
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
- seg_a_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_max),
- seg_a_max, unit_size);
- seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (a)),
- DR_INIT (a), unit_size);
- }
- tree seg_b_min = DR_INIT (b);
- tree seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_min),
- seg_b_min, segment_length_b);
- if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
- {
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
- seg_b_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_max),
- seg_b_max, unit_size);
- seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (b)),
- DR_INIT (b), unit_size);
- }
-
- if (tree_int_cst_le (seg_a_max, seg_b_min)
- || tree_int_cst_le (seg_b_max, seg_a_min))
- return true;
-
- return false;
-}
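-
-/* For example (hypothetical segments): with DR_INIT values 0 and 16 and
-   segment lengths of 16 bytes each, segment A covers [0, 16) and segment
-   B covers [16, 32); seg_a_max <= seg_b_min holds, so the references are
-   known not to alias and no runtime check is needed for this pair.  */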
-
-/* Function vect_prune_runtime_alias_test_list.
-
- Prune a list of ddrs to be tested at run-time by versioning for alias.
- Merge several alias checks into one if possible.
-   Return FALSE if the resulting list of ddrs is longer than allowed by
- PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
-
-bool
-vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
-{
- vec<ddr_p> may_alias_ddrs =
- LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
- vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
- LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
- int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
-
- ddr_p ddr;
- unsigned int i;
- tree length_factor;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_prune_runtime_alias_test_list ===\n");
-
- if (may_alias_ddrs.is_empty ())
- return true;
-
- /* Basically, for each pair of dependent data refs store_ptr_0
- and load_ptr_0, we create an expression:
-
- ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
- || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
-
- for aliasing checks. However, in some cases we can decrease
- the number of checks by combining two checks into one. For
- example, suppose we have another pair of data refs store_ptr_0
- and load_ptr_1, and if the following condition is satisfied:
-
- load_ptr_0 < load_ptr_1 &&
- load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
-
-     (this condition means that, in each iteration of the vectorized loop,
-     the memory accessed by store_ptr_0 cannot lie between the memory
-     of load_ptr_0 and load_ptr_1.)
-
-     we can then use only the following expression to finish the
-     aliasing checks between store_ptr_0 & load_ptr_0 and
- store_ptr_0 & load_ptr_1:
-
- ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
- || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
-
-     Note that we only consider the case in which load_ptr_0 and
-     load_ptr_1 have the same base address.  */
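-
-  /* As a concrete instance of the above (hypothetical pointers): if
-     load_ptr_0 and load_ptr_1 share a base and load_ptr_1 = load_ptr_0 + 4
-     while both load segments and the store segment are 16 bytes, then
-     4 - 16 < 16 holds and the two checks against store_ptr_0 collapse
-     into the single combined check shown above.  */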
-
- comp_alias_ddrs.create (may_alias_ddrs.length ());
-
- /* First, we collect all data ref pairs for aliasing checks. */
- FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
- {
- int comp_res;
- struct data_reference *dr_a, *dr_b;
- gimple *dr_group_first_a, *dr_group_first_b;
- tree segment_length_a, segment_length_b;
- gimple *stmt_a, *stmt_b;
-
- dr_a = DDR_A (ddr);
- stmt_a = DR_STMT (DDR_A (ddr));
- dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
- if (dr_group_first_a)
- {
- stmt_a = dr_group_first_a;
- dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
- }
-
- dr_b = DDR_B (ddr);
- stmt_b = DR_STMT (DDR_B (ddr));
- dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
- if (dr_group_first_b)
- {
- stmt_b = dr_group_first_b;
- dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
- }
-
- if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
- length_factor = scalar_loop_iters;
- else
- length_factor = size_int (vect_factor);
- segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
- segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
-
- comp_res = compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b));
- if (comp_res == 0)
- comp_res = compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
-
- /* Alias is known at compilation time. */
- if (comp_res == 0
- && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
- && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
- && TREE_CODE (segment_length_a) == INTEGER_CST
- && TREE_CODE (segment_length_b) == INTEGER_CST)
- {
- if (vect_no_alias_p (dr_a, dr_b, segment_length_a, segment_length_b))
- continue;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "not vectorized: compilation time alias.\n");
-
- return false;
- }
-
- dr_with_seg_len_pair_t dr_with_seg_len_pair
- (dr_with_seg_len (dr_a, segment_length_a),
- dr_with_seg_len (dr_b, segment_length_b));
-
- /* Canonicalize pairs by sorting the two DR members. */
- if (comp_res > 0)
- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
-
- comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
- }
-
- /* Second, we sort the collected data ref pairs so that we can scan
- them once to combine all possible aliasing checks. */
- comp_alias_ddrs.qsort (comp_dr_with_seg_len_pair);
-
- /* Third, we scan the sorted dr pairs and check if we can combine
- alias checks of two neighboring dr pairs. */
- for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
- {
- /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
- dr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
- *dr_b1 = &comp_alias_ddrs[i-1].second,
- *dr_a2 = &comp_alias_ddrs[i].first,
- *dr_b2 = &comp_alias_ddrs[i].second;
-
- /* Remove duplicate data ref pairs. */
- if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "found equal ranges ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
-
- comp_alias_ddrs.ordered_remove (i--);
- continue;
- }
-
- if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
- {
- /* We consider the case that DR_B1 and DR_B2 are same memrefs,
- and DR_A1 and DR_A2 are two consecutive memrefs. */
- if (*dr_a1 == *dr_a2)
- {
- std::swap (dr_a1, dr_b1);
- std::swap (dr_a2, dr_b2);
- }
-
- if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
- DR_BASE_ADDRESS (dr_a2->dr), 0)
- || !operand_equal_p (DR_OFFSET (dr_a1->dr),
- DR_OFFSET (dr_a2->dr), 0)
- || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
- || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
- continue;
-
- /* Make sure dr_a1 starts left of dr_a2. */
- if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
- std::swap (*dr_a1, *dr_a2);
-
- bool do_remove = false;
- unsigned HOST_WIDE_INT diff
- = (tree_to_shwi (DR_INIT (dr_a2->dr))
- - tree_to_shwi (DR_INIT (dr_a1->dr)));
-
- /* If the left segment does not extend beyond the start of the
- right segment the new segment length is that of the right
- plus the segment distance. */
- if (tree_fits_uhwi_p (dr_a1->seg_len)
- && compare_tree_int (dr_a1->seg_len, diff) <= 0)
- {
- dr_a1->seg_len = size_binop (PLUS_EXPR, dr_a2->seg_len,
- size_int (diff));
- do_remove = true;
- }
- /* Generally the new segment length is the maximum of the
- left segment size and the right segment size plus the distance.
- ??? We can also build tree MAX_EXPR here but it's not clear this
- is profitable. */
- else if (tree_fits_uhwi_p (dr_a1->seg_len)
- && tree_fits_uhwi_p (dr_a2->seg_len))
- {
- unsigned HOST_WIDE_INT seg_len_a1 = tree_to_uhwi (dr_a1->seg_len);
- unsigned HOST_WIDE_INT seg_len_a2 = tree_to_uhwi (dr_a2->seg_len);
- dr_a1->seg_len = size_int (MAX (seg_len_a1, diff + seg_len_a2));
- do_remove = true;
- }
- /* Now we check if the following condition is satisfied:
-
- DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
-
- where DIFF = DR_A2_INIT - DR_A1_INIT. However,
- SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
- have to make a best estimation. We can get the minimum value
- of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
- then either of the following two conditions can guarantee the
- one above:
-
- 1: DIFF <= MIN_SEG_LEN_B
- 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B */
- else
- {
- unsigned HOST_WIDE_INT min_seg_len_b
- = (tree_fits_uhwi_p (dr_b1->seg_len)
- ? tree_to_uhwi (dr_b1->seg_len)
- : vect_factor);
-
- if (diff <= min_seg_len_b
- || (tree_fits_uhwi_p (dr_a1->seg_len)
- && diff - tree_to_uhwi (dr_a1->seg_len) < min_seg_len_b))
- {
- dr_a1->seg_len = size_binop (PLUS_EXPR,
- dr_a2->seg_len, size_int (diff));
- do_remove = true;
- }
- }
-
- if (do_remove)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- comp_alias_ddrs.ordered_remove (i--);
- }
- }
- }
-
- dump_printf_loc (MSG_NOTE, vect_location,
- "improved number of alias checks from %d to %d\n",
- may_alias_ddrs.length (), comp_alias_ddrs.length ());
- if ((int) comp_alias_ddrs.length () >
- PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "number of versioning for alias "
- "run-time tests exceeds %d "
- "(--param vect-max-version-for-alias-checks)\n",
- PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
- return false;
- }
-
- /* All alias checks have been resolved at compilation time. */
- if (!comp_alias_ddrs.length ())
- LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
-
- return true;
-}
-
-/* Return true if a non-affine read or write in STMT is suitable for a
- gather load or scatter store. Describe the operation in *INFO if so. */
-
-bool
-vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
- gather_scatter_info *info)
-{
- HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- tree offtype = NULL_TREE;
- tree decl, base, off;
- machine_mode pmode;
- int punsignedp, reversep, pvolatilep = 0;
-
- base = DR_REF (dr);
- /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
- see if we can use the def stmt of the address. */
- if (is_gimple_call (stmt)
- && gimple_call_internal_p (stmt)
- && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
- && TREE_CODE (base) == MEM_REF
- && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
- && integer_zerop (TREE_OPERAND (base, 1))
- && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
- {
- gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
- if (is_gimple_assign (def_stmt)
- && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
- base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
- }
-
- /* The gather and scatter builtins need address of the form
- loop_invariant + vector * {1, 2, 4, 8}
- or
- loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
- Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
- of loop invariants/SSA_NAMEs defined in the loop, with casts,
- multiplications and additions in it. To get a vector, we need
- a single SSA_NAME that will be defined in the loop and will
- contain everything that is not loop invariant and that can be
-     vectorized.  The following code attempts to find such a preexisting
- SSA_NAME OFF and put the loop invariants into a tree BASE
- that can be gimplified before the loop. */
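-
-  /* As an illustration (hypothetical source): for a gather like
-     a[idx[i]] with 4-byte elements, the analysis below would ideally end
-     up with BASE = &a, OFF = the SSA_NAME holding idx[i] and SCALE = 4,
-     matching the loop_invariant + vector * {1, 2, 4, 8} shape above.  */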
- base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
- &punsignedp, &reversep, &pvolatilep);
- gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);
-
- if (TREE_CODE (base) == MEM_REF)
- {
- if (!integer_zerop (TREE_OPERAND (base, 1)))
- {
- if (off == NULL_TREE)
- {
- offset_int moff = mem_ref_offset (base);
- off = wide_int_to_tree (sizetype, moff);
- }
- else
- off = size_binop (PLUS_EXPR, off,
- fold_convert (sizetype, TREE_OPERAND (base, 1)));
- }
- base = TREE_OPERAND (base, 0);
- }
- else
- base = build_fold_addr_expr (base);
-
- if (off == NULL_TREE)
- off = size_zero_node;
-
-  /* If base is not loop invariant, then, if off is 0, we start with just
-     the constant offset in the loop invariant BASE and continue with base
-     as OFF; otherwise give up.
-     We could handle that case by gimplifying the addition of base + off
-     into some SSA_NAME and using that as off, but for now punt.  */
- if (!expr_invariant_in_loop_p (loop, base))
- {
- if (!integer_zerop (off))
- return false;
- off = base;
- base = size_int (pbitpos / BITS_PER_UNIT);
- }
- /* Otherwise put base + constant offset into the loop invariant BASE
- and continue with OFF. */
- else
- {
- base = fold_convert (sizetype, base);
- base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
- }
-
- /* OFF at this point may be either a SSA_NAME or some tree expression
- from get_inner_reference. Try to peel off loop invariants from it
- into BASE as long as possible. */
- STRIP_NOPS (off);
- while (offtype == NULL_TREE)
- {
- enum tree_code code;
- tree op0, op1, add = NULL_TREE;
-
- if (TREE_CODE (off) == SSA_NAME)
- {
- gimple *def_stmt = SSA_NAME_DEF_STMT (off);
-
- if (expr_invariant_in_loop_p (loop, off))
- return false;
-
- if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
- break;
-
- op0 = gimple_assign_rhs1 (def_stmt);
- code = gimple_assign_rhs_code (def_stmt);
- op1 = gimple_assign_rhs2 (def_stmt);
- }
- else
- {
- if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
- return false;
- code = TREE_CODE (off);
- extract_ops_from_tree (off, &code, &op0, &op1);
- }
- switch (code)
- {
- case POINTER_PLUS_EXPR:
- case PLUS_EXPR:
- if (expr_invariant_in_loop_p (loop, op0))
- {
- add = op0;
- off = op1;
- do_add:
- add = fold_convert (sizetype, add);
- if (scale != 1)
- add = size_binop (MULT_EXPR, add, size_int (scale));
- base = size_binop (PLUS_EXPR, base, add);
- continue;
- }
- if (expr_invariant_in_loop_p (loop, op1))
- {
- add = op1;
- off = op0;
- goto do_add;
- }
- break;
- case MINUS_EXPR:
- if (expr_invariant_in_loop_p (loop, op1))
- {
- add = fold_convert (sizetype, op1);
- add = size_binop (MINUS_EXPR, size_zero_node, add);
- off = op0;
- goto do_add;
- }
- break;
- case MULT_EXPR:
- if (scale == 1 && tree_fits_shwi_p (op1))
- {
- scale = tree_to_shwi (op1);
- off = op0;
- continue;
- }
- break;
- case SSA_NAME:
- off = op0;
- continue;
- CASE_CONVERT:
- if (!POINTER_TYPE_P (TREE_TYPE (op0))
- && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
- break;
- if (TYPE_PRECISION (TREE_TYPE (op0))
- == TYPE_PRECISION (TREE_TYPE (off)))
- {
- off = op0;
- continue;
- }
- if (TYPE_PRECISION (TREE_TYPE (op0))
- < TYPE_PRECISION (TREE_TYPE (off)))
- {
- off = op0;
- offtype = TREE_TYPE (off);
- STRIP_NOPS (off);
- continue;
- }
- break;
- default:
- break;
- }
- break;
- }
-
- /* If at the end OFF still isn't a SSA_NAME or isn't
- defined in the loop, punt. */
- if (TREE_CODE (off) != SSA_NAME
- || expr_invariant_in_loop_p (loop, off))
- return false;
-
- if (offtype == NULL_TREE)
- offtype = TREE_TYPE (off);
-
- if (DR_IS_READ (dr))
- decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
- else
- decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
-
- if (decl == NULL_TREE)
- return false;
-
- info->decl = decl;
- info->base = base;
- info->offset = off;
- info->offset_dt = vect_unknown_def_type;
- info->offset_vectype = NULL_TREE;
- info->scale = scale;
- return true;
-}
-
-/* Function vect_analyze_data_refs.
-
- Find all the data references in the loop or basic block.
-
- The general structure of the analysis of data refs in the vectorizer is as
- follows:
- 1- vect_analyze_data_refs(loop/bb): call
- compute_data_dependences_for_loop/bb to find and analyze all data-refs
- in the loop/bb and their dependences.
- 2- vect_analyze_dependences(): apply dependence testing using ddrs.
- 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
-   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */
-
-bool
-vect_analyze_data_refs (vec_info *vinfo, int *min_vf)
-{
- struct loop *loop = NULL;
- unsigned int i;
- struct data_reference *dr;
- tree scalar_type;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_refs ===\n");
-
- if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
- loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- /* Go through the data-refs, check that the analysis succeeded. Update
- pointer from stmt_vec_info struct to DR and vectype. */
-
- vec<data_reference_p> datarefs = vinfo->datarefs;
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- gimple *stmt;
- stmt_vec_info stmt_info;
- tree base, offset, init;
- enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
- bool simd_lane_access = false;
- int vf;
-
-again:
- if (!dr || !DR_REF (dr))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unhandled data-ref\n");
- return false;
- }
-
- stmt = DR_STMT (dr);
- stmt_info = vinfo_for_stmt (stmt);
-
- /* Discard clobbers from the dataref vector. We will remove
- clobber stmts during vectorization. */
- if (gimple_clobber_p (stmt))
- {
- free_data_ref (dr);
- if (i == datarefs.length () - 1)
- {
- datarefs.pop ();
- break;
- }
- datarefs.ordered_remove (i);
- dr = datarefs[i];
- goto again;
- }
-
- /* Check that analysis of the data-ref succeeded. */
- if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
- || !DR_STEP (dr))
- {
- bool maybe_gather
- = DR_IS_READ (dr)
- && !TREE_THIS_VOLATILE (DR_REF (dr))
- && targetm.vectorize.builtin_gather != NULL;
- bool maybe_scatter
- = DR_IS_WRITE (dr)
- && !TREE_THIS_VOLATILE (DR_REF (dr))
- && targetm.vectorize.builtin_scatter != NULL;
- bool maybe_simd_lane_access
- = is_a <loop_vec_info> (vinfo) && loop->simduid;
-
-	  /* If the target supports vector gather loads or scatter stores,
-	     or if this might be a SIMD lane access, see if one of those
-	     can be used.  */
- if (is_a <loop_vec_info> (vinfo)
- && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
- && !nested_in_vect_loop_p (loop, stmt))
- {
- struct data_reference *newdr
- = create_data_ref (NULL, loop_containing_stmt (stmt),
- DR_REF (dr), stmt, maybe_scatter ? false : true);
- gcc_assert (newdr != NULL && DR_REF (newdr));
- if (DR_BASE_ADDRESS (newdr)
- && DR_OFFSET (newdr)
- && DR_INIT (newdr)
- && DR_STEP (newdr)
- && integer_zerop (DR_STEP (newdr)))
- {
- if (maybe_simd_lane_access)
- {
- tree off = DR_OFFSET (newdr);
- STRIP_NOPS (off);
- if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
- && TREE_CODE (off) == MULT_EXPR
- && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
- {
- tree step = TREE_OPERAND (off, 1);
- off = TREE_OPERAND (off, 0);
- STRIP_NOPS (off);
- if (CONVERT_EXPR_P (off)
- && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
- 0)))
- < TYPE_PRECISION (TREE_TYPE (off)))
- off = TREE_OPERAND (off, 0);
- if (TREE_CODE (off) == SSA_NAME)
- {
- gimple *def = SSA_NAME_DEF_STMT (off);
- tree reft = TREE_TYPE (DR_REF (newdr));
- if (is_gimple_call (def)
- && gimple_call_internal_p (def)
- && (gimple_call_internal_fn (def)
- == IFN_GOMP_SIMD_LANE))
- {
- tree arg = gimple_call_arg (def, 0);
- gcc_assert (TREE_CODE (arg) == SSA_NAME);
- arg = SSA_NAME_VAR (arg);
- if (arg == loop->simduid
- /* For now. */
- && tree_int_cst_equal
- (TYPE_SIZE_UNIT (reft),
- step))
- {
- DR_OFFSET (newdr) = ssize_int (0);
- DR_STEP (newdr) = step;
- DR_ALIGNED_TO (newdr)
- = size_int (BIGGEST_ALIGNMENT);
- dr = newdr;
- simd_lane_access = true;
- }
- }
- }
- }
- }
- if (!simd_lane_access && (maybe_gather || maybe_scatter))
- {
- dr = newdr;
- if (maybe_gather)
- gatherscatter = GATHER;
- else
- gatherscatter = SCATTER;
- }
- }
- if (gatherscatter == SG_NONE && !simd_lane_access)
- free_data_ref (newdr);
- }
-
- if (gatherscatter == SG_NONE && !simd_lane_access)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: data ref analysis "
- "failed ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- return false;
- }
- }
-
- if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: base addr of dr is a "
- "constant\n");
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- free_data_ref (dr);
- return false;
- }
-
- if (TREE_THIS_VOLATILE (DR_REF (dr)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: volatile type ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- return false;
- }
-
- if (stmt_can_throw_internal (stmt))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: statement can throw an "
- "exception ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- free_data_ref (dr);
- return false;
- }
-
- if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
- && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: statement is bitfield "
- "access ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- free_data_ref (dr);
- return false;
- }
-
- base = unshare_expr (DR_BASE_ADDRESS (dr));
- offset = unshare_expr (DR_OFFSET (dr));
- init = unshare_expr (DR_INIT (dr));
-
- if (is_gimple_call (stmt)
- && (!gimple_call_internal_p (stmt)
- || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
- && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: dr in a call ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- free_data_ref (dr);
- return false;
- }
-
- /* Update DR field in stmt_vec_info struct. */
-
-      /* If the dataref is in an inner-loop of the loop that is considered
-	 for vectorization, we also want to analyze the access relative to
- the outer-loop (DR contains information only relative to the
- inner-most enclosing loop). We do that by building a reference to the
- first location accessed by the inner-loop, and analyze it relative to
- the outer-loop. */
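-      /* For example (illustrative), for the access in[i+j] in
-	   for (i = 0; i < N; i++)
-	     for (j = 0; j < M; j++)
-	       s += in[i+j];
-	 the DR describes the evolution relative to the j-loop only, while
-	 outer-loop vectorization also needs it relative to the i-loop.  */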
- if (loop && nested_in_vect_loop_p (loop, stmt))
- {
- tree outer_step, outer_base, outer_init;
- HOST_WIDE_INT pbitsize, pbitpos;
- tree poffset;
- machine_mode pmode;
- int punsignedp, preversep, pvolatilep;
- affine_iv base_iv, offset_iv;
- tree dinit;
-
- /* Build a reference to the first location accessed by the
- inner-loop: *(BASE+INIT). (The first location is actually
- BASE+INIT+OFFSET, but we add OFFSET separately later). */
- tree inner_base = build_fold_indirect_ref
- (fold_build_pointer_plus (base, init));
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "analyze in outer-loop: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
- dump_printf (MSG_NOTE, "\n");
- }
-
- outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
- &poffset, &pmode, &punsignedp,
- &preversep, &pvolatilep);
- gcc_assert (outer_base != NULL_TREE);
-
- if (pbitpos % BITS_PER_UNIT != 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: bit offset alignment.\n");
- return false;
- }
-
- if (preversep)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: reverse storage order.\n");
- return false;
- }
-
- outer_base = build_fold_addr_expr (outer_base);
- if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
- &base_iv, false))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "failed: evolution of base is not affine.\n");
- return false;
- }
-
- if (offset)
- {
- if (poffset)
- poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
- poffset);
- else
- poffset = offset;
- }
-
- if (!poffset)
- {
- offset_iv.base = ssize_int (0);
- offset_iv.step = ssize_int (0);
- }
- else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
- &offset_iv, false))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "evolution of offset is not affine.\n");
- return false;
- }
-
- outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
- split_constant_offset (base_iv.base, &base_iv.base, &dinit);
- outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
- split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
- outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
-
- outer_step = size_binop (PLUS_EXPR,
- fold_convert (ssizetype, base_iv.step),
- fold_convert (ssizetype, offset_iv.step));
-
- STMT_VINFO_DR_STEP (stmt_info) = outer_step;
- /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
- STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
- STMT_VINFO_DR_INIT (stmt_info) = outer_init;
- STMT_VINFO_DR_OFFSET (stmt_info) =
- fold_convert (ssizetype, offset_iv.base);
- STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
- size_int (highest_pow2_factor (offset_iv.base));
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "\touter base_address: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
- dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_DR_OFFSET (stmt_info));
- dump_printf (MSG_NOTE,
- "\n\touter constant offset from base address: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_DR_INIT (stmt_info));
- dump_printf (MSG_NOTE, "\n\touter step: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_DR_STEP (stmt_info));
- dump_printf (MSG_NOTE, "\n\touter aligned to: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_DR_ALIGNED_TO (stmt_info));
- dump_printf (MSG_NOTE, "\n");
- }
- }
-
- if (STMT_VINFO_DATA_REF (stmt_info))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: more than one data ref "
- "in stmt: ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
-
- if (is_a <bb_vec_info> (vinfo))
- break;
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- free_data_ref (dr);
- return false;
- }
-
- STMT_VINFO_DATA_REF (stmt_info) = dr;
- if (simd_lane_access)
- {
- STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
- free_data_ref (datarefs[i]);
- datarefs[i] = dr;
- }
-
- /* Set vectype for STMT. */
- scalar_type = TREE_TYPE (DR_REF (dr));
- STMT_VINFO_VECTYPE (stmt_info)
- = get_vectype_for_scalar_type (scalar_type);
- if (!STMT_VINFO_VECTYPE (stmt_info))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: no vectype for stmt: ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
- scalar_type);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
-
- if (is_a <bb_vec_info> (vinfo))
- {
-	      /* Having no vector type is fine - the ref can still participate
-	         in dependence analysis; we just can't vectorize it.  */
- STMT_VINFO_VECTORIZABLE (stmt_info) = false;
- continue;
- }
-
- if (gatherscatter != SG_NONE || simd_lane_access)
- {
- STMT_VINFO_DATA_REF (stmt_info) = NULL;
- if (gatherscatter != SG_NONE)
- free_data_ref (dr);
- }
- return false;
- }
- else
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "got vectype for stmt: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- dump_generic_expr (MSG_NOTE, TDF_SLIM,
- STMT_VINFO_VECTYPE (stmt_info));
- dump_printf (MSG_NOTE, "\n");
- }
- }
-
- /* Adjust the minimal vectorization factor according to the
- vector type. */
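-      /* E.g. (illustrative), a V4SI vector type has four subparts, so
-	 *min_vf is raised to at least 4 here.  */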
- vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
- if (vf > *min_vf)
- *min_vf = vf;
-
- if (gatherscatter != SG_NONE)
- {
- gather_scatter_info gs_info;
- if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
- &gs_info)
- || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
- {
- STMT_VINFO_DATA_REF (stmt_info) = NULL;
- free_data_ref (dr);
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- (gatherscatter == GATHER) ?
- "not vectorized: not suitable for gather "
- "load " :
- "not vectorized: not suitable for scatter "
- "store ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
- return false;
- }
-
- free_data_ref (datarefs[i]);
- datarefs[i] = dr;
- STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
- }
-
- else if (is_a <loop_vec_info> (vinfo)
- && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
- {
- if (nested_in_vect_loop_p (loop, stmt))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: not suitable for strided "
- "load ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- }
- return false;
- }
- STMT_VINFO_STRIDED_P (stmt_info) = true;
- }
- }
-
-  /* If we stopped the analysis at the first dataref we could not analyze
-     when trying to vectorize a basic-block, mark the rest of the datarefs
-     as not vectorizable and truncate the vector of datarefs.  That
-     avoids spending useless time analyzing their dependences.  */
- if (i != datarefs.length ())
- {
- gcc_assert (is_a <bb_vec_info> (vinfo));
- for (unsigned j = i; j < datarefs.length (); ++j)
- {
- data_reference_p dr = datarefs[j];
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
- free_data_ref (dr);
- }
- datarefs.truncate (i);
- }
-
- return true;
-}
-
-
-/* Function vect_get_new_vect_var.
-
-   Returns a new variable.  The current naming scheme prepends a prefix
-   such as "vect_", "stmp_", "mask_" or "vectp_" (depending on the value of
-   VAR_KIND) to the names of vectorizer generated variables; the prefix is
-   prepended to NAME if NAME is provided. */
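-
-/* For example (illustrative, "foo" is an arbitrary name), with
-   VAR_KIND == vect_pointer_var and NAME == "foo" the new variable's name is
-   based on "vectp_foo"; with no NAME the plain prefix "vectp" is used.  */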
-
-tree
-vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
-{
- const char *prefix;
- tree new_vect_var;
-
- switch (var_kind)
- {
- case vect_simple_var:
- prefix = "vect";
- break;
- case vect_scalar_var:
- prefix = "stmp";
- break;
- case vect_mask_var:
- prefix = "mask";
- break;
- case vect_pointer_var:
- prefix = "vectp";
- break;
- default:
- gcc_unreachable ();
- }
-
- if (name)
- {
- char* tmp = concat (prefix, "_", name, NULL);
- new_vect_var = create_tmp_reg (type, tmp);
- free (tmp);
- }
- else
- new_vect_var = create_tmp_reg (type, prefix);
-
- return new_vect_var;
-}
-
-/* Like vect_get_new_vect_var but return an SSA name. */
-
-tree
-vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
-{
- const char *prefix;
- tree new_vect_var;
-
- switch (var_kind)
- {
- case vect_simple_var:
- prefix = "vect";
- break;
- case vect_scalar_var:
- prefix = "stmp";
- break;
- case vect_pointer_var:
- prefix = "vectp";
- break;
- default:
- gcc_unreachable ();
- }
-
- if (name)
- {
- char* tmp = concat (prefix, "_", name, NULL);
- new_vect_var = make_temp_ssa_name (type, NULL, tmp);
- free (tmp);
- }
- else
- new_vect_var = make_temp_ssa_name (type, NULL, prefix);
-
- return new_vect_var;
-}
-
-/* Duplicate ptr info and set alignment/misalignment on NAME from DR.  */
-
-static void
-vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr,
- stmt_vec_info stmt_info)
-{
- duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
- unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
- int misalign = DR_MISALIGNMENT (dr);
- if (misalign == -1)
- mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
- else
- set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), align, misalign);
-}
-
-/* Function vect_create_addr_base_for_vector_ref.
-
- Create an expression that computes the address of the first memory location
- that will be accessed for a data reference.
-
- Input:
- STMT: The statement containing the data reference.
- NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
-   OFFSET: Optional.  If supplied, it is added to the initial address.
-   LOOP: Specifies relative to which loop-nest the address should be computed.
- For example, when the dataref is in an inner-loop nested in an
- outer-loop that is now being vectorized, LOOP can be either the
- outer-loop, or the inner-loop. The first memory location accessed
- by the following dataref ('in' points to short):
-
- for (i=0; i<N; i++)
- for (j=0; j<M; j++)
- s += in[i+j]
-
- is as follows:
- if LOOP=i_loop: &in (relative to i_loop)
- if LOOP=j_loop: &in+i*2B (relative to j_loop)
- BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the
-	        initial address.  Unlike OFFSET, which is the number of
-	        elements to be added, BYTE_OFFSET is measured in bytes.
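-	        For example (illustrative): for a dataref of type 'int'
-	        (4 bytes), OFFSET == 2 advances the address by 8 bytes,
-	        whereas BYTE_OFFSET == 2 advances it by only 2 bytes.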
-
- Output:
- 1. Return an SSA_NAME whose value is the address of the memory location of
- the first vector of the data reference.
- 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
- these statement(s) which define the returned SSA_NAME.
-
- FORNOW: We are only handling array accesses with step 1. */
-
-tree
-vect_create_addr_base_for_vector_ref (gimple *stmt,
- gimple_seq *new_stmt_list,
- tree offset,
- struct loop *loop,
- tree byte_offset)
-{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- tree data_ref_base;
- const char *base_name;
- tree addr_base;
- tree dest;
- gimple_seq seq = NULL;
- tree base_offset;
- tree init;
- tree vect_ptr_type;
- tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-
- if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
- {
- struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
-
- data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
- base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
- init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
- }
- else
- {
- data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
- base_offset = unshare_expr (DR_OFFSET (dr));
- init = unshare_expr (DR_INIT (dr));
- }
-
- if (loop_vinfo)
- base_name = get_name (data_ref_base);
- else
- {
- base_offset = ssize_int (0);
- init = ssize_int (0);
- base_name = get_name (DR_REF (dr));
- }
-
- /* Create base_offset */
- base_offset = size_binop (PLUS_EXPR,
- fold_convert (sizetype, base_offset),
- fold_convert (sizetype, init));
-
- if (offset)
- {
- offset = fold_build2 (MULT_EXPR, sizetype,
- fold_convert (sizetype, offset), step);
- base_offset = fold_build2 (PLUS_EXPR, sizetype,
- base_offset, offset);
- }
- if (byte_offset)
- {
- byte_offset = fold_convert (sizetype, byte_offset);
- base_offset = fold_build2 (PLUS_EXPR, sizetype,
- base_offset, byte_offset);
- }
-
- /* base + base_offset */
- if (loop_vinfo)
- addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
- else
- {
- addr_base = build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (DR_REF (dr))),
- unshare_expr (DR_REF (dr)));
- }
-
- vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
- dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
- addr_base = force_gimple_operand (addr_base, &seq, true, dest);
- gimple_seq_add_seq (new_stmt_list, seq);
-
- if (DR_PTR_INFO (dr)
- && TREE_CODE (addr_base) == SSA_NAME
- && !SSA_NAME_PTR_INFO (addr_base))
- {
- vect_duplicate_ssa_name_ptr_info (addr_base, dr, stmt_info);
- if (offset || byte_offset)
- mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
- }
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "created ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
- dump_printf (MSG_NOTE, "\n");
- }
-
- return addr_base;
-}
-
-
-/* Function vect_create_data_ref_ptr.
-
- Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
- location accessed in the loop by STMT, along with the def-use update
- chain to appropriately advance the pointer through the loop iterations.
- Also set aliasing information for the pointer. This pointer is used by
- the callers to this function to create a memory reference expression for
- vector load/store access.
-
- Input:
- 1. STMT: a stmt that references memory. Expected to be of the form
- GIMPLE_ASSIGN <name, data-ref> or
- GIMPLE_ASSIGN <data-ref, name>.
- 2. AGGR_TYPE: the type of the reference, which should be either a vector
- or an array.
- 3. AT_LOOP: the loop where the vector memref is to be created.
- 4. OFFSET (optional): an offset to be added to the initial address accessed
- by the data-ref in STMT.
-   5. BSI: location where the new stmts are to be placed if there is no loop.
-   6. ONLY_INIT: indicates whether ap is to be updated in the loop, or is to
-        remain pointing to the initial address.
-   7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
-        to the initial address accessed by the data-ref in STMT.  This is
-        similar to OFFSET, but OFFSET is counted in elements, while
-        BYTE_OFFSET is counted in bytes.
-
- Output:
- 1. Declare a new ptr to vector_type, and have it point to the base of the
-      data reference (initial address accessed by the data reference).
- For example, for vector of type V8HI, the following code is generated:
-
- v8hi *ap;
- ap = (v8hi *)initial_address;
-
- if OFFSET is not supplied:
- initial_address = &a[init];
- if OFFSET is supplied:
- initial_address = &a[init + OFFSET];
- if BYTE_OFFSET is supplied:
- initial_address = &a[init] + BYTE_OFFSET;
-
- Return the initial_address in INITIAL_ADDRESS.
-
- 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
- update the pointer in each iteration of the loop.
-
- Return the increment stmt that updates the pointer in PTR_INCR.
-
- 3. Set INV_P to true if the access pattern of the data reference in the
- vectorized loop is invariant. Set it to false otherwise.
-
- 4. Return the pointer. */
-
-tree
-vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
- tree offset, tree *initial_address,
- gimple_stmt_iterator *gsi, gimple **ptr_incr,
- bool only_init, bool *inv_p, tree byte_offset)
-{
- const char *base_name;
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = NULL;
- bool nested_in_vect_loop = false;
- struct loop *containing_loop = NULL;
- tree aggr_ptr_type;
- tree aggr_ptr;
- tree new_temp;
- gimple_seq new_stmt_list = NULL;
- edge pe = NULL;
- basic_block new_bb;
- tree aggr_ptr_init;
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- tree aptr;
- gimple_stmt_iterator incr_gsi;
- bool insert_after;
- tree indx_before_incr, indx_after_incr;
- gimple *incr;
- tree step;
- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-
- gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
- || TREE_CODE (aggr_type) == VECTOR_TYPE);
-
- if (loop_vinfo)
- {
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
- containing_loop = (gimple_bb (stmt))->loop_father;
- pe = loop_preheader_edge (loop);
- }
- else
- {
- gcc_assert (bb_vinfo);
- only_init = true;
- *ptr_incr = NULL;
- }
-
- /* Check the step (evolution) of the load in LOOP, and record
- whether it's invariant. */
- if (nested_in_vect_loop)
- step = STMT_VINFO_DR_STEP (stmt_info);
- else
- step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
-
- if (integer_zerop (step))
- *inv_p = true;
- else
- *inv_p = false;
-
- /* Create an expression for the first address accessed by this load
- in LOOP. */
- base_name = get_name (DR_BASE_ADDRESS (dr));
-
- if (dump_enabled_p ())
- {
- tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
- dump_printf_loc (MSG_NOTE, vect_location,
- "create %s-pointer variable to type: ",
- get_tree_code_name (TREE_CODE (aggr_type)));
- dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
- if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
- dump_printf (MSG_NOTE, " vectorizing an array ref: ");
- else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
- dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
- else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
- dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
- else
- dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
- dump_printf (MSG_NOTE, "\n");
- }
-
- /* (1) Create the new aggregate-pointer variable.
- Vector and array types inherit the alias set of their component
- type by default so we need to use a ref-all pointer if the data
- reference does not conflict with the created aggregated data
- reference because it is not addressable. */
- bool need_ref_all = false;
- if (!alias_sets_conflict_p (get_alias_set (aggr_type),
- get_alias_set (DR_REF (dr))))
- need_ref_all = true;
- /* Likewise for any of the data references in the stmt group. */
- else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
- {
- gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
- do
- {
- stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
- struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
- if (!alias_sets_conflict_p (get_alias_set (aggr_type),
- get_alias_set (DR_REF (sdr))))
- {
- need_ref_all = true;
- break;
- }
- orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
- }
- while (orig_stmt);
- }
- aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
- need_ref_all);
- aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
-
-
- /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
- vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
- def-use update cycles for the pointer: one relative to the outer-loop
- (LOOP), which is what steps (3) and (4) below do. The other is relative
- to the inner-loop (which is the inner-most loop containing the dataref),
-   and this is done by step (5) below.
-
- When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
- inner-most loop, and so steps (3),(4) work the same, and step (5) is
- redundant. Steps (3),(4) create the following:
-
- vp0 = &base_addr;
- LOOP: vp1 = phi(vp0,vp2)
- ...
- ...
- vp2 = vp1 + step
- goto LOOP
-
- If there is an inner-loop nested in loop, then step (5) will also be
- applied, and an additional update in the inner-loop will be created:
-
- vp0 = &base_addr;
- LOOP: vp1 = phi(vp0,vp2)
- ...
- inner: vp3 = phi(vp1,vp4)
- vp4 = vp3 + inner_step
- if () goto inner
- ...
- vp2 = vp1 + step
- if () goto LOOP */
-
- /* (2) Calculate the initial address of the aggregate-pointer, and set
- the aggregate-pointer to point to it before the loop. */
-
-  /* Create: (&(base[init_val+offset]) + byte_offset) in the loop preheader.  */
-
- new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
- offset, loop, byte_offset);
- if (new_stmt_list)
- {
- if (pe)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
- gcc_assert (!new_bb);
- }
- else
- gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
- }
-
- *initial_address = new_temp;
- aggr_ptr_init = new_temp;
-
- /* (3) Handle the updating of the aggregate-pointer inside the loop.
- This is needed when ONLY_INIT is false, and also when AT_LOOP is the
- inner-loop nested in LOOP (during outer-loop vectorization). */
-
- /* No update in loop is required. */
- if (only_init && (!loop_vinfo || at_loop == loop))
- aptr = aggr_ptr_init;
- else
- {
- /* The step of the aggregate pointer is the type size. */
- tree iv_step = TYPE_SIZE_UNIT (aggr_type);
- /* One exception to the above is when the scalar step of the load in
- LOOP is zero. In this case the step here is also zero. */
- if (*inv_p)
- iv_step = size_zero_node;
- else if (tree_int_cst_sgn (step) == -1)
- iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
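-      /* E.g. (illustrative): for a V4SI aggregate the pointer advances by
-	 16 bytes per iteration; if the scalar accesses walk backwards
-	 (negative DR_STEP) the IV step becomes -16 instead.  */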
-
- standard_iv_increment_position (loop, &incr_gsi, &insert_after);
-
- create_iv (aggr_ptr_init,
- fold_convert (aggr_ptr_type, iv_step),
- aggr_ptr, loop, &incr_gsi, insert_after,
- &indx_before_incr, &indx_after_incr);
- incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
-
- /* Copy the points-to information if it exists. */
- if (DR_PTR_INFO (dr))
- {
- vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
- vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
- }
- if (ptr_incr)
- *ptr_incr = incr;
-
- aptr = indx_before_incr;
- }
-
- if (!nested_in_vect_loop || only_init)
- return aptr;
-
-
- /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
-     nested in LOOP, if it exists.  */
-
- gcc_assert (nested_in_vect_loop);
- if (!only_init)
- {
- standard_iv_increment_position (containing_loop, &incr_gsi,
- &insert_after);
- create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
- containing_loop, &incr_gsi, insert_after, &indx_before_incr,
- &indx_after_incr);
- incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
-
- /* Copy the points-to information if it exists. */
- if (DR_PTR_INFO (dr))
- {
- vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
- vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
- }
- if (ptr_incr)
- *ptr_incr = incr;
-
- return indx_before_incr;
- }
- else
- gcc_unreachable ();
-}
-
-
-/* Function bump_vector_ptr
-
- Increment a pointer (to a vector type) by vector-size. If requested,
- i.e. if PTR-INCR is given, then also connect the new increment stmt
- to the existing def-use update-chain of the pointer, by modifying
- the PTR_INCR as illustrated below:
-
- The pointer def-use update-chain before this function:
- DATAREF_PTR = phi (p_0, p_2)
- ....
- PTR_INCR: p_2 = DATAREF_PTR + step
-
- The pointer def-use update-chain after this function:
- DATAREF_PTR = phi (p_0, p_2)
- ....
- NEW_DATAREF_PTR = DATAREF_PTR + BUMP
- ....
- PTR_INCR: p_2 = NEW_DATAREF_PTR + step
-
- Input:
- DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
- in the loop.
- PTR_INCR - optional. The stmt that updates the pointer in each iteration of
- the loop. The increment amount across iterations is expected
- to be vector_size.
- BSI - location where the new update stmt is to be placed.
- STMT - the original scalar memory-access stmt that is being vectorized.
- BUMP - optional. The offset by which to bump the pointer. If not given,
- the offset is assumed to be vector_size.
-
- Output: Return NEW_DATAREF_PTR as illustrated above.
-
-*/
-
-tree
-bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
- gimple *stmt, tree bump)
-{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- tree update = TYPE_SIZE_UNIT (vectype);
- gassign *incr_stmt;
- ssa_op_iter iter;
- use_operand_p use_p;
- tree new_dataref_ptr;
-
- if (bump)
- update = bump;
-
- if (TREE_CODE (dataref_ptr) == SSA_NAME)
- new_dataref_ptr = copy_ssa_name (dataref_ptr);
- else
- new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
- incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
- dataref_ptr, update);
- vect_finish_stmt_generation (stmt, incr_stmt, gsi);
-
- /* Copy the points-to information if it exists. */
- if (DR_PTR_INFO (dr))
- {
- duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
- mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
- }
-
- if (!ptr_incr)
- return new_dataref_ptr;
-
- /* Update the vector-pointer's cross-iteration increment. */
- FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
- {
- tree use = USE_FROM_PTR (use_p);
-
- if (use == dataref_ptr)
- SET_USE (use_p, new_dataref_ptr);
- else
- gcc_assert (tree_int_cst_compare (use, update) == 0);
- }
-
- return new_dataref_ptr;
-}
-
-
-/* Function vect_create_destination_var.
-
- Create a new temporary of type VECTYPE. */
-
-tree
-vect_create_destination_var (tree scalar_dest, tree vectype)
-{
- tree vec_dest;
- const char *name;
- char *new_name;
- tree type;
- enum vect_var_kind kind;
-
- kind = vectype
- ? VECTOR_BOOLEAN_TYPE_P (vectype)
- ? vect_mask_var
- : vect_simple_var
- : vect_scalar_var;
- type = vectype ? vectype : TREE_TYPE (scalar_dest);
-
- gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
-
- name = get_name (scalar_dest);
- if (name)
- new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
- else
- new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
- vec_dest = vect_get_new_vect_var (type, kind, new_name);
- free (new_name);
-
- return vec_dest;
-}
-
-/* Function vect_grouped_store_supported.
-
- Returns TRUE if interleave high and interleave low permutations
- are supported, and FALSE otherwise. */
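-
-/* For example (illustrative), for V4SI vectors the two selectors checked
-   below are the interleave-high mask {0, 4, 1, 5} and the interleave-low
-   mask {2, 6, 3, 7}.  */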
-
-bool
-vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
-{
- machine_mode mode = TYPE_MODE (vectype);
-
- /* vect_permute_store_chain requires the group size to be equal to 3 or
- be a power of two. */
- if (count != 3 && exact_log2 (count) == -1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
-			 " is not a power of 2 or not equal to 3\n");
- return false;
- }
-
- /* Check that the permutation is supported. */
- if (VECTOR_MODE_P (mode))
- {
- unsigned int i, nelt = GET_MODE_NUNITS (mode);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
-
- if (count == 3)
- {
- unsigned int j0 = 0, j1 = 0, j2 = 0;
- unsigned int i, j;
-
- for (j = 0; j < 3; j++)
- {
- int nelt0 = ((3 - j) * nelt) % 3;
- int nelt1 = ((3 - j) * nelt + 1) % 3;
- int nelt2 = ((3 - j) * nelt + 2) % 3;
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = j0++;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = nelt + j1++;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = 0;
- }
- if (!can_vec_perm_p (mode, false, sel))
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
-			     "permutation op not supported by target.\n");
- return false;
- }
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = 3 * i + nelt0;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = 3 * i + nelt1;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = nelt + j2++;
- }
- if (!can_vec_perm_p (mode, false, sel))
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
-			     "permutation op not supported by target.\n");
- return false;
- }
- }
- return true;
- }
- else
- {
-	  /* If length is not equal to 3 then only a power of 2 is supported.  */
- gcc_assert (pow2p_hwi (count));
-
- for (i = 0; i < nelt / 2; i++)
- {
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- if (can_vec_perm_p (mode, false, sel))
- {
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- if (can_vec_perm_p (mode, false, sel))
- return true;
- }
- }
- }
-
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
-		 "permutation op not supported by target.\n");
- return false;
-}
-
-
-/* Return TRUE if vec_store_lanes is available for COUNT vectors of
- type VECTYPE. */
-
-bool
-vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
-{
- return vect_lanes_optab_supported_p ("vec_store_lanes",
- vec_store_lanes_optab,
- vectype, count);
-}
-
-
-/* Function vect_permute_store_chain.
-
- Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
- a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
- the data correctly for the stores. Return the final references for stores
- in RESULT_CHAIN.
-
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
- The input is 4 vectors each containing 8 elements. We assign a number to
- each element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
- 4th vec: 24 25 26 27 28 29 30 31
-
- The output sequence should be:
-
- 1st vec: 0 8 16 24 1 9 17 25
- 2nd vec: 2 10 18 26 3 11 19 27
-   3rd vec:  4 12 20 28  5 13 21 29
- 4th vec: 6 14 22 30 7 15 23 31
-
- i.e., we interleave the contents of the four vectors in their order.
-
- We use interleave_high/low instructions to create such output. The input of
- each interleave_high/low operation is two vectors:
- 1st vec 2nd vec
- 0 1 2 3 4 5 6 7
- the even elements of the result vector are obtained left-to-right from the
- high/low elements of the first vector. The odd elements of the result are
- obtained left-to-right from the high/low elements of the second vector.
- The output of interleave_high will be: 0 4 1 5
- and of interleave_low: 2 6 3 7
-
-
- The permutation is done in log LENGTH stages. In each stage interleave_high
- and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
- where the first argument is taken from the first half of DR_CHAIN and the
-   second argument from its second half.
- In our example,
-
- I1: interleave_high (1st vec, 3rd vec)
- I2: interleave_low (1st vec, 3rd vec)
- I3: interleave_high (2nd vec, 4th vec)
- I4: interleave_low (2nd vec, 4th vec)
-
- The output for the first stage is:
-
- I1: 0 16 1 17 2 18 3 19
- I2: 4 20 5 21 6 22 7 23
- I3: 8 24 9 25 10 26 11 27
- I4: 12 28 13 29 14 30 15 31
-
- The output of the second stage, i.e. the final result is:
-
- I1: 0 8 16 24 1 9 17 25
- I2: 2 10 18 26 3 11 19 27
-   I3:  4 12 20 28  5 13 21 29
- I4: 6 14 22 30 7 15 23 31. */
-
-void
-vect_permute_store_chain (vec<tree> dr_chain,
- unsigned int length,
- gimple *stmt,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree vect1, vect2, high, low;
- gimple *perm_stmt;
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- tree perm_mask_low, perm_mask_high;
- tree data_ref;
- tree perm3_mask_low, perm3_mask_high;
- unsigned int i, n, log_length = exact_log2 (length);
- unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (length == 3)
- {
- unsigned int j0 = 0, j1 = 0, j2 = 0;
-
- for (j = 0; j < 3; j++)
- {
- int nelt0 = ((3 - j) * nelt) % 3;
- int nelt1 = ((3 - j) * nelt + 1) % 3;
- int nelt2 = ((3 - j) * nelt + 2) % 3;
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = j0++;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = nelt + j1++;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = 0;
- }
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = 3 * i + nelt0;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = 3 * i + nelt1;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = nelt + j2++;
- }
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, sel);
-
- vect1 = dr_chain[0];
- vect2 = dr_chain[1];
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {j, nelt, *, j + 1, nelt + j + 1, *,
- j + 2, nelt + j + 2, *, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_low);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
-
- vect1 = data_ref;
- vect2 = dr_chain[2];
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {0, 1, nelt + j, 3, 4, nelt + j + 1,
- 6, 7, nelt + j + 2, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_high);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j] = data_ref;
- }
- }
- else
- {
-      /* If length is not equal to 3 then only a power of 2 is supported.  */
- gcc_assert (pow2p_hwi (length));
-
- for (i = 0, n = nelt / 2; i < n; i++)
- {
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- perm_mask_high = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- perm_mask_low = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0, n = log_length; i < n; i++)
- {
- for (j = 0; j < length/2; j++)
- {
- vect1 = dr_chain[j];
- vect2 = dr_chain[j+length/2];
-
- /* Create interleaving stmt:
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
- ...}> */
- high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
- perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_high);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j] = high;
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
- ...}> */
- low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
- perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_low);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j+1] = low;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- }
-}
-
-/* Function vect_setup_realignment
-
- This function is called when vectorizing an unaligned load using
- the dr_explicit_realign[_optimized] scheme.
- This function generates the following code at the loop prolog:
-
- p = initial_addr;
- x msq_init = *(floor(p)); # prolog load
- realignment_token = call target_builtin;
- loop:
- x msq = phi (msq_init, ---)
-
- The stmts marked with x are generated only for the case of
- dr_explicit_realign_optimized.
-
- The code above sets up a new (vector) pointer, pointing to the first
- location accessed by STMT, and a "floor-aligned" load using that pointer.
- It also generates code to compute the "realignment-token" (if the relevant
- target hook was defined), and creates a phi-node at the loop-header bb
- whose arguments are the result of the prolog-load (created by this
- function) and the result of a load that takes place in the loop (to be
- created by the caller to this function).
-
- For the case of dr_explicit_realign_optimized:
- The caller to this function uses the phi-result (msq) to create the
- realignment code inside the loop, and sets up the missing phi argument,
- as follows:
- loop:
- msq = phi (msq_init, lsq)
- lsq = *(floor(p')); # load in loop
- result = realign_load (msq, lsq, realignment_token);
-
- For the case of dr_explicit_realign:
- loop:
- msq = *(floor(p)); # load in loop
- p' = p + (VS-1);
- lsq = *(floor(p')); # load in loop
- result = realign_load (msq, lsq, realignment_token);
-
- Input:
- STMT - (scalar) load stmt to be vectorized. This load accesses
- a memory location that may be unaligned.
- BSI - place where new code is to be inserted.
- ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
- is used.
-
- Output:
- REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
- target hook, if defined.
- Return value - the result of the loop-header phi node. */
-
-tree
-vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
- tree *realignment_token,
- enum dr_alignment_support alignment_support_scheme,
- tree init_addr,
- struct loop **at_loop)
-{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- struct loop *loop = NULL;
- edge pe = NULL;
- tree scalar_dest = gimple_assign_lhs (stmt);
- tree vec_dest;
- gimple *inc;
- tree ptr;
- tree data_ref;
- basic_block new_bb;
- tree msq_init = NULL_TREE;
- tree new_temp;
- gphi *phi_stmt;
- tree msq = NULL_TREE;
- gimple_seq stmts = NULL;
- bool inv_p;
- bool compute_in_loop = false;
- bool nested_in_vect_loop = false;
- struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
- struct loop *loop_for_initial_load = NULL;
-
- if (loop_vinfo)
- {
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
- }
-
- gcc_assert (alignment_support_scheme == dr_explicit_realign
- || alignment_support_scheme == dr_explicit_realign_optimized);
-
- /* We need to generate three things:
- 1. the misalignment computation
- 2. the extra vector load (for the optimized realignment scheme).
- 3. the phi node for the two vectors from which the realignment is
- done (for the optimized realignment scheme). */
-
- /* 1. Determine where to generate the misalignment computation.
-
- If INIT_ADDR is NULL_TREE, this indicates that the misalignment
- calculation will be generated by this function, outside the loop (in the
- preheader). Otherwise, INIT_ADDR had already been computed for us by the
- caller, inside the loop.
-
- Background: If the misalignment remains fixed throughout the iterations of
- the loop, then both realignment schemes are applicable, and also the
- misalignment computation can be done outside LOOP. This is because we are
- vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
- are a multiple of VS (the Vector Size), and therefore the misalignment in
- different vectorized LOOP iterations is always the same.
- The problem arises only if the memory access is in an inner-loop nested
- inside LOOP, which is now being vectorized using outer-loop vectorization.
- This is the only case when the misalignment of the memory access may not
- remain fixed throughout the iterations of the inner-loop (as explained in
- detail in vect_supportable_dr_alignment). In this case, not only is the
- optimized realignment scheme not applicable, but also the misalignment
- computation (and generation of the realignment token that is passed to
- REALIGN_LOAD) have to be done inside the loop.
-
- In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
- or not, which in turn determines if the misalignment is computed inside
- the inner-loop, or outside LOOP. */
-
- if (init_addr != NULL_TREE || !loop_vinfo)
- {
- compute_in_loop = true;
- gcc_assert (alignment_support_scheme == dr_explicit_realign);
- }
-
-
- /* 2. Determine where to generate the extra vector load.
-
- For the optimized realignment scheme, instead of generating two vector
- loads in each iteration, we generate a single extra vector load in the
- preheader of the loop, and in each iteration reuse the result of the
- vector load from the previous iteration. In case the memory access is in
- an inner-loop nested inside LOOP, which is now being vectorized using
- outer-loop vectorization, we need to determine whether this initial vector
- load should be generated at the preheader of the inner-loop, or can be
- generated at the preheader of LOOP. If the memory access has no evolution
- in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
- to be generated inside LOOP (in the preheader of the inner-loop). */
-
- if (nested_in_vect_loop)
- {
- tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
- bool invariant_in_outerloop =
- (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
- loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
- }
- else
- loop_for_initial_load = loop;
- if (at_loop)
- *at_loop = loop_for_initial_load;
-
- if (loop_for_initial_load)
- pe = loop_preheader_edge (loop_for_initial_load);
-
- /* 3. For the case of the optimized realignment, create the first vector
- load at the loop preheader. */
-
- if (alignment_support_scheme == dr_explicit_realign_optimized)
- {
- /* Create msq_init = *(floor(p1)) in the loop preheader */
- gassign *new_stmt;
-
- gcc_assert (!compute_in_loop);
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
- NULL_TREE, &init_addr, NULL, &inc,
- true, &inv_p);
- if (TREE_CODE (ptr) == SSA_NAME)
- new_temp = copy_ssa_name (ptr);
- else
- new_temp = make_ssa_name (TREE_TYPE (ptr));
- new_stmt = gimple_build_assign
- (new_temp, BIT_AND_EXPR, ptr,
- build_int_cst (TREE_TYPE (ptr),
- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
- new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
- gcc_assert (!new_bb);
- data_ref
- = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
- build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
- new_stmt = gimple_build_assign (vec_dest, data_ref);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- if (pe)
- {
- new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
- gcc_assert (!new_bb);
- }
- else
- gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
-
- msq_init = gimple_assign_lhs (new_stmt);
- }
-
- /* 4. Create realignment token using a target builtin, if available.
- It is done either inside the containing loop, or before LOOP (as
- determined above). */
-
- if (targetm.vectorize.builtin_mask_for_load)
- {
- gcall *new_stmt;
- tree builtin_decl;
-
-      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
- if (!init_addr)
- {
- /* Generate the INIT_ADDR computation outside LOOP. */
- init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
- NULL_TREE, loop);
- if (loop)
- {
- pe = loop_preheader_edge (loop);
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
- else
- gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
- }
-
- builtin_decl = targetm.vectorize.builtin_mask_for_load ();
- new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
- vec_dest =
- vect_create_destination_var (scalar_dest,
- gimple_call_return_type (new_stmt));
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
-
- if (compute_in_loop)
- gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- else
- {
- /* Generate the misalignment computation outside LOOP. */
- pe = loop_preheader_edge (loop);
- new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
- gcc_assert (!new_bb);
- }
-
- *realignment_token = gimple_call_lhs (new_stmt);
-
- /* The result of the CALL_EXPR to this builtin is determined from
- the value of the parameter and no global variables are touched
- which makes the builtin a "const" function. Requiring the
- builtin to have the "const" attribute makes it unnecessary
- to call mark_call_clobbered. */
- gcc_assert (TREE_READONLY (builtin_decl));
- }
-
- if (alignment_support_scheme == dr_explicit_realign)
- return msq;
-
- gcc_assert (!compute_in_loop);
- gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
-
-
- /* 5. Create msq = phi <msq_init, lsq> in loop */
-
- pe = loop_preheader_edge (containing_loop);
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- msq = make_ssa_name (vec_dest);
- phi_stmt = create_phi_node (msq, containing_loop->header);
- add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
-
- return msq;
-}
-
-
-/* Function vect_grouped_load_supported.
-
- COUNT is the size of the load group (the number of statements plus the
- number of gaps). SINGLE_ELEMENT_P is true if there is actually
- only one statement, with a gap of COUNT - 1.
-
- Returns true if a suitable permute exists. */
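-
-/* For example (illustrative), with V4SI vectors and a power-of-two group
-   size, the selectors checked below are the extract-even mask {0, 2, 4, 6}
-   and the extract-odd mask {1, 3, 5, 7}.  */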
-
-bool
-vect_grouped_load_supported (tree vectype, bool single_element_p,
- unsigned HOST_WIDE_INT count)
-{
- machine_mode mode = TYPE_MODE (vectype);
-
-  /* If this is single-element interleaving with an element distance
-     that leaves unused vector loads around, punt - we would at least
-     create very sub-optimal code in that case (and blow up memory,
-     see PR65518).  */
- if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "single-element interleaving not supported "
- "for not adjacent vector loads\n");
- return false;
- }
-
- /* vect_permute_load_chain requires the group size to be equal to 3 or
- be a power of two. */
- if (count != 3 && exact_log2 (count) == -1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
- " is not a power of 2 or not equal to 3\n");
- return false;
- }
-
- /* Check that the permutation is supported. */
- if (VECTOR_MODE_P (mode))
- {
- unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
-
- if (count == 3)
- {
- unsigned int k;
- for (k = 0; k < 3; k++)
- {
- for (i = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = 3 * i + k;
- else
- sel[i] = 0;
- if (!can_vec_perm_p (mode, false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shuffle of 3 loads is not supported by"
- " target\n");
- return false;
- }
- for (i = 0, j = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = i;
- else
- sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
- if (!can_vec_perm_p (mode, false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shuffle of 3 loads is not supported by"
- " target\n");
- return false;
- }
- }
- return true;
- }
- else
- {
-	  /* If length is not equal to 3 then only a power of 2 is supported.  */
- gcc_assert (pow2p_hwi (count));
- for (i = 0; i < nelt; i++)
- sel[i] = i * 2;
- if (can_vec_perm_p (mode, false, sel))
- {
- for (i = 0; i < nelt; i++)
- sel[i] = i * 2 + 1;
- if (can_vec_perm_p (mode, false, sel))
- return true;
- }
- }
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "extract even/odd not supported by target\n");
- return false;
-}
-
-/* Return TRUE if vec_load_lanes is available for COUNT vectors of
- type VECTYPE. */
-
-bool
-vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
-{
- return vect_lanes_optab_supported_p ("vec_load_lanes",
- vec_load_lanes_optab,
- vectype, count);
-}
-
-/* Function vect_permute_load_chain.
-
- Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
- a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
- the input data correctly. Return the final references for loads in
- RESULT_CHAIN.
-
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
- The input is 4 vectors each containing 8 elements. We assign a number to each
- element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
- 4th vec: 24 25 26 27 28 29 30 31
-
- The output sequence should be:
-
- 1st vec: 0 4 8 12 16 20 24 28
- 2nd vec: 1 5 9 13 17 21 25 29
- 3rd vec: 2 6 10 14 18 22 26 30
- 4th vec: 3 7 11 15 19 23 27 31
-
- i.e., the first output vector should contain the first elements of each
- interleaving group, etc.
-
- We use extract_even/odd instructions to create such output. The input of
- each extract_even/odd operation is two vectors
- 1st vec 2nd vec
- 0 1 2 3 4 5 6 7
-
- and the output is the vector of extracted even/odd elements. The output of
- extract_even will be: 0 2 4 6
- and of extract_odd: 1 3 5 7
-
-
- The permutation is done in log LENGTH stages. In each stage extract_even
- and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
- their order. In our example,
-
- E1: extract_even (1st vec, 2nd vec)
- E2: extract_odd (1st vec, 2nd vec)
- E3: extract_even (3rd vec, 4th vec)
- E4: extract_odd (3rd vec, 4th vec)
-
- The output for the first stage will be:
-
- E1: 0 2 4 6 8 10 12 14
- E2: 1 3 5 7 9 11 13 15
- E3: 16 18 20 22 24 26 28 30
- E4: 17 19 21 23 25 27 29 31
-
- In order to proceed and create the correct sequence for the next stage (or
- for the correct output, if the second stage is the last one, as in our
- example), we first put the output of extract_even operation and then the
- output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
- The input for the second stage is:
-
- 1st vec (E1): 0 2 4 6 8 10 12 14
- 2nd vec (E3): 16 18 20 22 24 26 28 30
- 3rd vec (E2): 1 3 5 7 9 11 13 15
- 4th vec (E4): 17 19 21 23 25 27 29 31
-
- The output of the second stage:
-
- E1: 0 4 8 12 16 20 24 28
- E2: 2 6 10 14 18 22 26 30
- E3: 1 5 9 13 17 21 25 29
- E4: 3 7 11 15 19 23 27 31
-
- And RESULT_CHAIN after reordering:
-
- 1st vec (E1): 0 4 8 12 16 20 24 28
- 2nd vec (E3): 1 5 9 13 17 21 25 29
- 3rd vec (E2): 2 6 10 14 18 22 26 30
- 4th vec (E4): 3 7 11 15 19 23 27 31. */
-
-static void
-vect_permute_load_chain (vec<tree> dr_chain,
- unsigned int length,
- gimple *stmt,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree data_ref, first_vect, second_vect;
- tree perm_mask_even, perm_mask_odd;
- tree perm3_mask_low, perm3_mask_high;
- gimple *perm_stmt;
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- unsigned int i, j, log_length = exact_log2 (length);
- unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (length == 3)
- {
- unsigned int k;
-
- for (k = 0; k < 3; k++)
- {
- for (i = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = 3 * i + k;
- else
- sel[i] = 0;
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0, j = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = i;
- else
- sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
-
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, sel);
-
- first_vect = dr_chain[0];
- second_vect = dr_chain[1];
-
-	  /* Create interleaving stmt (low part of):
-	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
-						            ...}>  */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
- second_vect, perm3_mask_low);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
-
-	  /* Create interleaving stmt (high part of):
-	     high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
-						             ...}>  */
- first_vect = data_ref;
- second_vect = dr_chain[2];
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
- second_vect, perm3_mask_high);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[k] = data_ref;
- }
- }
- else
- {
-      /* If length is not equal to 3 then only a power of 2 is supported.  */
- gcc_assert (pow2p_hwi (length));
-
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2;
- perm_mask_even = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2 + 1;
- perm_mask_odd = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < log_length; i++)
- {
- for (j = 0; j < length; j += 2)
- {
- first_vect = dr_chain[j];
- second_vect = dr_chain[j+1];
-
- /* data_ref = permute_even (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, second_vect,
- perm_mask_even);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
-
- /* data_ref = permute_odd (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, second_vect,
- perm_mask_odd);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2+length/2] = data_ref;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- }
-}
-
-/* Function vect_shift_permute_load_chain.
-
- Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
- sequence of stmts to reorder the input data accordingly.
- Return the final references for loads in RESULT_CHAIN.
-   Return true if successful, false otherwise.
-
- E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
- The input is 3 vectors each containing 8 elements. We assign a
- number to each element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
-
- The output sequence should be:
-
- 1st vec: 0 3 6 9 12 15 18 21
- 2nd vec: 1 4 7 10 13 16 19 22
- 3rd vec: 2 5 8 11 14 17 20 23
-
- We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
-
-   First we shuffle all 3 vectors to get the correct element order:
-
- 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5)
- 2nd vec: ( 8 11 14) ( 9 12 15) (10 13)
- 3rd vec: (16 19 22) (17 20 23) (18 21)
-
-   Next we unite and shift the vectors 3 times:
-
- 1st step:
- shift right by 6 the concatenation of:
- "1st vec" and "2nd vec"
- ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13)
- "2nd vec" and "3rd vec"
- ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
- "3rd vec" and "1st vec"
- (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5)
- | New vectors |
-
-   So that now the new vectors are:
-
- 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15)
- 2nd vec: (10 13) (16 19 22) (17 20 23)
- 3rd vec: (18 21) ( 0 3 6) ( 1 4 7)
-
- 2nd step:
- shift right by 5 the concatenation of:
- "1st vec" and "3rd vec"
- ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7)
- "2nd vec" and "1st vec"
- (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15)
- "3rd vec" and "2nd vec"
- (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23)
- | New vectors |
-
-   So that now the new vectors are:
-
- 1st vec: ( 9 12 15) (18 21) ( 0 3 6)
- 2nd vec: (17 20 23) ( 2 5) ( 8 11 14)
- 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY
-
- 3rd step:
- shift right by 5 the concatenation of:
- "1st vec" and "1st vec"
- ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6)
- shift right by 3 the concatenation of:
- "2nd vec" and "2nd vec"
- (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14)
- | New vectors |
-
- So that now all vectors are READY:
- 1st vec: ( 0 3 6) ( 9 12 15) (18 21)
- 2nd vec: ( 2 5) ( 8 11 14) (17 20 23)
- 3rd vec: ( 1 4 7) (10 13) (16 19 22)
-
-   This algorithm is faster than the one in vect_permute_load_chain if:
-     1. "shift of a concatenation" is faster than general permutation.
-	This is usually so.
-     2. The TARGET machine can't execute vector instructions in parallel.
-	This is because each step of the algorithm depends on the previous one.
-	The algorithm in vect_permute_load_chain is much more parallel.
-
-   The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
-   (A worked example for nelt == 8 follows the function below.)  */
-
-static bool
-vect_shift_permute_load_chain (vec<tree> dr_chain,
- unsigned int length,
- gimple *stmt,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
- tree perm2_mask1, perm2_mask2, perm3_mask;
- tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
- gimple *perm_stmt;
-
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- unsigned int i;
- unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (pow2p_hwi (length) && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
- {
- unsigned int j, log_length = exact_log2 (length);
- for (i = 0; i < nelt / 2; ++i)
- sel[i] = i * 2;
- for (i = 0; i < nelt / 2; ++i)
- sel[nelt / 2 + i] = i * 2 + 1;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			   "shuffle of 2 fields structure is not "
-			   "supported by target\n");
- return false;
- }
- perm2_mask1 = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < nelt / 2; ++i)
- sel[i] = i * 2 + 1;
- for (i = 0; i < nelt / 2; ++i)
- sel[nelt / 2 + i] = i * 2;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			   "shuffle of 2 fields structure is not "
-			   "supported by target\n");
- return false;
- }
- perm2_mask2 = vect_gen_perm_mask_checked (vectype, sel);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {4 5 6 7 8 9 10 11}. */
- for (i = 0; i < nelt; i++)
- sel[i] = nelt / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift1_mask = vect_gen_perm_mask_checked (vectype, sel);
-
-      /* Generating permutation constant to select one vector from two.
- For vector length 8 it is {0 1 2 3 12 13 14 15}. */
- for (i = 0; i < nelt / 2; i++)
- sel[i] = i;
- for (i = nelt / 2; i < nelt; i++)
- sel[i] = nelt + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "select is not supported by target\n");
- return false;
- }
- select_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- for (i = 0; i < log_length; i++)
- {
- for (j = 0; j < length; j += 2)
- {
- first_vect = dr_chain[j];
- second_vect = dr_chain[j + 1];
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, first_vect,
- perm2_mask1);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- vect[0] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- second_vect, second_vect,
- perm2_mask2);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- vect[1] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[0], vect[1], shift1_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2 + length/2] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[0], vect[1], select_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- return true;
- }
- if (length == 3 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 2)
- {
- unsigned int k = 0, l = 0;
-
-      /* Generating permutation constant to get all elements in the right order.
- For vector length 8 it is {0 3 6 1 4 7 2 5}. */
- for (i = 0; i < nelt; i++)
- {
- if (3 * k + (l % 3) >= nelt)
- {
- k = 0;
- l += (3 - (nelt % 3));
- }
- sel[i] = 3 * k + (l % 3);
- k++;
- }
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			   "shuffle of 3 fields structure is not "
-			   "supported by target\n");
- return false;
- }
- perm3_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {6 7 8 9 10 11 12 13}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift1_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {5 6 7 8 9 10 11 12}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + 1 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift2_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {3 4 5 6 7 8 9 10}. */
- for (i = 0; i < nelt; i++)
- sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift3_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {5 6 7 8 9 10 11 12}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift4_mask = vect_gen_perm_mask_checked (vectype, sel);
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- dr_chain[k], dr_chain[k],
- perm3_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- vect[k] = data_ref;
- }
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[k % 3], vect[(k + 1) % 3],
- shift1_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- vect_shift[k] = data_ref;
- }
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect_shift[(4 - k) % 3],
- vect_shift[(3 - k) % 3],
- shift2_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- vect[k] = data_ref;
- }
-
- (*result_chain)[3 - (nelt % 3)] = vect[2];
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
- vect[0], shift3_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[nelt % 3] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
- vect[1], shift4_mask);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[0] = data_ref;
- return true;
- }
- return false;
-}
-
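For concreteness, here is a worked example of the LENGTH == 3 shift path above for nelt == 8, written as a standalone editor's sketch (it is not part of this file; plain int arrays stand in for vector operands).  It builds the perm3 and shift masks with the same formulas as the function and replays the VEC_PERM_EXPR sequence on element labels, ending with the three de-interleaved vectors 0 3 6 9 ..., 1 4 7 10 ..., 2 5 8 11 ... described in the comment before the function.

/* Illustrative sketch: replay vect_shift_permute_load_chain's LENGTH == 3
   path on element labels for nelt == 8.  */
#include <stdio.h>

#define NELT 8

static void
vec_perm (const int *a, const int *b, const unsigned char *sel, int *r)
{
  for (int i = 0; i < NELT; i++)
    r[i] = sel[i] < NELT ? a[sel[i]] : b[sel[i] - NELT];
}

int
main (void)
{
  int chain[3][NELT], vect[3][NELT], vect_shift[3][NELT], result[3][NELT];
  unsigned char perm3[NELT], shift1[NELT], shift2[NELT], shift3[NELT],
    shift4[NELT];
  unsigned int k = 0, l = 0;

  for (int v = 0; v < 3; v++)
    for (int i = 0; i < NELT; i++)
      chain[v][i] = v * NELT + i;	/* labels 0..23; field = label % 3  */

  /* perm3: {0 3 6 1 4 7 2 5} for NELT == 8 (same formula as above).  */
  for (int i = 0; i < NELT; i++)
    {
      if (3 * k + (l % 3) >= NELT)
        {
          k = 0;
          l += 3 - (NELT % 3);
        }
      perm3[i] = 3 * k + (l % 3);
      k++;
    }
  for (int i = 0; i < NELT; i++)
    {
      shift1[i] = 2 * (NELT / 3) + (NELT % 3) + i;	/* {6 .. 13}  */
      shift2[i] = 2 * (NELT / 3) + 1 + i;		/* {5 .. 12}  */
      shift3[i] = NELT / 3 + (NELT % 3) / 2 + i;	/* {3 .. 10}  */
      shift4[i] = 2 * (NELT / 3) + (NELT % 3) / 2 + i;	/* {5 .. 12}  */
    }

  for (k = 0; k < 3; k++)
    vec_perm (chain[k], chain[k], perm3, vect[k]);
  for (k = 0; k < 3; k++)
    vec_perm (vect[k % 3], vect[(k + 1) % 3], shift1, vect_shift[k]);
  for (k = 0; k < 3; k++)
    vec_perm (vect_shift[(4 - k) % 3], vect_shift[(3 - k) % 3], shift2,
              vect[k]);

  for (int i = 0; i < NELT; i++)
    result[3 - NELT % 3][i] = vect[2][i];
  vec_perm (vect[0], vect[0], shift3, result[NELT % 3]);
  vec_perm (vect[1], vect[1], shift4, result[0]);

  /* Prints 0 3 6 9 ..., then 1 4 7 10 ..., then 2 5 8 11 ...  */
  for (int v = 0; v < 3; v++)
    {
      for (int i = 0; i < NELT; i++)
        printf (" %2d", result[v][i]);
      printf ("\n");
    }
  return 0;
}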
-/* Function vect_transform_grouped_load.
-
-   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
-   to perform their permutation and ascribe the resulting vectorized statements
-   to the scalar statements.
-*/
-
-void
-vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
- gimple_stmt_iterator *gsi)
-{
- machine_mode mode;
- vec<tree> result_chain = vNULL;
-
-  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
-     RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
-     vectors that are ready for vector computation.  */
- result_chain.create (size);
-
-  /* If the reassociation width for the vector type is 2 or greater, the target
-     machine can execute 2 or more vector instructions in parallel.  Otherwise
-     try to get the chain for the load group using
-     vect_shift_permute_load_chain (see the sketch after this function).  */
- mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
- if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
- || pow2p_hwi (size)
- || !vect_shift_permute_load_chain (dr_chain, size, stmt,
- gsi, &result_chain))
- vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
- vect_record_grouped_load_vectors (stmt, result_chain);
- result_chain.release ();
-}
-
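As a reading aid, the condition above can be isolated into a tiny sketch.  This is an editor's illustration with hypothetical names (the real code additionally falls back to vect_permute_load_chain when the shift-based builder reports failure): the shift-based chain is only worth trying when the target cannot issue independent permutations in parallel and the group size is not a power of two.

/* Illustrative sketch of the dispatch in vect_transform_grouped_load;
   reassoc_width stands for the target's reassociation width for
   VEC_PERM_EXPR, size for the load group size.  */
#include <stdbool.h>

static bool
try_shift_permute_chain_first (int reassoc_width, unsigned int size)
{
  /* With a reassociation width of 2 or more the independent even/odd
     permutes can run in parallel, so that scheme is preferred; it is also
     the only one used for power-of-2 group sizes.  */
  bool size_is_pow2 = size != 0 && (size & (size - 1)) == 0;
  return reassoc_width <= 1 && !size_is_pow2;
}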
-/* RESULT_CHAIN contains the output of a group of grouped loads that were
- generated as part of the vectorization of STMT. Assign the statement
- for each vector to the associated scalar statement. */
-
-void
-vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
-{
- gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
- gimple *next_stmt, *new_stmt;
- unsigned int i, gap_count;
- tree tmp_data_ref;
-
-  /* Put a permuted data-ref in the VECTORIZED_STMT field.
-     Since we scan the chain starting from its first node, their order
-     corresponds to the order of data-refs in RESULT_CHAIN.  */
- next_stmt = first_stmt;
- gap_count = 1;
- FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
- {
- if (!next_stmt)
- break;
-
-      /* Skip the gaps.  Loads created for the gaps will be removed by the dead
-	 code elimination pass later.  No need to check for the first stmt in
- the group, since it always exists.
- GROUP_GAP is the number of steps in elements from the previous
- access (if there is no gap GROUP_GAP is 1). We skip loads that
- correspond to the gaps. */
- if (next_stmt != first_stmt
- && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
- {
- gap_count++;
- continue;
- }
-
- while (next_stmt)
- {
- new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
- /* We assume that if VEC_STMT is not NULL, this is a case of multiple
- copies, and we put the new vector statement in the first available
- RELATED_STMT. */
- if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
- STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
- else
- {
- if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
- {
- gimple *prev_stmt =
- STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
- gimple *rel_stmt =
- STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
- while (rel_stmt)
- {
- prev_stmt = rel_stmt;
- rel_stmt =
- STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
- }
-
- STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
- new_stmt;
- }
- }
-
- next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
- gap_count = 1;
- /* If NEXT_STMT accesses the same DR as the previous statement,
- put the same TMP_DATA_REF as its vectorized statement; otherwise
- get the next data-ref from RESULT_CHAIN. */
- if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
- break;
- }
- }
-}
-
-/* Function vect_can_force_dr_alignment_p.
-
-   Returns whether the alignment of a DECL can be forced to be aligned
-   on an ALIGNMENT-bit boundary.  */
-
-bool
-vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
-{
- if (!VAR_P (decl))
- return false;
-
- if (decl_in_symtab_p (decl)
- && !symtab_node::get (decl)->can_increase_alignment_p ())
- return false;
-
- if (TREE_STATIC (decl))
- return (alignment <= MAX_OFILE_ALIGNMENT);
- else
- return (alignment <= MAX_STACK_ALIGNMENT);
-}
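In user-visible terms, when this predicate returns true the vectorizer may raise DECL_ALIGN of the variable so that its vector accesses become aligned.  The effect is roughly what the explicit, editor-supplied declarations below request by hand; the 32-byte figure is an arbitrary example, and the two limits above are what bound the achievable alignment for file-scope versus automatic variables.

/* Illustrative only: explicit alignment requests comparable to what the
   vectorizer performs internally when vect_can_force_dr_alignment_p allows
   it.  32 bytes is an arbitrary example alignment.  */
static float a[1024] __attribute__ ((aligned (32)));	/* bounded by MAX_OFILE_ALIGNMENT  */

void
scale_by (float s)
{
  float tmp[256] __attribute__ ((aligned (32)));	/* bounded by MAX_STACK_ALIGNMENT  */

  for (int i = 0; i < 256; i++)
    tmp[i] = a[i] * s;
  a[0] = tmp[0];
}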
-
-
-/* Return whether the data reference DR is supported with respect to its
- alignment.
-   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
-   if it is aligned, i.e., check if it is possible to vectorize it with a
-   different alignment.  */
-
-enum dr_alignment_support
-vect_supportable_dr_alignment (struct data_reference *dr,
- bool check_aligned_accesses)
-{
- gimple *stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- machine_mode mode = TYPE_MODE (vectype);
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *vect_loop = NULL;
- bool nested_in_vect_loop = false;
-
- if (aligned_access_p (dr) && !check_aligned_accesses)
- return dr_aligned;
-
- /* For now assume all conditional loads/stores support unaligned
- access without any special code. */
- if (is_gimple_call (stmt)
- && gimple_call_internal_p (stmt)
- && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
- return dr_unaligned_supported;
-
- if (loop_vinfo)
- {
- vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
- nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
- }
-
- /* Possibly unaligned access. */
-
- /* We can choose between using the implicit realignment scheme (generating
- a misaligned_move stmt) and the explicit realignment scheme (generating
-     aligned loads with a REALIGN_LOAD).  There are two variants of the
-     explicit realignment scheme: optimized and unoptimized.
- We can optimize the realignment only if the step between consecutive
- vector loads is equal to the vector size. Since the vector memory
- accesses advance in steps of VS (Vector Size) in the vectorized loop, it
- is guaranteed that the misalignment amount remains the same throughout the
- execution of the vectorized loop. Therefore, we can create the
- "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
- at the loop preheader.
-
- However, in the case of outer-loop vectorization, when vectorizing a
- memory access in the inner-loop nested within the LOOP that is now being
- vectorized, while it is guaranteed that the misalignment of the
- vectorized memory access will remain the same in different outer-loop
-     iterations, it is *not* guaranteed that it will remain the same throughout
- the execution of the inner-loop. This is because the inner-loop advances
- with the original scalar step (and not in steps of VS). If the inner-loop
- step happens to be a multiple of VS, then the misalignment remains fixed
- and we can use the optimized realignment scheme. For example:
-
- for (i=0; i<N; i++)
- for (j=0; j<M; j++)
- s += a[i+j];
-
- When vectorizing the i-loop in the above example, the step between
- consecutive vector loads is 1, and so the misalignment does not remain
- fixed across the execution of the inner-loop, and the realignment cannot
- be optimized (as illustrated in the following pseudo vectorized loop):
-
- for (i=0; i<N; i+=4)
- for (j=0; j<M; j++){
- vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
- // when j is {0,1,2,3,4,5,6,7,...} respectively.
- // (assuming that we start from an aligned address).
- }
-
- We therefore have to use the unoptimized realignment scheme:
-
- for (i=0; i<N; i+=4)
- for (j=k; j<M; j+=4)
- vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
- // that the misalignment of the initial address is
- // 0).
-
- The loop can then be vectorized as follows:
-
- for (k=0; k<4; k++){
- rt = get_realignment_token (&vp[k]);
- for (i=0; i<N; i+=4){
- v1 = vp[i+k];
- for (j=k; j<M; j+=4){
- v2 = vp[i+j+VS-1];
- va = REALIGN_LOAD <v1,v2,rt>;
- vs += va;
- v1 = v2;
- }
- }
- } */
-
- if (DR_IS_READ (dr))
- {
- bool is_packed = false;
- tree type = (TREE_TYPE (DR_REF (dr)));
-
- if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
- && (!targetm.vectorize.builtin_mask_for_load
- || targetm.vectorize.builtin_mask_for_load ()))
- {
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-
-	  /* If we are doing SLP then the accesses need not have the
-	     same alignment; instead it depends on the SLP group size.  */
- if (loop_vinfo
- && STMT_SLP_TYPE (stmt_info)
- && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)))
- % TYPE_VECTOR_SUBPARTS (vectype) != 0))
- ;
- else if (!loop_vinfo
- || (nested_in_vect_loop
- && (TREE_INT_CST_LOW (DR_STEP (dr))
- != GET_MODE_SIZE (TYPE_MODE (vectype)))))
- return dr_explicit_realign;
- else
- return dr_explicit_realign_optimized;
- }
- if (!known_alignment_for_access_p (dr))
- is_packed = not_size_aligned (DR_REF (dr));
-
- if (targetm.vectorize.support_vector_misalignment
- (mode, type, DR_MISALIGNMENT (dr), is_packed))
- /* Can't software pipeline the loads, but can at least do them. */
- return dr_unaligned_supported;
- }
- else
- {
- bool is_packed = false;
- tree type = (TREE_TYPE (DR_REF (dr)));
-
- if (!known_alignment_for_access_p (dr))
- is_packed = not_size_aligned (DR_REF (dr));
-
- if (targetm.vectorize.support_vector_misalignment
- (mode, type, DR_MISALIGNMENT (dr), is_packed))
- return dr_unaligned_supported;
- }
-
- /* Unsupported. */
- return dr_unaligned_unsupported;
-}
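The long comment in this function argues that the misalignment of &vp[i+j] stays fixed across the inner loop only when the inner-loop step is a multiple of the vector size, which is what separates dr_explicit_realign_optimized from dr_explicit_realign.  The short sketch below is an editor's illustration (VS and the iteration counts are arbitrary example values): it prints that misalignment pattern, in elements, for a unit step and for a step equal to VS.

/* Illustrative sketch: how the misalignment (in elements, modulo the vector
   length) of &vp[i + j] evolves across inner-loop iterations for two
   different inner-loop steps, assuming the initial address is aligned.  */
#include <stdio.h>

#define VS 4	/* vector length in elements (assumed)  */

static void
show_misalignment (int step)
{
  printf ("inner step %d:", step);
  for (int j = 0; j < 8 * step; j += step)
    printf (" %d", j % VS);
  printf (step % VS == 0 ? "   (fixed -> optimized realignment)\n"
			 : "   (varies -> unoptimized realignment)\n");
}

int
main (void)
{
  show_misalignment (1);	/* misalignment cycles 0 1 2 3 ...  */
  show_misalignment (VS);	/* stays 0: the realignment token can be
				   computed once in the preheader  */
  return 0;
}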