diff options
author | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-02-17 08:47:28 +0000 |
---|---|---|
committer | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-02-17 08:47:28 +0000 |
commit | f2983e95a3143759f0604692866729370d023b14 (patch) | |
tree | 156dba144110c82b3dd2deaad5f86c03a6dda30f /gcc/tree-vect-transform.c | |
parent | e9739f5f664c690b0e9c27b757ed7a19b8a2c265 (diff) | |
download | gcc-f2983e95a3143759f0604692866729370d023b14.tar.gz |
* Makefile.in (tree-vect-analyze.o, tree-vect-transform.o): New.
(tree-vectorizer.o): Added missing dependencies.
* tree-vectorizer.h (vect_dump, vect_verbosity_level): Added extern
declaration.
(slpeel_tree_peel_loop_to_edge): Function externalized (had a static
declaration in tree-vectorizer.c, now has an extern declaration in
tree-vectorizer.h).
(slpeel_make_loop_iterate_ntimes, slpeel_can_duplicate_loop_p,
slpeel_verify_cfg_after_peeling, vect_strip_conversion,
get_vectype_for_scalar_type, vect_is_simple_use,
vect_is_simple_iv_evolution, vect_can_force_dr_alignment_p,
vect_supportable_dr_alignment, new_loop_vec_info, destroy_loop_vec_info,
new_stmt_vec_info, vect_analyze_loop, vectorizable_load,
vectorizable_store, vectorizable_operation, vectorizable_assignment,
vect_transform_loop, vect_print_dump_info, vect_set_verbosity_level,
find_loop_location): Likewise.
* tree-vectorizer.c (langhooks.h): #include removed.
(slpeel_tree_peel_loop_to_edge): Function externalized. Declaration
moved to tree-vectorizer.h.
(slpeel_make_loop_iterate_ntimes, slpeel_can_duplicate_loop_p,
slpeel_verify_cfg_after_peeling, vect_strip_conversion,
get_vectype_for_scalar_type, vect_is_simple_use,
vect_is_simple_iv_evolution, vect_can_force_dr_alignment_p,
vect_supportable_dr_alignment, new_loop_vec_info,
destroy_loop_vec_info, new_stmt_vec_info, vect_print_dump_info,
vect_set_verbosity_level, find_loop_location): Likewise.
(vect_analyze_loop): Function externalized. Declaration moved to
tree-vectorizer.h. Function definition moved to tree-vect-analyze.c.
(vect_analyze_loop_form): Moved to tree-vect-analyze.c.
(vect_mark_stmts_to_be_vectorized, vect_analyze_scalar_cycles,
vect_analyze_data_ref_accesses, vect_analyze_data_ref_dependences,
vect_analyze_data_refs_alignment, vect_compute_data_refs_alignment,
vect_enhance_data_refs_alignment, vect_analyze_operations,
exist_non_indexing_operands_for_use_p, vect_mark_relevant,
vect_stmt_relevant_p, vect_get_loop_niters,
vect_analyze_data_ref_dependence, vect_compute_data_ref_alignment,
vect_analyze_data_ref_access, vect_analyze_pointer_ref_access,
vect_can_advance_ivs_p, vect_get_ptr_offset, vect_analyze_offset_expr,
vect_base_addr_differ_p, vect_object_analysis, vect_address_analysis,
vect_get_memtag): Likewise.
(vectorizable_load): Function externalized. Declaration moved to
tree-vectorizer.h. Function definition moved to tree-vect-transform.c.
(vectorizable_store, vectorizable_operation, vectorizable_assignment,
vect_transform_loop): Likewise.
(vect_transform_stmt): Moved to tree-vect-transform.c.
(vect_align_data_ref, vect_create_destination_var,
vect_create_data_ref_ptr, vect_create_index_for_vector_ref,
vect_create_addr_base_for_vector_ref, vect_get_new_vect_var,
vect_get_vec_def_for_operand, vect_init_vector,
vect_finish_stmt_generation, vect_generate_tmps_on_preheader,
vect_build_loop_niters, vect_update_ivs_after_vectorizer,
vect_gen_niters_for_prolog_loop, vect_update_inits_of_dr,
vect_update_inits_of_drs, vect_do_peeling_for_alignment,
vect_do_peeling_for_loop_bound): Likewise.
* tree-vect-analyze.c: New file.
* tree-vect-transform.c: New file.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@95153 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r-- | gcc/tree-vect-transform.c | 1746 |
1 files changed, 1746 insertions, 0 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c new file mode 100644 index 00000000000..5f71256e14e --- /dev/null +++ b/gcc/tree-vect-transform.c @@ -0,0 +1,1746 @@ +/* Transformation Utilities for Loop Vectorization. + Copyright (C) 2003,2004,2005 Free Software Foundation, Inc. + Contributed by Dorit Naishlos <dorit@il.ibm.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "errors.h" +#include "ggc.h" +#include "tree.h" +#include "target.h" +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "expr.h" +#include "optabs.h" +#include "tree-data-ref.h" +#include "tree-chrec.h" +#include "tree-scalar-evolution.h" +#include "tree-vectorizer.h" +#include "langhooks.h" +#include "tree-pass.h" +#include "toplev.h" + +/* Utility functions for the code transformation. 
*/ +static bool vect_transform_stmt (tree, block_stmt_iterator *); +static void vect_align_data_ref (tree); +static tree vect_create_destination_var (tree, tree); +static tree vect_create_data_ref_ptr + (tree, block_stmt_iterator *, tree, tree *, bool); +static tree vect_create_index_for_vector_ref (loop_vec_info); +static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); +static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); +static tree vect_get_vec_def_for_operand (tree, tree); +static tree vect_init_vector (tree, tree); +static void vect_finish_stmt_generation + (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); + +/* Utility function dealing with loop peeling (not peeling itself). */ +static void vect_generate_tmps_on_preheader + (loop_vec_info, tree *, tree *, tree *); +static tree vect_build_loop_niters (loop_vec_info); +static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); +static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); +static void vect_update_inits_of_dr (struct data_reference *, tree niters); +static void vect_update_inits_of_drs (loop_vec_info, tree); +static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); +static void vect_do_peeling_for_loop_bound + (loop_vec_info, tree *, struct loops *); + + +/* Function vect_get_new_vect_var. + + Returns a name for a new variable. The current naming scheme appends the + prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to + the name of vectorizer generated variables, and appends that to NAME if + provided. 
*/ + +static tree +vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) +{ + const char *prefix; + tree new_vect_var; + + if (var_kind == vect_simple_var) + prefix = "vect_"; + else + prefix = "vect_p"; + + if (name) + { + /* concat returns newly allocated memory; the temporary gets its own + identifier built from the name, so release the buffer once the + variable has been created. */ + char *tmp = concat (prefix, name, NULL); + new_vect_var = create_tmp_var (type, tmp); + free (tmp); + } + else + new_vect_var = create_tmp_var (type, prefix); + + return new_vect_var; +} + + +/* Function vect_create_index_for_vector_ref. + + Create (and return) an index variable, along with its update chain in the + loop. This variable will be used to access a memory location in a vector + operation. + + Input: + LOOP_VINFO: The loop_vec_info of the loop being vectorized. The index + increment is created at the position that + standard_iv_increment_position returns for that loop. + + Output: + Return an index that will be used to index a vector array. It is expected + that a pointer to the first vector will be used as the base address for the + indexed reference. + + FORNOW: we are not trying to be efficient, just creating a new index each + time from scratch. At this time all vector references could use the same + index. + + TODO: create only one index to be used by all vector references. Record + the index in the LOOP_VINFO the first time this procedure is called and + return it on subsequent calls. The increment of this index must be placed + just before the conditional expression that ends the single block loop. */ + +static tree +vect_create_index_for_vector_ref (loop_vec_info loop_vinfo) +{ + tree init, step; + block_stmt_iterator incr_bsi; + bool insert_after; + tree indx_before_incr, indx_after_incr; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree incr; + + /* It is assumed that the base pointer used for vectorized access contains + the address of the first vector. Therefore the index used for vectorized + access must be initialized to zero and incremented by 1. 
*/ + + init = integer_zero_node; + step = integer_one_node; + + standard_iv_increment_position (loop, &incr_bsi, &insert_after); + create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after, + &indx_before_incr, &indx_after_incr); + incr = bsi_stmt (incr_bsi); + get_stmt_operands (incr); + set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo)); + + return indx_before_incr; +} + + +/* Function vect_create_addr_base_for_vector_ref. + + Create an expression that computes the address of the first memory location + that will be accessed for a data reference. + + Input: + STMT: The statement containing the data reference. + NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. + OFFSET: Optional. If supplied, it is added to the initial address. + + Output: + 1. Return an SSA_NAME whose value is the address of the memory location of + the first vector of the data reference. + 2. If new_stmt_list is not NULL_TREE after return then the caller must insert + these statement(s) which define the returned SSA_NAME. + + FORNOW: We are only handling array accesses with step 1. 
*/ + +static tree +vect_create_addr_base_for_vector_ref (tree stmt, + tree *new_stmt_list, + tree offset) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree data_ref_base = + unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)); + tree base_name = build_fold_indirect_ref (data_ref_base); + tree ref = DR_REF (dr); + tree scalar_type = TREE_TYPE (ref); + tree scalar_ptr_type = build_pointer_type (scalar_type); + tree vec_stmt; + tree new_temp; + tree addr_base, addr_expr; + tree dest, new_stmt; + tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info)); + + /* Create base_offset */ + dest = create_tmp_var (TREE_TYPE (base_offset), "base_off"); + add_referenced_tmp_var (dest); + base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest); + append_to_statement_list_force (new_stmt, new_stmt_list); + + if (offset) + { + tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset"); + add_referenced_tmp_var (tmp); + offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset, + STMT_VINFO_VECT_STEP (stmt_info))); + base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), + base_offset, offset)); + base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); + append_to_statement_list_force (new_stmt, new_stmt_list); + } + + /* base + base_offset */ + addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base, + base_offset)); + + /* addr_expr = addr_base */ + addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, + get_name (base_name)); + add_referenced_tmp_var (addr_expr); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base); + new_temp = make_ssa_name (addr_expr, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + append_to_statement_list_force (vec_stmt, new_stmt_list); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "created "); + 
print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); + } + return new_temp; +} + + +/* Function vect_align_data_ref. + + Handle misalignment of memory accesses. + + FORNOW: Can't handle misaligned accesses. + Make sure that the dataref is aligned. */ + +static void +vect_align_data_ref (tree stmt) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + + /* FORNOW: can't handle misaligned accesses; + all accesses expected to be aligned. */ + gcc_assert (aligned_access_p (dr)); +} + + +/* Function vect_create_data_ref_ptr. + + Create a memory reference expression for vector access, to be used in a + vector load/store stmt. The reference is based on a new pointer to vector + type (vp). + + Input: + 1. STMT: a stmt that references memory. Expected to be of the form + MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. + 2. BSI: block_stmt_iterator where new stmts can be added. + 3. OFFSET (optional): an offset to be added to the initial address accessed + by the data-ref in STMT. + 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain + pointing to the initial address. + + Output: + 1. Declare a new ptr to vector_type, and have it point to the base of the + data reference (initial address accessed by the data reference). + For example, for vector of type V8HI, the following code is generated: + + v8hi *vp; + vp = (v8hi *)initial_address; + + if OFFSET is not supplied: + initial_address = &a[init]; + if OFFSET is supplied: + initial_address = &a[init + OFFSET]; + + Return the initial_address in INITIAL_ADDRESS. + + 2. Create a data-reference in the loop based on the new vector pointer vp, + and using a new index variable 'idx' as follows: + + vp' = vp + update + + where if ONLY_INIT is true: + update = zero + and otherwise + update = idx * vector_type_size + + Return the pointer vp'. + + + FORNOW: handle only aligned and consecutive accesses. 
*/ + +static tree +vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset, + tree *initial_address, bool only_init) +{ + tree base_name; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vect_ptr_type; + tree vect_ptr; + tree tag; + v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); + v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); + vuse_optype vuses = STMT_VUSE_OPS (stmt); + int nvuses, nv_may_defs, nv_must_defs; + int i; + tree new_temp; + tree vec_stmt; + tree new_stmt_list = NULL_TREE; + tree idx; + edge pe = loop_preheader_edge (loop); + basic_block new_bb; + tree vect_ptr_init; + tree vectype_size; + tree ptr_update; + tree data_ref_ptr; + tree type, tmp, size; + + base_name = build_fold_indirect_ref (unshare_expr ( + STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info))); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + tree data_ref_base = base_name; + fprintf (vect_dump, "create array_ref of type: "); + print_generic_expr (vect_dump, vectype, TDF_SLIM); + if (TREE_CODE (data_ref_base) == VAR_DECL) + fprintf (vect_dump, " vectorizing a one dimensional array ref: "); + else if (TREE_CODE (data_ref_base) == ARRAY_REF) + fprintf (vect_dump, " vectorizing a multidimensional array ref: "); + else if (TREE_CODE (data_ref_base) == COMPONENT_REF) + fprintf (vect_dump, " vectorizing a record based array ref: "); + else if (TREE_CODE (data_ref_base) == SSA_NAME) + fprintf (vect_dump, " vectorizing a pointer ref: "); + print_generic_expr (vect_dump, base_name, TDF_SLIM); + } + + /** (1) Create the new vector-pointer variable: **/ + + vect_ptr_type = build_pointer_type (vectype); + vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, + get_name (base_name)); + add_referenced_tmp_var (vect_ptr); + + + /** (2) Handle aliasing information of 
the new vector-pointer: **/ + + tag = STMT_VINFO_MEMTAG (stmt_info); + gcc_assert (tag); + get_var_ann (vect_ptr)->type_mem_tag = tag; + + /* Mark for renaming all aliased variables + (i.e., the may-aliases of the type-mem-tag). */ + nvuses = NUM_VUSES (vuses); + nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); + nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); + for (i = 0; i < nvuses; i++) + { + tree use = VUSE_OP (vuses, i); + if (TREE_CODE (use) == SSA_NAME) + bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid); + } + for (i = 0; i < nv_may_defs; i++) + { + tree def = V_MAY_DEF_RESULT (v_may_defs, i); + if (TREE_CODE (def) == SSA_NAME) + bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); + } + for (i = 0; i < nv_must_defs; i++) + { + tree def = V_MUST_DEF_RESULT (v_must_defs, i); + if (TREE_CODE (def) == SSA_NAME) + bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); + } + + + /** (3) Calculate the initial address of the vector-pointer, and set + the vector-pointer to point to it before the loop: **/ + + /* Create: &(base[init_val+offset]) in the loop preheader. */ + new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, + offset); + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); + gcc_assert (!new_bb); + *initial_address = new_temp; + + /* Create: p = (vectype *) initial_base */ + vec_stmt = fold_convert (vect_ptr_type, new_temp); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); + new_temp = make_ssa_name (vect_ptr, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); + gcc_assert (!new_bb); + vect_ptr_init = TREE_OPERAND (vec_stmt, 0); + + + /** (4) Handle the updating of the vector-pointer inside the loop: **/ + + if (only_init) /* No update in loop is required. 
*/ + return vect_ptr_init; + + idx = vect_create_index_for_vector_ref (loop_vinfo); + + /* Create: update = idx * vectype_size. The product is computed in + integer_type_node and then converted to the integer type that + type_for_size returns for the size of the vector-pointer type. */ + tmp = create_tmp_var (integer_type_node, "update"); + add_referenced_tmp_var (tmp); + size = TYPE_SIZE (vect_ptr_type); + type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); + ptr_update = create_tmp_var (type, "update"); + add_referenced_tmp_var (ptr_update); + vectype_size = TYPE_SIZE_UNIT (vectype); + vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt); + new_temp = make_ssa_name (tmp, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + vec_stmt = fold_convert (type, new_temp); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt); + new_temp = make_ssa_name (ptr_update, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + + /* Create: data_ref_ptr = vect_ptr_init + update */ + vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); + new_temp = make_ssa_name (vect_ptr, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + data_ref_ptr = TREE_OPERAND (vec_stmt, 0); + + return data_ref_ptr; +} + + +/* Function vect_create_destination_var. + + Create a new temporary of type VECTYPE, named after SCALAR_DEST (or + "var_" when SCALAR_DEST has no name). SCALAR_DEST must be an SSA_NAME. */ + +static tree +vect_create_destination_var (tree scalar_dest, tree vectype) +{ + tree vec_dest; + const char *new_name; + + gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); + + new_name = get_name (scalar_dest); + if (!new_name) + new_name = "var_"; + vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); + add_referenced_tmp_var (vec_dest); + + return vec_dest; +} + + +/* Function vect_init_vector. 
+ + Insert a new stmt (INIT_STMT) that initializes a new vector variable with + the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be + used in the vectorization of STMT. + + INIT_STMT is inserted on the preheader edge of the loop that contains + STMT; the insertion is asserted not to create a new basic block. */ + +static tree +vect_init_vector (tree stmt, tree vector_var) +{ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree new_var; + tree init_stmt; + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + tree vec_oprnd; + edge pe; + tree new_temp; + basic_block new_bb; + + new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); + add_referenced_tmp_var (new_var); + + init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); + new_temp = make_ssa_name (new_var, init_stmt); + TREE_OPERAND (init_stmt, 0) = new_temp; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); + gcc_assert (!new_bb); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "created new init_stmt: "); + print_generic_expr (vect_dump, init_stmt, TDF_SLIM); + } + + vec_oprnd = TREE_OPERAND (init_stmt, 0); + return vec_oprnd; +} + + +/* Function vect_get_vec_def_for_operand. + + OP is an operand in STMT. This function returns a (vector) def that will be + used in the vectorized stmt for STMT. + + In the case that OP is an SSA_NAME which is defined in the loop, then + STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. + + In case OP is an invariant or constant, a new stmt that creates a vector def + needs to be introduced. 
*/ + +static tree +vect_get_vec_def_for_operand (tree op, tree stmt) +{ + tree vec_oprnd; + tree vec_stmt; + tree def_stmt; + stmt_vec_info def_stmt_info = NULL; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block bb; + tree vec_inv; + tree t = NULL_TREE; + tree def; + int i; + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "vect_get_vec_def_for_operand: "); + print_generic_expr (vect_dump, op, TDF_SLIM); + } + + /** ===> Case 1: operand is a constant. **/ + + if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) + { + /* Create 'vect_cst_ = {cst,cst,...,cst}' */ + + tree vec_cst; + + /* Build a tree with vector elements. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); + + for (i = nunits - 1; i >= 0; --i) + { + t = tree_cons (NULL_TREE, op, t); + } + vec_cst = build_vector (vectype, t); + return vect_init_vector (stmt, vec_cst); + } + + gcc_assert (TREE_CODE (op) == SSA_NAME); + + /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ + + def_stmt = SSA_NAME_DEF_STMT (op); + def_stmt_info = vinfo_for_stmt (def_stmt); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: "); + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + } + + + /** ==> Case 2.1: operand is defined inside the loop. **/ + + if (def_stmt_info) + { + /* Get the def from the vectorized stmt. */ + + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); + gcc_assert (vec_stmt); + vec_oprnd = TREE_OPERAND (vec_stmt, 0); + return vec_oprnd; + } + + + /** ==> Case 2.2: operand is defined by the loop-header phi-node - + it is a reduction/induction. 
**/ + + bb = bb_for_stmt (def_stmt); + if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduction/induction - unsupported."); + internal_error ("no support for reduction/induction"); /* FORNOW */ + } + + + /** ==> Case 2.3: operand is defined outside the loop - + it is a loop invariant. */ + + switch (TREE_CODE (def_stmt)) + { + case PHI_NODE: + def = PHI_RESULT (def_stmt); + break; + case MODIFY_EXPR: + def = TREE_OPERAND (def_stmt, 0); + break; + case NOP_EXPR: + /* An empty defining stmt: the operand itself is used as the + invariant. */ + gcc_assert (IS_EMPTY_STMT (def_stmt)); + def = op; + break; + default: + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "unsupported defining stmt: "); + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + } + internal_error ("unsupported defining stmt"); + } + + /* Build a tree with vector elements. + Create 'vec_inv = {inv,inv,..,inv}' */ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "Create vector_inv."); + + for (i = nunits - 1; i >= 0; --i) + { + t = tree_cons (NULL_TREE, def, t); + } + + vec_inv = build_constructor (vectype, t); + return vect_init_vector (stmt, vec_inv); +} + + +/* Function vect_finish_stmt_generation. + + Insert the new stmt VEC_STMT before the stmt BSI points to (which is + expected to be STMT) and give it the source location of STMT. */ + +static void +vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) +{ + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "add new stmt: "); + print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); + } + +#ifdef ENABLE_CHECKING + /* Make sure bsi points to the stmt that is being vectorized. */ + gcc_assert (stmt == bsi_stmt (*bsi)); +#endif + +#ifdef USE_MAPPED_LOCATION + /* SET_EXPR_LOCATION takes a location value, so read it with + EXPR_LOCATION rather than the pointer-style EXPR_LOCUS. */ + SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); +#else + SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); +#endif +} + + +/* Function vectorizable_assignment. 
+ + Check if STMT performs an assignment (copy) that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + When VEC_STMT is NULL only the check is done and STMT_VINFO_TYPE of STMT + is set to assignment_vec_info_type. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree vec_dest; + tree scalar_dest; + tree op; + tree vec_oprnd; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + tree new_temp; + + /* Is vectorizable assignment? */ + + if (TREE_CODE (stmt) != MODIFY_EXPR) + return false; + + scalar_dest = TREE_OPERAND (stmt, 0); + if (TREE_CODE (scalar_dest) != SSA_NAME) + return false; + + op = TREE_OPERAND (stmt, 1); + if (!vect_is_simple_use (op, loop_vinfo, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "use not simple."); + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; + return true; + } + + /** Transform. **/ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform assignment."); + + /* Handle def. */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + + /* Handle use. */ + op = TREE_OPERAND (stmt, 1); + vec_oprnd = vect_get_vec_def_for_operand (op, stmt); + + /* Arguments are ready. Create the new vector stmt. */ + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + TREE_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + + return true; +} + + +/* Function vectorizable_operation. + + Check if STMT performs a binary or unary operation that can be vectorized. 
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree vec_dest; + tree scalar_dest; + tree operation; + tree op0, op1 = NULL; + tree vec_oprnd0, vec_oprnd1=NULL; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + int i; + enum tree_code code; + enum machine_mode vec_mode; + tree new_temp; + int op_type; + tree op; + optab optab; + + /* Is STMT a vectorizable binary/unary operation? */ + if (TREE_CODE (stmt) != MODIFY_EXPR) + return false; + + if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) + return false; + + operation = TREE_OPERAND (stmt, 1); + code = TREE_CODE (operation); + optab = optab_for_tree_code (code, vectype); + + /* Support only unary or binary operations. */ + op_type = TREE_CODE_LENGTH (code); + if (op_type != unary_op && op_type != binary_op) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); + return false; + } + + for (i = 0; i < op_type; i++) + { + op = TREE_OPERAND (operation, i); + if (!vect_is_simple_use (op, loop_vinfo, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "use not simple."); + return false; + } + } + + /* Supportable by target? */ + if (!optab) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no optab."); + return false; + } + vec_mode = TYPE_MODE (vectype); + if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "op not supported by target."); + return false; + } + + if (!vec_stmt) /* transformation not required. 
*/ + { + STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; + return true; + } + + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform binary/unary operation."); + + /* Handle def. */ + scalar_dest = TREE_OPERAND (stmt, 0); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + + /* Handle uses. */ + op0 = TREE_OPERAND (operation, 0); + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); + + if (op_type == binary_op) + { + op1 = TREE_OPERAND (operation, 1); + vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); + } + + /* Arguments are ready. Create the new vector stmt. */ + + if (op_type == binary_op) + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build2 (code, vectype, vec_oprnd0, vec_oprnd1)); + else + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build1 (code, vectype, vec_oprnd0)); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + TREE_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + + return true; +} + + +/* Function vectorizable_store. + + Check if STMT defines a non scalar data-ref (array/pointer/structure) that + can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree scalar_dest; + tree data_ref; + tree op; + tree vec_oprnd1; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + enum machine_mode vec_mode; + tree dummy; + enum dr_alignment_support alignment_support_cheme; + + /* Is vectorizable store? 
*/ + + if (TREE_CODE (stmt) != MODIFY_EXPR) + return false; + + scalar_dest = TREE_OPERAND (stmt, 0); + if (TREE_CODE (scalar_dest) != ARRAY_REF + && TREE_CODE (scalar_dest) != INDIRECT_REF) + return false; + + op = TREE_OPERAND (stmt, 1); + if (!vect_is_simple_use (op, loop_vinfo, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "use not simple."); + return false; + } + + vec_mode = TYPE_MODE (vectype); + /* FORNOW. In some cases can vectorize even if data-type not supported + (e.g. - array initialization with 0). */ + if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) + return false; + + if (!STMT_VINFO_DATA_REF (stmt_info)) + return false; + + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; + return true; + } + + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform store"); + + alignment_support_cheme = vect_supportable_dr_alignment (dr); + gcc_assert (alignment_support_cheme); + /* Only aligned stores are handled; this must compare, not assign, or the + check can never fail. */ + gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ + + /* Handle use - get the vectorized def from the defining stmt. */ + vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); + + /* Handle def. */ + /* FORNOW: make sure the data reference is aligned. */ + vect_align_data_ref (stmt); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); + data_ref = build_fold_indirect_ref (data_ref); + + /* Arguments are ready. Create the new vector stmt. */ + *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + + return true; +} + + +/* vectorizable_load. + + Check if STMT reads a non scalar data-ref (array/pointer/structure) that + can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. 
+ Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree scalar_dest; + tree vec_dest = NULL; + tree data_ref = NULL; + tree op; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree new_temp; + int mode; + tree init_addr; + tree new_stmt; + tree dummy; + basic_block new_bb; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + edge pe = loop_preheader_edge (loop); + enum dr_alignment_support alignment_support_cheme; + + /* Is vectorizable load? */ + /* A candidate load is a MODIFY_EXPR with an SSA_NAME destination and an + ARRAY_REF or INDIRECT_REF source for which a data reference was + recorded. */ + + if (TREE_CODE (stmt) != MODIFY_EXPR) + return false; + + scalar_dest = TREE_OPERAND (stmt, 0); + if (TREE_CODE (scalar_dest) != SSA_NAME) + return false; + + op = TREE_OPERAND (stmt, 1); + if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) + return false; + + if (!STMT_VINFO_DATA_REF (stmt_info)) + return false; + + mode = (int) TYPE_MODE (vectype); + + /* FORNOW. In some cases can vectorize even if data-type not supported + (e.g. - data copies). */ + if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "Aligned load, but unsupported type."); + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; + return true; + } + + /** Transform. 
**/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform load."); + + alignment_support_cheme = vect_supportable_dr_alignment (dr); + /* NOTE(review): presumably a zero scheme means "unsupported" and the + analysis phase already rejected such accesses -- confirm against + enum dr_alignment_support. */ + gcc_assert (alignment_support_cheme); + + if (alignment_support_cheme == dr_aligned + || alignment_support_cheme == dr_unaligned_supported) + { + /* Create: + p = initial_addr; + indx = 0; + loop { + vec_dest = *(p); + indx = indx + 1; + } + */ + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); + if (aligned_access_p (dr)) + data_ref = build_fold_indirect_ref (data_ref); + else + { + /* The misalignment operand is expressed in bits; a DR_MISALIGNMENT + of -1 (presumably "unknown" -- confirm) is passed as 0. */ + int mis = DR_MISALIGNMENT (dr); + tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); + tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); + data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); + } + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + } + else if (alignment_support_cheme == dr_unaligned_software_pipeline) + { + /* Create: + p1 = initial_addr; + msq_init = *(floor(p1)) + p2 = initial_addr + VS - 1; + magic = have_builtin ? 
builtin_result : initial_address; + indx = 0; + loop { + p2' = p2 + indx * vectype_size + lsq = *(floor(p2')) + vec_dest = realign_load (msq, lsq, magic) + indx = indx + 1; + msq = lsq; + } + */ + + tree offset; + tree magic; + tree phi_stmt; + tree msq_init; + tree msq, lsq; + tree dataref_ptr; + tree params; + + /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, + &init_addr, true); + data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); + /* The preheader-edge insertion is expected not to create a new + basic block. */ + gcc_assert (!new_bb); + msq_init = TREE_OPERAND (new_stmt, 0); + + + /* <2> Create lsq = *(floor(p2')) in the loop */ + /* The offset passed below is the number of vector elements minus one. */ + offset = build_int_cst (integer_type_node, + GET_MODE_NUNITS (TYPE_MODE (vectype))); + offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); + data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + lsq = TREE_OPERAND (new_stmt, 0); + + + /* <3> Compute MAGIC, the third operand of the realign_load built in <5>: the result of the target's mask builtin when one is provided, otherwise the in-loop data-ref pointer. */ + if (targetm.vectorize.builtin_mask_for_load) + { + /* Create permutation mask, if required, in loop preheader. 
*/ + tree builtin_decl; + params = build_tree_list (NULL_TREE, init_addr); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + builtin_decl = targetm.vectorize.builtin_mask_for_load (); + new_stmt = build_function_call_expr (builtin_decl, params); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); + gcc_assert (!new_bb); + magic = TREE_OPERAND (new_stmt, 0); + + /* Since we have just created a CALL_EXPR, we may need to + rename call-clobbered variables. */ + mark_call_clobbered_vars_to_rename (); + } + else + { + /* Use current address instead of init_addr for reduced reg pressure. + */ + magic = dataref_ptr; + } + + + /* <4> Create msq = phi <msq_init, lsq> in loop */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + msq = make_ssa_name (vec_dest, NULL_TREE); + phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ + SSA_NAME_DEF_STMT (msq) = phi_stmt; + add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); + add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); + + + /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + } + else + gcc_unreachable (); + + /* In every supported scheme the last statement built is the one that defines the vectorized result. */ + *vec_stmt = new_stmt; + return true; +} + + +/* Function vect_transform_stmt. + + Create a vectorized stmt to replace STMT, and insert it at BSI. 
*/ + +/* Dispatches on STMT_VINFO_TYPE to the matching vectorizable_* routine, + records the generated statement in STMT_VINFO_VEC_STMT, and returns TRUE + only when STMT is a store. */ +bool +vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) +{ + bool is_store = false; + tree vec_stmt = NULL_TREE; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + bool done; + + switch (STMT_VINFO_TYPE (stmt_info)) + { + case op_vec_info_type: + done = vectorizable_operation (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + + case assignment_vec_info_type: + done = vectorizable_assignment (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + + case load_vec_info_type: + done = vectorizable_load (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + + case store_vec_info_type: + done = vectorizable_store (stmt, bsi, &vec_stmt); + gcc_assert (done); + is_store = true; + break; + default: + /* Any other statement kind is treated as an internal error. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "stmt not supported."); + gcc_unreachable (); + } + + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + + return is_store; +} + + +/* This function builds ni_name = number of iterations loop executes + on the loop preheader. */ + +static tree +vect_build_loop_niters (loop_vec_info loop_vinfo) +{ + tree ni_name, stmt, var; + edge pe; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + /* Work on an unshared copy of the recorded iteration-count expression. */ + tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); + + var = create_tmp_var (TREE_TYPE (ni), "niters"); + add_referenced_tmp_var (var); + ni_name = force_gimple_operand (ni, &stmt, false, var); + + pe = loop_preheader_edge (loop); + /* Any statements produced by force_gimple_operand are inserted on the + preheader edge; the insertion is expected not to create a new block. */ + if (stmt) + { + basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); + gcc_assert (!new_bb); + } + + return ni_name; +} + + +/* This function generates the following statements: + + ni_name = number of iterations loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf + + and places them at the loop preheader edge. 
*/ + +/* The division and multiplication by VF are emitted as shifts by + exact_log2 (VF). NOTE(review): this presumes VF is a power of two -- + confirm. The resulting names are returned through NI_NAME_PTR, + RATIO_NAME_PTR and RATIO_MULT_VF_NAME_PTR. */ +static void +vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, + tree *ni_name_ptr, + tree *ratio_mult_vf_name_ptr, + tree *ratio_name_ptr) +{ + + edge pe; + basic_block new_bb; + tree stmt, ni_name; + tree var; + tree ratio_name; + tree ratio_mult_vf_name; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree ni = LOOP_VINFO_NITERS (loop_vinfo); + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf)); + + pe = loop_preheader_edge (loop); + + /* Generate temporary variable that contains + number of iterations loop executes. */ + + ni_name = vect_build_loop_niters (loop_vinfo); + + /* Create: ratio = ni >> log2(vf) */ + + var = create_tmp_var (TREE_TYPE (ni), "bnd"); + add_referenced_tmp_var (var); + ratio_name = make_ssa_name (var, NULL_TREE); + stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name, + build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf)); + SSA_NAME_DEF_STMT (ratio_name) = stmt; + + /* The preheader edge is looked up again before each insertion; the + insertion itself is expected not to create a new basic block. */ + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + gcc_assert (!new_bb); + + /* Create: ratio_mult_vf = ratio << log2 (vf). */ + + var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); + add_referenced_tmp_var (var); + ratio_mult_vf_name = make_ssa_name (var, NULL_TREE); + stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, + build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf)); + SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + gcc_assert (!new_bb); + + /* Hand the three generated names back to the caller. */ + *ni_name_ptr = ni_name; + *ratio_mult_vf_name_ptr = ratio_mult_vf_name; + *ratio_name_ptr = ratio_name; + + return; +} + + +/* Function vect_update_ivs_after_vectorizer. + + "Advance" the induction variables of LOOP to the value they should take + after the execution of LOOP. 
This is currently necessary because the + vectorizer does not handle induction variables that are used after the + loop. Such a situation occurs when the last iterations of LOOP are + peeled, because: + 1. We introduced new uses after LOOP for IVs that were not originally used + after LOOP: the IVs of LOOP are now used by an epilog loop. + 2. LOOP is going to be vectorized; this means that it will iterate N/VF + times, whereas the loop IVs should be bumped N times. + + Input: + - LOOP - a loop that is going to be vectorized. The last few iterations + of LOOP were peeled. + - NITERS - the number of iterations that LOOP executes (before it is + vectorized). i.e, the number of times the ivs should be bumped. + - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path + coming out from LOOP on which there are uses of the LOOP ivs + (this is the path from LOOP->exit to epilog_loop->preheader). + + The new definitions of the ivs are placed in LOOP->exit. + The phi args associated with the edge UPDATE_E in the bb + UPDATE_E->dest are updated accordingly. + + Assumption 1: Like the rest of the vectorizer, this function assumes + a single loop exit that has a single predecessor. + + Assumption 2: The phi nodes in the LOOP header and in update_bb are + organized in the same order. + + Assumption 3: The access function of the ivs is simple enough (see + vect_can_advance_ivs_p). This assumption will be relaxed in the future. + + Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path + coming out of LOOP on which the ivs of LOOP are used (this is the path + that leads to the epilog loop; other paths skip the epilog loop). This + path starts with the edge UPDATE_E, and its destination (denoted update_bb) + needs to have its phis updated. 
+ */ + +static void +vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, + edge update_e) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block exit_bb = loop->exit_edges[0]->dest; + tree phi, phi1; + basic_block update_bb = update_e->dest; + + /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */ + + /* Make sure there exists a single-predecessor exit bb: */ + gcc_assert (EDGE_COUNT (exit_bb->preds) == 1); + + /* Walk the phi chains of the loop header and of update_bb in lockstep + (Assumption 2); both chains advance together even when a virtual phi + is skipped. */ + for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb); + phi && phi1; + phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1)) + { + tree access_fn = NULL; + tree evolution_part; + tree init_expr; + tree step_expr; + tree var, stmt, ni, ni_name; + block_stmt_iterator last_bsi; + + /* Skip virtual phi's. */ + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "virtual phi. skip."); + continue; + } + + access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); + gcc_assert (access_fn); + evolution_part = + unshare_expr (evolution_part_in_loop_num (access_fn, loop->num)); + gcc_assert (evolution_part != NULL_TREE); + + /* FORNOW: We do not support IVs whose evolution function is a polynomial + of degree >= 2 or exponential. */ + gcc_assert (!tree_is_chrec (evolution_part)); + + step_expr = evolution_part; + init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, + loop->num)); + + /* ni = init_expr + niters * step_expr: the value of the IV after + NITERS iterations. */ + ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), + build2 (MULT_EXPR, TREE_TYPE (niters), + niters, step_expr), init_expr); + + var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); + add_referenced_tmp_var (var); + + ni_name = force_gimple_operand (ni, &stmt, false, var); + + /* Insert stmt into exit_bb. */ + last_bsi = bsi_last (exit_bb); + if (stmt) + bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT); + + /* Fix phi expressions in the successor bb. 
*/ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) == + PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0))); + SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name); + } +} + + +/* Function vect_do_peeling_for_loop_bound + + Peel the last iterations of the loop represented by LOOP_VINFO. + The peeled iterations form a new epilog loop. Given that the loop now + iterates NITERS times, the new epilog loop iterates + NITERS % VECTORIZATION_FACTOR times. + + The original loop will later be made to iterate + NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */ + +static void +vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, + struct loops *loops) +{ + + tree ni_name, ratio_mult_vf_name; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *new_loop; + edge update_e; +#ifdef ENABLE_CHECKING + int loop_num; +#endif + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); + + /* Generate the following variables on the preheader of original loop: + + ni_name = number of iteration the original loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf */ + vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, + &ratio_mult_vf_name, ratio); + + /* Update loop info. */ + loop->pre_header = loop_preheader_edge (loop)->src; + loop->pre_header_edges[0] = loop_preheader_edge (loop); + +#ifdef ENABLE_CHECKING + loop_num = loop->num; +#endif + new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0], + ratio_mult_vf_name, ni_name, false); + /* NOTE(review): NEW_LOOP is dereferenced below but is only asserted to be + non-NULL when ENABLE_CHECKING is defined. */ +#ifdef ENABLE_CHECKING + gcc_assert (new_loop); + gcc_assert (loop_num == loop->num); + slpeel_verify_cfg_after_peeling (loop, new_loop); +#endif + + /* A guard that controls whether the new_loop is to be executed or skipped + is placed in LOOP->exit. LOOP->exit therefore has two successors - one + is the preheader of NEW_LOOP, where the IVs from LOOP are used. 
The other + is a bb after NEW_LOOP, where these IVs are not used. Find the edge that + is on the path where the LOOP IVs are used and need to be updated. */ + + if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest) + update_e = EDGE_PRED (new_loop->pre_header, 0); + else + update_e = EDGE_PRED (new_loop->pre_header, 1); + + /* Update IVs of original loop as if they were advanced + by ratio_mult_vf_name steps. */ + vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); + + /* After peeling we have to reset scalar evolution analyzer. */ + scev_reset (); + + return; +} + + +/* Function vect_gen_niters_for_prolog_loop + + Set the number of iterations for the loop represented by LOOP_VINFO + to the minimum between LOOP_NITERS (the original iteration count of the loop) + and the misalignment of DR - the first data reference recorded in + LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of + this loop, the data reference DR will refer to an aligned location. 
+ + The following computation is generated: + + compute address misalignment in bytes: + addr_mis = addr & (vectype_size - 1) + + prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) + + (elem_size = element type size; an element is the scalar element + whose type is the inner type of the vectype) */ + +static tree +vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) +{ + struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree var, stmt; + tree iters, iters_name; + edge pe; + basic_block new_bb; + tree dr_stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; + tree elem_misalign; + tree byte_misalign; + tree new_stmts = NULL_TREE; + tree start_addr = + vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); + tree ptr_type = TREE_TYPE (start_addr); + tree size = TYPE_SIZE (ptr_type); + /* TYPE is an unsigned integer type as wide as the pointer; the byte + misalignment is computed in it. */ + tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); + tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); + tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1); + tree niters_type = TREE_TYPE (loop_niters); + /* vectype_align / vf is used as the element size in bytes. + NOTE(review): this presumes the vector type's alignment equals its size + in bytes and that the quotient is a power of two -- confirm. */ + tree elem_size_log = + build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf)); + tree vf_tree = build_int_cst (unsigned_type_node, vf); + + /* Emit the statements that compute START_ADDR on the preheader edge. */ + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); + gcc_assert (!new_bb); + + /* Create: byte_misalign = addr & (vectype_size - 1) */ + byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); + + /* Create: elem_misalign = byte_misalign / element_size */ + elem_misalign = + build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log); + + /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ + iters = build2 
(MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign); + iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1); + iters = fold_convert (niters_type, iters); + + /* Create: prolog_loop_niters = min (iters, loop_niters) */ + /* If the loop bound is known at compile time we already verified that it is + greater than vf; since the misalignment ('iters') is at most vf, there's + no need to generate the MIN_EXPR in this case. */ + if (TREE_CODE (loop_niters) != INTEGER_CST) + iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); + + var = create_tmp_var (niters_type, "prolog_loop_niters"); + add_referenced_tmp_var (var); + iters_name = force_gimple_operand (iters, &stmt, false, var); + + /* Insert stmt on loop preheader edge. */ + pe = loop_preheader_edge (loop); + if (stmt) + { + /* NOTE(review): this declaration shadows the function-scope new_bb. */ + basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); + gcc_assert (!new_bb); + } + + return iters_name; +} + + +/* Function vect_update_inits_of_dr + + NITERS iterations were peeled from LOOP. DR represents a data reference + in LOOP. This function updates the information recorded in DR to + account for the fact that the first NITERS iterations had already been + executed. Specifically, it updates the OFFSET field of stmt_info. */ + +static void +vect_update_inits_of_dr (struct data_reference *dr, tree niters) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); + tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info); + + /* new offset = old offset + NITERS * step; NITERS is reused to hold the + product. */ + niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters, + STMT_VINFO_VECT_STEP (stmt_info))); + offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters)); + STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset; +} + + +/* Function vect_update_inits_of_drs + + NITERS iterations were peeled from the loop represented by LOOP_VINFO. + This function updates the information recorded for the data references in + the loop to account for the fact that the first NITERS iterations had + already been executed. 
Specifically, it updates the initial_condition of the + access_function of all the data_references in the loop. */ + +static void +vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) +{ + unsigned int i; + varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); + varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); + + if (vect_dump && (dump_flags & TDF_DETAILS)) + fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + vect_update_inits_of_dr (dr, niters); + } + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); + vect_update_inits_of_dr (dr, niters); + } +} + + +/* Function vect_do_peeling_for_alignment + + Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. + 'niters' is set to the misalignment of one of the data references in the + loop, thereby forcing it to refer to an aligned location at the beginning + of the execution of this loop. The data reference for which we are + peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ + +static void +vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree niters_of_prolog_loop, ni_name; + tree n_iters; + struct loop *new_loop; + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); + + ni_name = vect_build_loop_niters (loop_vinfo); + niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); + + /* Peel the prolog loop and iterate it niters_of_prolog_loop. 
*/ + new_loop = + slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), + niters_of_prolog_loop, ni_name, true); +#ifdef ENABLE_CHECKING + gcc_assert (new_loop); + slpeel_verify_cfg_after_peeling (new_loop, loop); +#endif + + /* Update number of times loop executes. */ + n_iters = LOOP_VINFO_NITERS (loop_vinfo); + LOOP_VINFO_NITERS (loop_vinfo) = + build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); + + /* Update the init conditions of the access functions of all data refs. */ + vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); + + /* After peeling we have to reset scalar evolution analyzer. */ + scev_reset (); + + return; +} + + +/* Function vect_transform_loop. + + The analysis phase has determined that the loop is vectorizable. + Vectorize the loop - created vectorized stmts to replace the scalar + stmts in the loop, and update the loop exit condition. */ + +void +vect_transform_loop (loop_vec_info loop_vinfo, + struct loops *loops ATTRIBUTE_UNUSED) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + block_stmt_iterator si; + int i; + tree ratio = NULL; + int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "=== vec_transform_loop ==="); + + + /* Peel the loop if there are data refs with unknown alignment. + Only one data ref with unknown store is allowed. */ + + if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + vect_do_peeling_for_alignment (loop_vinfo, loops); + + /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a + compile time constant), or it is a constant that doesn't divide by the + vectorization factor, then an epilog loop needs to be created. + We therefore duplicate the loop: the original loop will be vectorized, + and will compute the first (n/VF) iterations. 
The second copy of the loop + will remain scalar and will compute the remaining (n%VF) iterations. + (VF is the vectorization factor). */ + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) + vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops); + else + ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), + LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); + + /* 1) Make sure the loop header has exactly two entries + 2) Make sure we have a preheader basic block. */ + + gcc_assert (EDGE_COUNT (loop->header->preds) == 2); + + loop_split_edge_with (loop_preheader_edge (loop), NULL); + + + /* FORNOW: the vectorizer supports only loops which body consist + of one basic block (header + empty latch). When the vectorizer will + support more involved loop forms, the order by which the BBs are + traversed need to be reconsidered. */ + + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + + for (si = bsi_start (bb); !bsi_end_p (si);) + { + tree stmt = bsi_stmt (si); + stmt_vec_info stmt_info; + bool is_store; + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "------>vectorizing statement: "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + stmt_info = vinfo_for_stmt (stmt); + gcc_assert (stmt_info); + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + { + bsi_next (&si); + continue; + } +#ifdef ENABLE_CHECKING + /* FORNOW: Verify that all stmts operate on the same number of + units and no inner unrolling is necessary. 
*/ + gcc_assert + (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) + == vectorization_factor); +#endif + /* -------- vectorize statement ------------ */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform statement."); + + is_store = vect_transform_stmt (stmt, &si); + if (is_store) + { + /* free the attached stmt_vec_info and remove the stmt. */ + stmt_ann_t ann = stmt_ann (stmt); + free (stmt_info); + set_stmt_info (ann, NULL); + bsi_remove (&si); + continue; + } + + bsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + + slpeel_make_loop_iterate_ntimes (loop, ratio); + + if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "LOOP VECTORIZED."); +} |