diff options
author | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-19 12:02:48 +0000 |
---|---|---|
committer | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-19 12:02:48 +0000 |
commit | b0eb8c663b1bca6c460b0a6754fd8c49ca018266 (patch) | |
tree | 25bb935b47a86dcee54460eecb1a8c69809ec5da /gcc/tree-vectorizer.c | |
parent | 221e9a92bd54d3f572f14697a066205ee80ec187 (diff) | |
download | gcc-b0eb8c663b1bca6c460b0a6754fd8c49ca018266.tar.gz |
* tree-data-refs.c (split_constant_offset): Expose.
* tree-data-refs.h (split_constant_offset): Add declaration.
* tree-vectorizer.h (dr_alignment_support): Renamed
dr_unaligned_software_pipeline to dr_explicit_realign_optimized.
Added a new value dr_explicit_realign.
(_stmt_vec_info): Added new fields: dr_base_address, dr_init,
dr_offset, dr_step, and dr_aligned_to, along with new access
functions for these fields: STMT_VINFO_DR_BASE_ADDRESS,
STMT_VINFO_DR_INIT, STMT_VINFO_DR_OFFSET, STMT_VINFO_DR_STEP, and
STMT_VINFO_DR_ALIGNED_TO.
* tree-vectorizer.c (vect_supportable_dr_alignment): Add
documentation.
In case of outer-loop vectorization with non-fixed misalignment - use
the dr_explicit_realign scheme instead of the optimized realignment
scheme.
(new_stmt_vec_info): Initialize new fields.
* tree-vect-analyze.c (vect_compute_data_ref_alignment): Handle the
'nested_in_vect_loop' case. Change verbosity level.
(vect_analyze_data_ref_access): Handle the 'nested_in_vect_loop' case.
Don't fail on zero step in the outer-loop for loads.
(vect_analyze_data_refs): Call split_constant_offset to calculate base,
offset and init relative to the outer-loop.
* tree-vect-transform.c (vect_create_data_ref_ptr): Replace the unused
BSI function argument with a new function argument - at_loop.
Simplify the condition that determines STEP. Takes additional argument
INV_P. Support outer-loop vectorization (handle the nested_in_vect_loop
case), including zero step in the outer-loop. Call
vect_create_addr_base_for_vector_ref with additional argument.
(vect_create_addr_base_for_vector_ref): Takes additional argument LOOP.
Updated function documentation. Handle the 'nested_in_vect_loop' case.
Fixed and simplified calculation of step.
(vectorizable_store): Call vect_create_data_ref_ptr with loop instead
of bsi, and with additional argument. Call bump_vector_ptr with
additional argument. Fix typos. Handle the 'nested_in_vect_loop' case.
(vect_setup_realignment): Takes additional arguments INIT_ADDR and
DR_ALIGNMENT_SUPPORT. Returns another value AT_LOOP. Handle the case
when the realignment setup needs to take place inside the loop. Support
the dr_explicit_realign scheme. Allow generating the optimized
realignment scheme for outer-loop vectorization. Added documentation.
(vectorizable_load): Support the dr_explicit_realign scheme. Handle the
'nested_in_vect_loop' case, including loads that are invariant in the
outer-loop and the realignment schemes. Handle the case when the
realignment setup needs to take place inside the loop. Call
vect_setup_realignment with additional arguments. Call
vect_create_data_ref_ptr with additional argument and with loop instead
of bsi. Fix 80-column overflow. Fix typos. Rename PHI_STMT to PHI.
(vect_gen_niters_for_prolog_loop): Call
vect_create_addr_base_for_vector_ref with additional arguments.
(vect_create_cond_for_align_checks): Likewise.
(bump_vector_ptr): Updated to support the new dr_explicit_realign
scheme: takes additional argument bump; argument ptr_incr is now
optional; updated documentation.
(vect_init_vector): Takes additional argument (bsi). Use it, if
available, to insert the vector initialization.
(get_initial_def_for_induction): Pass additional argument in call to
vect_init_vector.
(vect_get_vec_def_for_operand): Likewise.
(vect_setup_realignment): Likewise.
(vectorizable_load): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127624 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- | gcc/tree-vectorizer.c | 97 |
1 files changed, 93 insertions, 4 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 20c867c708b..372334dddd9 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1345,6 +1345,13 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) STMT_VINFO_IN_PATTERN_P (res) = false; STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL; + + STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; + STMT_VINFO_DR_OFFSET (res) = NULL; + STMT_VINFO_DR_INIT (res) = NULL; + STMT_VINFO_DR_STEP (res) = NULL; + STMT_VINFO_DR_ALIGNED_TO (res) = NULL; + if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt))) STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; else @@ -1655,21 +1662,103 @@ get_vectype_for_scalar_type (tree scalar_type) enum dr_alignment_support vect_supportable_dr_alignment (struct data_reference *dr) { - tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); + tree stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); enum machine_mode mode = (int) TYPE_MODE (vectype); + struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)); + bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt); + bool invariant_in_outerloop = false; if (aligned_access_p (dr)) return dr_aligned; + if (nested_in_vect_loop) + { + tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info); + invariant_in_outerloop = + (tree_int_cst_compare (outerloop_step, size_zero_node) == 0); + } + /* Possibly unaligned access. */ + + /* We can choose between using the implicit realignment scheme (generating + a misaligned_move stmt) and the explicit realignment scheme (generating + aligned loads with a REALIGN_LOAD). There are two variants to the explicit + realignment scheme: optimized, and unoptimized. + We can optimize the realignment only if the step between consecutive + vector loads is equal to the vector size. 
Since the vector memory + accesses advance in steps of VS (Vector Size) in the vectorized loop, it + is guaranteed that the misalignment amount remains the same throughout the + execution of the vectorized loop. Therefore, we can create the + "realignment token" (the permutation mask that is passed to REALIGN_LOAD) + at the loop preheader. + + However, in the case of outer-loop vectorization, when vectorizing a + memory access in the inner-loop nested within the LOOP that is now being + vectorized, while it is guaranteed that the misalignment of the + vectorized memory access will remain the same in different outer-loop + iterations, it is *not* guaranteed that it will remain the same throughout + the execution of the inner-loop. This is because the inner-loop advances + with the original scalar step (and not in steps of VS). If the inner-loop + step happens to be a multiple of VS, then the misalignment remains fixed + and we can use the optimized realignment scheme. For example: + + for (i=0; i<N; i++) + for (j=0; j<M; j++) + s += a[i+j]; + + When vectorizing the i-loop in the above example, the step between + consecutive vector loads is 1, and so the misalignment does not remain + fixed across the execution of the inner-loop, and the realignment cannot + be optimized (as illustrated in the following pseudo vectorized loop): + + for (i=0; i<N; i+=4) + for (j=0; j<M; j++){ + vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...} + // when j is {0,1,2,3,4,5,6,7,...} respectively. + // (assuming that we start from an aligned address). + } + + We therefore have to use the unoptimized realignment scheme: + + for (i=0; i<N; i+=4) + for (j=k; j<M; j+=4) + vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming + // that the misalignment of the initial address is + // 0). 
+ + The loop can then be vectorized as follows: + + for (k=0; k<4; k++){ + rt = get_realignment_token (&vp[k]); + for (i=0; i<N; i+=4){ + v1 = vp[i+k]; + for (j=k; j<M; j+=4){ + v2 = vp[i+j+VS-1]; + va = REALIGN_LOAD <v1,v2,rt>; + vs += va; + v1 = v2; + } + } + } */ + if (DR_IS_READ (dr)) { - if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing + if (optab_handler (vec_realign_load_optab, mode)->insn_code != + CODE_FOR_nothing && (!targetm.vectorize.builtin_mask_for_load || targetm.vectorize.builtin_mask_for_load ())) - return dr_unaligned_software_pipeline; + { + if (nested_in_vect_loop + && TREE_INT_CST_LOW (DR_STEP (dr)) != UNITS_PER_SIMD_WORD) + return dr_explicit_realign; + else + return dr_explicit_realign_optimized; + } - if (optab_handler (movmisalign_optab, mode)->insn_code != CODE_FOR_nothing) + if (optab_handler (movmisalign_optab, mode)->insn_code != + CODE_FOR_nothing) /* Can't software pipeline the loads, but can at least do them. */ return dr_unaligned_supported; } |