diff options
author | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-10-25 21:29:48 +0000 |
---|---|---|
committer | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-10-25 21:29:48 +0000 |
commit | e21c468f20819a1d6545741af280a77e4f89c8e0 (patch) | |
tree | 646bec23201b08685d3560602023733c6d9f7259 /gcc | |
parent | 4b50824be623275a757eacd82ef171fd69cf455f (diff) | |
download | gcc-e21c468f20819a1d6545741af280a77e4f89c8e0.tar.gz |
Change vec_perm checking and expansion level.
The can_vec_perm_p interface changed to use a C integer array. This
allows easy re-use from the rtl level and the gimple level within
the vectorizer. It allows both to determine if a given permutation
is (un-)supported without having to create tree/rtl garbage.
The expand_vec_perm interface changed to use rtl. This allows easy
re-use from the rtl level, so that expand_vec_perm can be used in the
fallback implementation of other optabs.
* target.def (vec_perm_const_ok): Change parameters to mode and
array of indices.
* doc/tm.texi: Rebuild.
* config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change
parameters to mode and array of indices.
* expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Expand operands here.
* optabs.c (can_vec_perm_p): Rename from can_vec_perm_expr_p.
Change parameters to mode and array of indices.
(expand_vec_perm_1): Rename from expand_vec_perm_expr_1.
(expand_vec_perm): Rename from expand_vec_perm_expr. Change
parameters to mode and rtx inputs. Try lowering to QImode
vec_perm_const before trying fully variable permutation.
* optabs.h: Update decls.
* tree-vect-generic.c (lower_vec_perm): Extract array of indices from
VECTOR_CST to pass to can_vec_perm_p.
* tree-vect-slp.c (vect_get_mask_element): Change mask parameter type
from int pointer to unsigned char pointer.
(vect_transform_slp_perm_load): Update for change to can_vec_perm_p.
* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180449 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 18 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 2 | ||||
-rw-r--r-- | gcc/expr.c | 8 | ||||
-rw-r--r-- | gcc/optabs.c | 215 | ||||
-rw-r--r-- | gcc/optabs.h | 4 | ||||
-rw-r--r-- | gcc/target.def | 7 | ||||
-rw-r--r-- | gcc/tree-vect-generic.c | 14 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 36 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 22 |
10 files changed, 198 insertions, 148 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b15e4717ea6..af51915eae5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,25 @@ 2011-10-25 Richard Henderson <rth@redhat.com> + * target.def (vec_perm_const_ok): Change parameters to mode and + array of indicies. + * doc/tm.texi: Rebuild. + * config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change + parameters to mode and array of indicies. + * expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Expand operands here. + * optabs.c (can_vec_perm_p): Rename from can_vec_perm_expr_p. + Change parameters to mode and array of indicies. + (expand_vec_perm_1): Rename from expand_vec_perm_expr_1. + (expand_vec_perm): Rename from expand_vec_perm_expr. Change + parameters to mode and rtx inputs. Try lowering to QImode + vec_perm_const before trying fully variable permutation. + * optabs.h: Update decls. + * tree-vect-generic.c (lower_vec_perm): Extract array of indices from + VECTOR_CST to pass to can_vec_perm_p. + * tree-vect-slp.c (vect_get_mask_element): Change mask parameter type + from int pointer to unsigned char pointer. + (vect_transform_slp_perm_load): Update for change to can_vec_perm_p. + * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. + * tree.def (VEC_EXTRACT_EVEN_EXPR): Fix typo in text name. (VEC_EXTRACT_ODD_EXPR, VEC_INTERLEAVE_HIGH_EXPR, VEC_INTERLEAVE_LOW_EXPR): Likewise. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e336d5abcd5..0d5063e0e7e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -36477,14 +36477,14 @@ ix86_expand_vec_perm_const (rtx operands[4]) /* Implement targetm.vectorize.vec_perm_const_ok. 
*/ static bool -ix86_vectorize_vec_perm_const_ok (tree vec_type, tree mask) +ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; bool ret, one_vec; - tree list; - d.vmode = TYPE_MODE (vec_type); + d.vmode = vmode; d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = true; @@ -36505,19 +36505,13 @@ ix86_vectorize_vec_perm_const_ok (tree vec_type, tree mask) /* Extract the values from the vector CST into the permutation array in D. */ - list = TREE_VECTOR_CST_ELTS (mask); - for (i = which = 0; i < nelt; ++i, list = TREE_CHAIN (list)) + memcpy (d.perm, sel, nelt); + for (i = which = 0; i < nelt; ++i) { - unsigned HOST_WIDE_INT e; - - gcc_checking_assert (host_integerp (TREE_VALUE (list), 1)); - e = tree_low_cst (TREE_VALUE (list), 1); + unsigned char e = d.perm[i]; gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - d.perm[i] = e; } - gcc_assert (list == NULL); /* For all elements from second vector, fold the elements to first. */ if (which == 2) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index a43ce3d1099..422f74c5874 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5711,7 +5711,7 @@ misalignment value (@var{misalign}). Return true if vector alignment is reachable (by peeling N iterations) for the given type. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (tree @var{vec_type}, tree @var{mask}) +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (enum @var{machine_mode}, const unsigned char *@var{sel}) Return true if a vector created for @code{vec_perm_const} is valid. 
@end deftypefn diff --git a/gcc/expr.c b/gcc/expr.c index a4cfee005d0..121db5eaf21 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8752,9 +8752,11 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, goto binop; case VEC_PERM_EXPR: - target = expand_vec_perm_expr (type, treeop0, treeop1, treeop2, target); - gcc_assert (target); - return target; + expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL); + op2 = expand_normal (treeop2); + temp = expand_vec_perm (mode, op0, op1, op2, target); + gcc_assert (temp); + return temp; case DOT_PROD_EXPR: { diff --git a/gcc/optabs.c b/gcc/optabs.c index 5036856524a..26669f404ff 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -6701,20 +6701,22 @@ vector_compare_rtx (tree cond, bool unsignedp, enum insn_code icode) of the CPU. SEL may be NULL, which stands for an unknown constant. */ bool -can_vec_perm_expr_p (tree type, tree sel) +can_vec_perm_p (enum machine_mode mode, bool variable, + const unsigned char *sel) { - enum machine_mode mode, qimode; - mode = TYPE_MODE (type); + enum machine_mode qimode; /* If the target doesn't implement a vector mode for the vector type, then no operations are supported. */ if (!VECTOR_MODE_P (mode)) return false; - if (sel == NULL || TREE_CODE (sel) == VECTOR_CST) + if (!variable) { if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing - && (sel == NULL || targetm.vectorize.vec_perm_const_ok (type, sel))) + && (sel == NULL + || targetm.vectorize.vec_perm_const_ok == NULL + || targetm.vectorize.vec_perm_const_ok (mode, sel))) return true; } @@ -6722,6 +6724,8 @@ can_vec_perm_expr_p (tree type, tree sel) return true; /* We allow fallback to a QI vector mode, and adjust the mask. 
*/ + if (GET_MODE_INNER (mode) == QImode) + return false; qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode)); if (!VECTOR_MODE_P (qimode)) return false; @@ -6732,9 +6736,9 @@ can_vec_perm_expr_p (tree type, tree sel) if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing) return false; - /* In order to support the lowering of non-constant permutations, + /* In order to support the lowering of variable permutations, we need to support shifts and adds. */ - if (sel != NULL && TREE_CODE (sel) != VECTOR_CST) + if (variable) { if (GET_MODE_UNIT_SIZE (mode) > 2 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing @@ -6747,11 +6751,11 @@ can_vec_perm_expr_p (tree type, tree sel) return true; } -/* A subroutine of expand_vec_perm_expr for expanding one vec_perm insn. */ +/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */ static rtx -expand_vec_perm_expr_1 (enum insn_code icode, rtx target, - rtx v0, rtx v1, rtx sel) +expand_vec_perm_1 (enum insn_code icode, rtx target, + rtx v0, rtx v1, rtx sel) { enum machine_mode tmode = GET_MODE (target); enum machine_mode smode = GET_MODE (sel); @@ -6783,119 +6787,130 @@ expand_vec_perm_expr_1 (enum insn_code icode, rtx target, return NULL_RTX; } -/* Generate instructions for VEC_PERM_EXPR given its type and three - operands. */ +/* Generate instructions for vec_perm optab given its mode + and three operands. 
*/ + rtx -expand_vec_perm_expr (tree type, tree v0, tree v1, tree sel, rtx target) +expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) { enum insn_code icode; - enum machine_mode mode = TYPE_MODE (type); enum machine_mode qimode; - rtx v0_rtx, v1_rtx, sel_rtx, *vec, vt, tmp; unsigned int i, w, e, u; + rtx tmp, sel_qi; + rtvec vec; - if (!target) + if (!target || GET_MODE (target) != mode) target = gen_reg_rtx (mode); - v0_rtx = expand_normal (v0); - if (operand_equal_p (v0, v1, 0)) - v1_rtx = v0_rtx; - else - v1_rtx = expand_normal (v1); - sel_rtx = expand_normal (sel); + + w = GET_MODE_SIZE (mode); + e = GET_MODE_NUNITS (mode); + u = GET_MODE_UNIT_SIZE (mode); + + /* Set QIMODE to a different vector mode with byte elements. + If no such mode, or if MODE already has byte elements, use VOIDmode. */ + qimode = VOIDmode; + if (GET_MODE_INNER (mode) != QImode) + { + qimode = mode_for_vector (QImode, w); + if (!VECTOR_MODE_P (qimode)) + qimode = VOIDmode; + } /* If the input is a constant, expand it specially. */ - if (CONSTANT_P (sel_rtx)) + if (CONSTANT_P (sel)) { icode = direct_optab_handler (vec_perm_const_optab, mode); - if (icode != CODE_FOR_nothing - && targetm.vectorize.vec_perm_const_ok (TREE_TYPE (v0), sel) - && (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx, - v1_rtx, sel_rtx)) != NULL) - return tmp; + if (icode != CODE_FOR_nothing) + { + tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); + if (tmp) + return tmp; + } + + /* Fall back to a constant byte-based permutation. 
*/ + if (qimode != VOIDmode) + { + icode = direct_optab_handler (vec_perm_const_optab, qimode); + if (icode != CODE_FOR_nothing) + { + vec = rtvec_alloc (w); + for (i = 0; i < e; ++i) + { + unsigned int j, this_e; + + this_e = INTVAL (XVECEXP (sel, 0, i)); + this_e &= 2 * e - 1; + this_e *= u; + + for (j = 0; j < u; ++j) + RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j); + } + sel_qi = gen_rtx_CONST_VECTOR (qimode, vec); + + tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target), + gen_lowpart (qimode, v0), + gen_lowpart (qimode, v1), sel_qi); + if (tmp) + return gen_lowpart (mode, tmp); + } + } } - /* Otherwise fall back to a fully variable permuation. */ + /* Otherwise expand as a fully variable permuation. */ icode = direct_optab_handler (vec_perm_optab, mode); - if (icode != CODE_FOR_nothing - && (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx, - v1_rtx, sel_rtx)) != NULL) - return tmp; + if (icode != CODE_FOR_nothing) + { + tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); + if (tmp) + return tmp; + } /* As a special case to aid several targets, lower the element-based permutation to a byte-based permutation and try again. */ - qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode)); - if (!VECTOR_MODE_P (qimode)) + if (qimode == VOIDmode) return NULL_RTX; - - /* ??? For completeness, we ought to check the QImode version of - vec_perm_const_optab. But all users of this implicit lowering - feature implement the variable vec_perm_optab. 
*/ icode = direct_optab_handler (vec_perm_optab, qimode); if (icode == CODE_FOR_nothing) return NULL_RTX; - w = GET_MODE_SIZE (mode); - e = GET_MODE_NUNITS (mode); - u = GET_MODE_UNIT_SIZE (mode); - vec = XALLOCAVEC (rtx, w); - - if (CONSTANT_P (sel_rtx)) - { - unsigned int j; - for (i = 0; i < e; ++i) - { - unsigned int this_e = INTVAL (XVECEXP (sel_rtx, 0, i)); - this_e &= 2 * e - 1; - this_e *= u; - - for (j = 0; j < u; ++j) - vec[i * u + j] = GEN_INT (this_e + j); - } - sel_rtx = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec)); - } + /* Multiply each element by its byte size. */ + if (u == 2) + sel = expand_simple_binop (mode, PLUS, sel, sel, sel, 0, OPTAB_DIRECT); else - { - /* Multiply each element by its byte size. */ - if (u == 2) - sel_rtx = expand_simple_binop (mode, PLUS, sel_rtx, sel_rtx, - sel_rtx, 0, OPTAB_DIRECT); - else - sel_rtx = expand_simple_binop (mode, ASHIFT, sel_rtx, - GEN_INT (exact_log2 (u)), - sel_rtx, 0, OPTAB_DIRECT); - gcc_assert (sel_rtx); - - /* Broadcast the low byte each element into each of its bytes. */ - for (i = 0; i < w; ++i) - { - int this_e = i / u * u; - if (BYTES_BIG_ENDIAN) - this_e += u - 1; - vec[i] = GEN_INT (this_e); - } - vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec)); - sel_rtx = gen_lowpart (qimode, sel_rtx); - sel_rtx = expand_vec_perm_expr_1 (icode, gen_reg_rtx (qimode), - sel_rtx, sel_rtx, vt); - gcc_assert (sel_rtx != NULL); - - /* Add the byte offset to each byte element. */ - /* Note that the definition of the indicies here is memory ordering, - so there should be no difference between big and little endian. 
*/ - for (i = 0; i < w; ++i) - vec[i] = GEN_INT (i % u); - vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec)); - sel_rtx = expand_simple_binop (qimode, PLUS, sel_rtx, vt, - NULL_RTX, 0, OPTAB_DIRECT); - gcc_assert (sel_rtx); - } - - tmp = expand_vec_perm_expr_1 (icode, gen_lowpart (qimode, target), - gen_lowpart (qimode, v0_rtx), - gen_lowpart (qimode, v1_rtx), sel_rtx); - gcc_assert (tmp != NULL); - - return gen_lowpart (mode, tmp); + sel = expand_simple_binop (mode, ASHIFT, sel, GEN_INT (exact_log2 (u)), + sel, 0, OPTAB_DIRECT); + gcc_assert (sel != NULL); + + /* Broadcast the low byte each element into each of its bytes. */ + vec = rtvec_alloc (w); + for (i = 0; i < w; ++i) + { + int this_e = i / u * u; + if (BYTES_BIG_ENDIAN) + this_e += u - 1; + RTVEC_ELT (vec, i) = GEN_INT (this_e); + } + tmp = gen_rtx_CONST_VECTOR (qimode, vec); + sel = gen_lowpart (qimode, sel); + sel = expand_vec_perm (qimode, sel, sel, tmp, NULL); + gcc_assert (sel != NULL); + + /* Add the byte offset to each byte element. */ + /* Note that the definition of the indicies here is memory ordering, + so there should be no difference between big and little endian. */ + vec = rtvec_alloc (w); + for (i = 0; i < w; ++i) + RTVEC_ELT (vec, i) = GEN_INT (i % u); + tmp = gen_rtx_CONST_VECTOR (qimode, vec); + sel = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT); + gcc_assert (sel != NULL); + + tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target), + gen_lowpart (qimode, v0), + gen_lowpart (qimode, v1), sel); + if (tmp) + tmp = gen_lowpart (mode, tmp); + return tmp; } diff --git a/gcc/optabs.h b/gcc/optabs.h index 86e0ec93bc1..9e3c5b06fc4 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -901,10 +901,10 @@ extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx); extern rtx expand_vec_shift_expr (sepops, rtx); /* Return tree if target supports vector operations for VEC_PERM_EXPR. 
*/ -extern bool can_vec_perm_expr_p (tree, tree); +extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *); /* Generate code for VEC_PERM_EXPR. */ -extern rtx expand_vec_perm_expr (tree, tree, tree, tree, rtx); +extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx); /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. */ diff --git a/gcc/target.def b/gcc/target.def index 60fad2a813a..f89bb519369 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -985,12 +985,13 @@ DEFHOOK bool, (const_tree type, bool is_packed), default_builtin_vector_alignment_reachable) -/* Return true if a vector created for vec_perm_const is valid. */ +/* Return true if a vector created for vec_perm_const is valid. + A NULL indicates that all constants are valid permutations. */ DEFHOOK (vec_perm_const_ok, "", - bool, (tree vec_type, tree mask), - hook_bool_tree_tree_true) + bool, (enum machine_mode, const unsigned char *sel), + NULL) /* Return true if the target supports misaligned store/load of a specific factor denoted in the third parameter. 
The last parameter diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 37ff807b1b5..42ce2e3d729 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -641,13 +641,23 @@ lower_vec_perm (gimple_stmt_iterator *gsi) location_t loc = gimple_location (gsi_stmt (*gsi)); unsigned i; - if (can_vec_perm_expr_p (vect_type, mask)) + if (TREE_CODE (mask) == VECTOR_CST) + { + unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); + tree vals = TREE_VECTOR_CST_ELTS (mask); + + for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals)) + sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)); + + if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) + return; + } + else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL)) return; warning_at (loc, OPT_Wvector_operation_performance, "vector shuffling operation will be expanded piecewise"); - v = VEC_alloc (constructor_elt, gc, elements); for (i = 0; i < elements; i++) { diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f75817b985f..790f2dd0a75 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2356,7 +2356,7 @@ vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt, static bool vect_get_mask_element (gimple stmt, int first_mask_element, int m, int mask_nunits, bool only_one_vec, int index, - int *mask, int *current_mask_element, + unsigned char *mask, int *current_mask_element, bool *need_next_vector, int *number_of_mask_fixes, bool *mask_fixed, bool *needs_first_vector) { @@ -2443,14 +2443,18 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, gimple next_scalar_stmt; int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int first_mask_element; - int index, unroll_factor, *mask, current_mask_element, ncopies; + int index, unroll_factor, current_mask_element, ncopies; + unsigned char *mask; bool only_one_vec = false, need_next_vector = false; int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter; int 
number_of_mask_fixes = 1; bool mask_fixed = false; bool needs_first_vector = false; + enum machine_mode mode; - if (!can_vec_perm_expr_p (vectype, NULL_TREE)) + mode = TYPE_MODE (vectype); + + if (!can_vec_perm_p (mode, false, NULL)) { if (vect_print_dump_info (REPORT_DETAILS)) { @@ -2467,7 +2471,7 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1); mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = TYPE_VECTOR_SUBPARTS (vectype); - mask = (int *) xmalloc (sizeof (int) * nunits); + mask = XALLOCAVEC (unsigned char, nunits); unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE @@ -2529,6 +2533,18 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, { tree mask_vec = NULL; + if (!can_vec_perm_p (mode, false, mask)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "unsupported vect permute { "); + for (i = 0; i < nunits; ++i) + fprintf (vect_dump, "%d ", mask[i]); + fprintf (vect_dump, "}\n"); + } + return false; + } + while (--index >= 0) { tree t = build_int_cst (mask_element_type, mask[index]); @@ -2537,17 +2553,6 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, mask_vec = build_vector (mask_type, mask_vec); index = 0; - if (!can_vec_perm_expr_p (vectype, mask_vec)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "unsupported vect permute "); - print_generic_expr (vect_dump, mask_vec, 0); - } - free (mask); - return false; - } - if (!analyze_only) { if (need_next_vector) @@ -2569,7 +2574,6 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, } } - free (mask); return true; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1aba74601c1..4cd582dad89 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4090,25 +4090,29 @@ 
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, static tree perm_mask_for_reverse (tree vectype) { - tree mask_element_type, mask_type, mask_vec = NULL; + tree mask_elt_type, mask_type, mask_vec; int i, nunits; + unsigned char *sel; - if (!can_vec_perm_expr_p (vectype, NULL_TREE)) + nunits = TYPE_VECTOR_SUBPARTS (vectype); + sel = XALLOCAVEC (unsigned char, nunits); + + for (i = 0; i < nunits; ++i) + sel[i] = nunits - 1 - i; + + if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) return NULL; - mask_element_type + mask_elt_type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1); - mask_type = get_vectype_for_scalar_type (mask_element_type); - nunits = TYPE_VECTOR_SUBPARTS (vectype); + mask_type = get_vectype_for_scalar_type (mask_elt_type); + mask_vec = NULL; for (i = 0; i < nunits; i++) - mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec); + mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec); mask_vec = build_vector (mask_type, mask_vec); - if (!can_vec_perm_expr_p (vectype, mask_vec)) - return NULL; - return mask_vec; } |