summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPrathamesh Kulkarni <prathamesh.kulkarni@linaro.org>2022-05-25 00:26:28 +0530
committerPrathamesh Kulkarni <prathamesh.kulkarni@linaro.org>2022-05-25 00:42:00 +0530
commitae8decf1d2b8329af59592b4fa78ee8dfab3ba5e (patch)
tree276aaf3fc98f7ef3454430e574fc66affe4265b5
parent2f4f7de787e5844515d27b2269fc472f95a9916a (diff)
downloadgcc-ae8decf1d2b8329af59592b4fa78ee8dfab3ba5e.tar.gz
Add new parameter to vec_perm_const hook for specifying operand mode.
The rationale of the patch is to support vec_perm_expr of the form: lhs = vec_perm_expr<rhs, mask> where lhs and rhs are vector types with different lengths but have same element type. For example, lhs is SVE vector and rhs is corresponding AdvSIMD vector. It would also allow to express extract even/odd and interleave operations with a VEC_PERM_EXPR. The interleave currently has the issue that we have to artificially widen the inputs with "dont-care" elements. gcc/ChangeLog: * target.def (vec_perm_const): Define new parameter op_mode and update doc. * doc/tm.texi: Regenerate. * config/aarch64/aarch64.cc (aarch64_vectorize_vec_perm_const): Adjust vec_perm_const hook to add new parameter op_mode and return false if result and operand modes do not match. * config/arm/arm.cc (arm_vectorize_vec_perm_const): Likewise. * config/gcn/gcn.cc (gcn_vectorize_vec_perm_const): Likewise. * config/ia64/ia64.cc (ia64_vectorize_vec_perm_const): Likewise. * config/mips/mips.cc (mips_vectorize_vec_perm_const): Likewise. * config/rs6000/rs6000.cc (rs6000_vectorize_vec_perm_const): Likewise * config/s390/s390.cc (s390_vectorize_vec_perm_const): Likewise. * config/sparc/sparc.cc (sparc_vectorize_vec_perm_const): Likewise. * config/i386/i386-expand.cc (ix86_vectorize_vec_perm_const): Likewise. * config/i386/i386-expand.h (ix86_vectorize_vec_perm_const): Adjust prototype. * config/i386/sse.md (ashrv4di3): Adjust call to vec_perm_const hook. (ashrv2di3): Likewise. * optabs.cc (expand_vec_perm_const): Likewise. * optabs-query.h (can_vec_perm_const_p): Adjust prototype. * optabs-query.cc (can_vec_perm_const_p): Define new parameter op_mode and pass it to vec_perm_const hook. (can_mult_highpart_p): Adjust call to can_vec_perm_const_p. * match.pd (vec_perm X Y CST): Likewise. * tree-ssa-forwprop.cc (simplify_vector_constructor): Likewise. * tree-vect-data-refs.cc (vect_grouped_store_supported): Likewise. (vect_grouped_load_supported): Likewise. (vect_shift_permute_load_chain): Likewise. * tree-vect-generic.cc (lower_vec_perm): Likewise. * tree-vect-loop-manip.cc (interleave_supported_p): Likewise. * tree-vect-loop.cc (have_whole_vector_shift): Likewise. * tree-vect-patterns.cc (vect_recog_rotate_pattern): Likewise. * tree-vect-slp.cc (can_duplicate_and_interleave_p): Likewise. (vect_transform_slp_perm_load): Likewise. (vectorizable_slp_permutation): Likewise. * tree-vect-stmts.cc (perm_mask_for_reverse): Likewise. (vectorizable_bswap): Likewise. (scan_store_can_perm_p): Likewise. (vect_gen_perm_mask_checked): Likewise.
-rw-r--r--gcc/config/aarch64/aarch64.cc8
-rw-r--r--gcc/config/arm/arm.cc6
-rw-r--r--gcc/config/gcn/gcn.cc7
-rw-r--r--gcc/config/i386/i386-expand.cc8
-rw-r--r--gcc/config/i386/i386-expand.h5
-rw-r--r--gcc/config/i386/sse.md10
-rw-r--r--gcc/config/ia64/ia64.cc12
-rw-r--r--gcc/config/mips/mips.cc8
-rw-r--r--gcc/config/rs6000/rs6000.cc8
-rw-r--r--gcc/config/s390/s390.cc6
-rw-r--r--gcc/config/sparc/sparc.cc11
-rw-r--r--gcc/doc/tm.texi18
-rw-r--r--gcc/match.pd8
-rw-r--r--gcc/optabs-query.cc16
-rw-r--r--gcc/optabs-query.h4
-rw-r--r--gcc/optabs.cc7
-rw-r--r--gcc/target.def18
-rw-r--r--gcc/tree-ssa-forwprop.cc6
-rw-r--r--gcc/tree-vect-data-refs.cc35
-rw-r--r--gcc/tree-vect-generic.cc5
-rw-r--r--gcc/tree-vect-loop-manip.cc3
-rw-r--r--gcc/tree-vect-loop.cc2
-rw-r--r--gcc/tree-vect-patterns.cc3
-rw-r--r--gcc/tree-vect-slp.cc10
-rw-r--r--gcc/tree-vect-stmts.cc11
25 files changed, 150 insertions, 85 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ba5b6be850a..d4c575ce976 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24115,9 +24115,13 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
/* Check whether the mask can be applied to a single vector. */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 2afe0445ed5..70c2d50f0cc 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -31813,9 +31813,13 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
int i, nelt, which;
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 5e75a1b63aa..6fc20d3f659 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4161,10 +4161,13 @@ gcn_make_vec_perm_address (unsigned int *perm)
permutations. */
static bool
-gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
- rtx src0, rtx src1,
+gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx dst, rtx src0, rtx src1,
const vec_perm_indices & sel)
{
+ if (vmode != op_mode)
+ return false;
+
unsigned int nelt = GET_MODE_NUNITS (vmode);
gcc_assert (VECTOR_MODE_P (vmode));
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index e3bd661470b..5cd7b994aad 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -22069,9 +22069,13 @@ canonicalize_perm (struct expand_vec_perm_d *d)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
bool
-ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
unsigned int i, nelt, which;
diff --git a/gcc/config/i386/i386-expand.h b/gcc/config/i386/i386-expand.h
index 9d320c29552..6c650196c9c 100644
--- a/gcc/config/i386/i386-expand.h
+++ b/gcc/config/i386/i386-expand.h
@@ -48,8 +48,9 @@ rtx gen_push (rtx arg);
rtx gen_pop (rtx arg);
rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
machine_mode mode, int ignore);
-bool ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel);
+bool ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel);
bool ix86_notrack_prefixed_insn_p (rtx_insn *);
machine_mode ix86_split_reduction (machine_mode mode);
void ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f261ff60d1c..8b2602bfa79 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15842,8 +15842,9 @@
sel[7] = 15;
}
vec_perm_indices indices (sel, 2, 8);
- bool ok = targetm.vectorize.vec_perm_const (V8SImode, target,
- arg0, arg1, indices);
+ bool ok = targetm.vectorize.vec_perm_const (V8SImode, V8SImode,
+ target, arg0, arg1,
+ indices);
gcc_assert (ok);
emit_move_insn (operands[0],
lowpart_subreg (V4DImode, target, V8SImode));
@@ -24629,8 +24630,9 @@
sel[3] = 7;
}
vec_perm_indices indices (sel, arg0 != arg1 ? 2 : 1, 4);
- bool ok = targetm.vectorize.vec_perm_const (V4SImode, target,
- arg0, arg1, indices);
+ bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode,
+ target, arg0, arg1,
+ indices);
gcc_assert (ok);
emit_move_insn (operands[0],
lowpart_subreg (V2DImode, target, V4SImode));
diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index f9fb681a36c..25e4a47e363 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -332,8 +332,8 @@ static fixed_size_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
bool, bool);
-static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
- const vec_perm_indices &);
+static bool ia64_vectorize_vec_perm_const (machine_mode, machine_mode, rtx,
+ rtx, rtx, const vec_perm_indices &);
static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
@@ -11751,9 +11751,13 @@ ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+ia64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
unsigned int i, nelt, which;
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index e64928f4113..5eb845960e1 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -21790,9 +21790,13 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-mips_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+mips_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
int i, nelt, which;
unsigned char orig_perm[MAX_VECT_LEN];
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d4defc855d0..0af2085adc0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -23294,9 +23294,13 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
bool testing_p = !target;
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d46aba66a91..444b1ec20d7 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17175,9 +17175,13 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
hook is supposed to emit the required INSNs. */
bool
-s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
unsigned int i, nelt;
diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc
index aca925befe1..c72c38e1999 100644
--- a/gcc/config/sparc/sparc.cc
+++ b/gcc/config/sparc/sparc.cc
@@ -712,7 +712,8 @@ static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
-static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
+static bool sparc_vectorize_vec_perm_const (machine_mode, machine_mode,
+ rtx, rtx, rtx,
const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
@@ -13035,9 +13036,13 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
/* Implement TARGET_VEC_PERM_CONST. */
static bool
-sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
- rtx op1, const vec_perm_indices &sel)
+sparc_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
if (!TARGET_VIS2)
return false;
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c5006afc00d..b0ea39884aa 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6088,14 +6088,18 @@ for the given scalar type @var{type}. @var{is_packed} is false if the scalar
access using @var{type} is known to be naturally aligned.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel})
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, machine_mode @var{op_mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel})
This hook is used to test whether the target can permute up to two
-vectors of mode @var{mode} using the permutation vector @code{sel}, and
-also to emit such a permutation. In the former case @var{in0}, @var{in1}
-and @var{out} are all null. In the latter case @var{in0} and @var{in1} are
-the source vectors and @var{out} is the destination vector; all three are
-operands of mode @var{mode}. @var{in1} is the same as @var{in0} if
-@var{sel} describes a permutation on one vector instead of two.
+vectors of mode @var{op_mode} using the permutation vector @code{sel},
+producing a vector of mode @var{mode}. The hook is also used to emit such
+a permutation.
+
+When the hook is being used to test whether the target supports a permutation,
+@var{in0}, @var{in1}, and @var{out} are all null. When the hook is being used
+to emit a permutation, @var{in0} and @var{in1} are the source vectors of mode
+@var{op_mode} and @var{out} is the destination vector of mode @var{mode}.
+@var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one
+vector instead of two.
Return true if the operation is possible, emitting instructions for it
if rtxes are provided.
diff --git a/gcc/match.pd b/gcc/match.pd
index c2fed9b9b00..183a0d4123f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7642,6 +7642,8 @@ and,
(with
{
tree op0 = @0, op1 = @1, op2 = @2;
+ machine_mode result_mode = TYPE_MODE (type);
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (op0));
/* Build a vector of integers from the tree mask. */
vec_perm_builder builder;
@@ -7703,7 +7705,7 @@ and,
insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR
in that case. But only if the vector mode is supported,
otherwise this is invalid GIMPLE. */
- if (TYPE_MODE (type) != BLKmode
+ if (op_mode != BLKmode
&& (TREE_CODE (cop0) == VECTOR_CST
|| TREE_CODE (cop0) == CONSTRUCTOR
|| TREE_CODE (cop1) == VECTOR_CST
@@ -7749,12 +7751,12 @@ and,
2-argument version. */
tree oldop2 = op2;
if (sel.ninputs () == 2
- || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
+ || can_vec_perm_const_p (result_mode, op_mode, sel, false))
op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
else
{
vec_perm_indices sel2 (builder, 2, nelts);
- if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
+ if (can_vec_perm_const_p (result_mode, op_mode, sel2, false))
op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
else
/* Not directly supported with either encoding,
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 68dc679cc6a..809482b8092 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -407,9 +407,9 @@ can_vec_perm_var_p (machine_mode mode)
}
/* Return true if the target directly supports VEC_PERM_EXPRs on vectors
- of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it
- is acceptable to force the selector into a register and use a variable
- permute (if the target supports that).
+ of mode OP_MODE and result vector of mode MODE using the selector SEL.
+ ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a
+ register and use a variable permute (if the target supports that).
Note that additional permutations representing whole-vector shifts may
also be handled via the vec_shr or vec_shl optab, but only where the
@@ -417,8 +417,8 @@ can_vec_perm_var_p (machine_mode mode)
with here. */
bool
-can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
- bool allow_variable_p)
+can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
+ const vec_perm_indices &sel, bool allow_variable_p)
{
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
@@ -448,7 +448,7 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
if (targetm.vectorize.vec_perm_const != NULL)
{
- if (targetm.vectorize.vec_perm_const (mode, NULL_RTX, NULL_RTX,
+ if (targetm.vectorize.vec_perm_const (mode, op_mode, NULL_RTX, NULL_RTX,
NULL_RTX, sel))
return true;
@@ -534,7 +534,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
+ (i & ~1)
+ ((i & 1) ? nunits : 0));
vec_perm_indices indices (sel, 2, nunits);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
return 2;
}
}
@@ -550,7 +550,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
for (unsigned int i = 0; i < 3; ++i)
sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
vec_perm_indices indices (sel, 2, nunits);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
return 3;
}
}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index b9c9fd6f64d..945d2a803b6 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -178,8 +178,8 @@ bool can_conditionally_move_p (machine_mode mode);
opt_machine_mode qimode_for_vec_perm (machine_mode);
bool selector_fits_mode_p (machine_mode, const vec_perm_indices &);
bool can_vec_perm_var_p (machine_mode);
-bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &,
- bool = true);
+bool can_vec_perm_const_p (machine_mode, machine_mode,
+ const vec_perm_indices &, bool = true);
/* Find a widening optab even if it doesn't widen as much as we want. */
#define find_widening_optab_handler(A, B, C) \
find_widening_optab_handler_and_mode (A, B, C, NULL)
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 3d8fa3abdfe..c0a68471d2d 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -6250,7 +6250,10 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
if (single_arg_p)
v1 = v0;
- if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices))
+ gcc_checking_assert (GET_MODE (v0) == GET_MODE (v1));
+ machine_mode op_mode = GET_MODE (v0);
+ if (targetm.vectorize.vec_perm_const (mode, op_mode, target, v0, v1,
+ indices))
return target;
}
@@ -6264,7 +6267,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
v0_qi = gen_lowpart (qimode, v0);
v1_qi = gen_lowpart (qimode, v1);
if (targetm.vectorize.vec_perm_const != NULL
- && targetm.vectorize.vec_perm_const (qimode, target_qi, v0_qi,
+ && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi,
v1_qi, qimode_indices))
return gen_lowpart (mode, target_qi);
}
diff --git a/gcc/target.def b/gcc/target.def
index d85adf36a39..2a7fa68f83d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1878,12 +1878,16 @@ access using @var{type} is known to be naturally aligned.",
DEFHOOK
(vec_perm_const,
"This hook is used to test whether the target can permute up to two\n\
-vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\
-also to emit such a permutation. In the former case @var{in0}, @var{in1}\n\
-and @var{out} are all null. In the latter case @var{in0} and @var{in1} are\n\
-the source vectors and @var{out} is the destination vector; all three are\n\
-operands of mode @var{mode}. @var{in1} is the same as @var{in0} if\n\
-@var{sel} describes a permutation on one vector instead of two.\n\
+vectors of mode @var{op_mode} using the permutation vector @code{sel},\n\
+producing a vector of mode @var{mode}. The hook is also used to emit such\n\
+a permutation.\n\
+\n\
+When the hook is being used to test whether the target supports a permutation,\n\
+@var{in0}, @var{in1}, and @var{out} are all null. When the hook is being used\n\
+to emit a permutation, @var{in0} and @var{in1} are the source vectors of mode\n\
+@var{op_mode} and @var{out} is the destination vector of mode @var{mode}.\n\
+@var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one\n\
+vector instead of two.\n\
\n\
Return true if the operation is possible, emitting instructions for it\n\
if rtxes are provided.\n\
@@ -1894,7 +1898,7 @@ try the equivalent byte operation. If that also fails, it will try forcing\n\
the selector into a register and using the @var{vec_perm@var{mode}}\n\
instruction pattern. There is no need for the hook to handle these two\n\
implementation approaches itself.",
- bool, (machine_mode mode, rtx output, rtx in0, rtx in1,
+ bool, (machine_mode mode, machine_mode op_mode, rtx output, rtx in0, rtx in1,
const vec_perm_indices &sel),
NULL)
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index d698a483ff1..c387e84ad59 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -2953,7 +2953,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
: (elts[0].second == 0 && elts[0].first == 0
? 0 : refnelts) + i);
vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts);
- if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices))
+ machine_mode vmode = TYPE_MODE (perm_type);
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
return false;
mask_type
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
@@ -3002,7 +3003,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
sel.quick_push (elts[i].first
? elts[i].second + nelts : i);
vec_perm_indices indices (sel, 2, nelts);
- if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
+ machine_mode vmode = TYPE_MODE (type);
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
return false;
mask_type
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 09223baf718..d20a10a1524 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5347,7 +5347,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
sel[3 * i + nelt2] = 0;
}
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (mode, indices))
+ if (!can_vec_perm_const_p (mode, mode, indices))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
@@ -5365,7 +5365,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
sel[3 * i + nelt2] = nelt + j2++;
}
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (mode, indices))
+ if (!can_vec_perm_const_p (mode, mode, indices))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
@@ -5390,12 +5390,12 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
sel[i * 2 + 1] = i + nelt;
}
vec_perm_indices indices (sel, 2, nelt);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
{
for (i = 0; i < 6; i++)
sel[i] += exact_div (nelt, 2);
indices.new_vector (sel, 2, nelt);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
return true;
}
}
@@ -5963,7 +5963,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
else
sel[i] = 0;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (mode, indices))
+ if (!can_vec_perm_const_p (mode, mode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5977,7 +5977,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
else
sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (mode, indices))
+ if (!can_vec_perm_const_p (mode, mode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6000,12 +6000,12 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
for (i = 0; i < 3; i++)
sel[i] = i * 2;
vec_perm_indices indices (sel, 2, nelt);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
{
for (i = 0; i < 3; i++)
sel[i] = i * 2 + 1;
indices.new_vector (sel, 2, nelt);
- if (can_vec_perm_const_p (mode, indices))
+ if (can_vec_perm_const_p (mode, mode, indices))
return true;
}
}
@@ -6327,6 +6327,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ machine_mode vmode = TYPE_MODE (vectype);
unsigned int i;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -6351,7 +6352,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2 + 1;
vec_perm_indices indices (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6366,7 +6367,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6381,7 +6382,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt; i++)
sel[i] = nelt / 2 + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6397,7 +6398,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = nelt / 2; i < nelt; i++)
sel[i] = nelt + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6461,7 +6462,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
k++;
}
vec_perm_indices indices (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6476,7 +6477,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6490,7 +6491,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + 1 + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6504,7 +6505,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt; i++)
sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6518,7 +6519,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index f7de64cea40..92aba5d4af6 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1527,7 +1527,10 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
&& tree_to_vec_perm_builder (&sel_int, mask))
{
vec_perm_indices indices (sel_int, 2, elements);
- if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
+ machine_mode vmode = TYPE_MODE (vect_type);
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
+ machine_mode lhs_mode = TYPE_MODE (lhs_type);
+ if (can_vec_perm_const_p (lhs_mode, vmode, indices))
{
gimple_assign_set_rhs3 (stmt, mask);
update_stmt (stmt);
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 1d4337eb261..11dc6cbf576 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -312,7 +312,8 @@ interleave_supported_p (vec_perm_indices *indices, tree vectype,
sel.quick_push (base + i + nelts);
}
indices->new_vector (sel, 2, nelts);
- return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
+ return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
+ *indices);
}
/* Try to use permutes to define the masks in DEST_RGM using the masks
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index f204b72a752..246347b9b08 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4527,7 +4527,7 @@ have_whole_vector_shift (machine_mode mode)
{
calc_vec_perm_mask_for_shift (i, nelt, &sel);
indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (mode, indices, false))
+ if (!can_vec_perm_const_p (mode, mode, indices, false))
return false;
}
return true;
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index ac49c1a26e1..0fad4dbd094 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -2649,7 +2649,8 @@ vect_recog_rotate_pattern (vec_info *vinfo,
vec_perm_indices indices (elts, 1,
TYPE_VECTOR_SUBPARTS (char_vectype));
- if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
+ machine_mode vmode = TYPE_MODE (char_vectype);
+ if (can_vec_perm_const_p (vmode, vmode, indices))
{
/* vectorizable_bswap can handle the __builtin_bswap16 if we
undo the argument promotion. */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index cdfff1ab9f6..fe9361c338e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -420,8 +420,9 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
}
vec_perm_indices indices1 (sel1, 2, nelts);
vec_perm_indices indices2 (sel2, 2, nelts);
- if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
- && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
+ machine_mode vmode = TYPE_MODE (vector_type);
+ if (can_vec_perm_const_p (vmode, vmode, indices1)
+ && can_vec_perm_const_p (vmode, vmode, indices2))
{
if (nvectors_out)
*nvectors_out = nvectors;
@@ -6762,7 +6763,7 @@ vect_transform_slp_perm_load (vec_info *vinfo,
if (index == count && !noop_p)
{
indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
- if (!can_vec_perm_const_p (mode, indices))
+ if (!can_vec_perm_const_p (mode, mode, indices))
{
if (dump_enabled_p ())
{
@@ -7122,8 +7123,9 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
{
indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, nunits);
bool identity_p = indices.series_p (0, 1, 0, 1);
+ machine_mode vmode = TYPE_MODE (vectype);
if (!identity_p
- && !can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ && !can_vec_perm_const_p (vmode, vmode, indices))
{
if (dump_enabled_p ())
{
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 8327e9d047e..346d8ce2804 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2016,7 +2016,8 @@ perm_mask_for_reverse (tree vectype)
sel.quick_push (nunits - 1 - i);
vec_perm_indices indices (sel, 1, nunits);
- if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
+ indices))
return NULL_TREE;
return vect_gen_perm_mask_checked (vectype, indices);
}
@@ -3168,7 +3169,8 @@ vectorizable_bswap (vec_info *vinfo,
elts.quick_push ((i + 1) * word_bytes - j - 1);
vec_perm_indices indices (elts, 1, num_bytes);
- if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
+ machine_mode vmode = TYPE_MODE (char_vectype);
+ if (!can_vec_perm_const_p (vmode, vmode, indices))
return false;
if (! vec_stmt)
@@ -6712,7 +6714,7 @@ scan_store_can_perm_p (tree vectype, tree init,
sel[j] = nunits + k;
}
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
- if (!can_vec_perm_const_p (vec_mode, indices))
+ if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
{
if (i == units_log2)
return -1;
@@ -8582,7 +8584,8 @@ vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
- gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
+ machine_mode vmode = TYPE_MODE (vectype);
+ gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
return vect_gen_perm_mask_any (vectype, sel);
}