summaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386-expand.c
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2021-06-23 16:14:31 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2021-06-23 16:16:18 +0200
commit 37e93925366676201b526624e9f8dc32d82b4ff2 (patch)
tree ed32b90a49693afd597530f0cb5be3c673437b5c /gcc/config/i386/i386-expand.c
parent 371c1992624c9269e2d5747561a8b27b30e485ee (diff)
download gcc-37e93925366676201b526624e9f8dc32d82b4ff2.tar.gz
i386: Add PPERM two-operand 64bit vector permutation [PR89021]
Add emulation of V8QI PPERM permutations for TARGET_XOP target. Similar to PSHUFB, the permutation is performed with the V16QI PPERM instruction, where the selector is defined in V16QI mode with inactive elements set to 0x80. Specific to two-operand permutations is the remapping of elements from the second operand (e.g. e[8] -> e[16]), as we have to account for the inactive elements from the first operand.

2021-06-23  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
	PR target/89021
	* config/i386/i386-expand.c (expand_vec_perm_pshufb): Handle 64bit modes for TARGET_XOP.  Use indirect gen_* functions.
	* config/i386/mmx.md (mmx_ppermv64): New insn pattern.
	* config/i386/i386.md (unspec): Move UNSPEC_XOP_PERMUTE from ...
	* config/i386/sse.md (unspec): ... here.
Diffstat (limited to 'gcc/config/i386/i386-expand.c')
-rw-r--r-- gcc/config/i386/i386-expand.c 75
1 files changed, 61 insertions, 14 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 2986b49065c..9c922bf1bf1 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -17467,10 +17467,23 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
if (!d->one_operand_p)
{
- if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
+ if (GET_MODE_SIZE (d->vmode) == 8)
+ {
+ if (!TARGET_XOP)
+ return false;
+ vmode = V8QImode;
+ }
+ else if (GET_MODE_SIZE (d->vmode) == 16)
+ {
+ if (!TARGET_XOP)
+ return false;
+ }
+ else if (GET_MODE_SIZE (d->vmode) == 32)
{
- if (TARGET_AVX2
- && valid_perm_using_mode_p (V2TImode, d))
+ if (!TARGET_AVX2)
+ return false;
+
+ if (valid_perm_using_mode_p (V2TImode, d))
{
if (d->testing_p)
return true;
@@ -17492,6 +17505,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
}
return false;
}
+ else
+ return false;
}
else
{
@@ -17651,8 +17666,22 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
rtx m128 = GEN_INT (-128);
+ /* Remap elements from the second operand, as we have to
+ account for inactive top 8 elements from the first operand. */
+ if (!d->one_operand_p)
+ for (i = 0; i < nelt; ++i)
+ {
+ int ival = INTVAL (rperm[i]);
+ if (ival >= 8)
+ ival += 8;
+ rperm[i] = GEN_INT (ival);
+ }
+
+ /* V8QI is emulated with V16QI instruction, fill inactive
+ elements in the top 8 positions with zeros. */
for (i = nelt; i < 16; ++i)
rperm[i] = m128;
+
vpmode = V16QImode;
}
@@ -17660,36 +17689,54 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
gen_rtvec_v (GET_MODE_NUNITS (vpmode), rperm));
vperm = force_reg (vpmode, vperm);
- target = d->target;
- if (d->vmode != vmode)
+ if (vmode == d->vmode)
+ target = d->target;
+ else
target = gen_reg_rtx (vmode);
+
op0 = gen_lowpart (vmode, d->op0);
+
if (d->one_operand_p)
{
+ rtx (*gen) (rtx, rtx, rtx);
+
if (vmode == V8QImode)
- emit_insn (gen_mmx_pshufbv8qi3 (target, op0, vperm));
+ gen = gen_mmx_pshufbv8qi3;
else if (vmode == V16QImode)
- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
+ gen = gen_ssse3_pshufbv16qi3;
else if (vmode == V32QImode)
- emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ gen = gen_avx2_pshufbv32qi3;
else if (vmode == V64QImode)
- emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
+ gen = gen_avx512bw_pshufbv64qi3;
else if (vmode == V8SFmode)
- emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
+ gen = gen_avx2_permvarv8sf;
else if (vmode == V8SImode)
- emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
+ gen = gen_avx2_permvarv8si;
else if (vmode == V16SFmode)
- emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
+ gen = gen_avx512f_permvarv16sf;
else if (vmode == V16SImode)
- emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
+ gen = gen_avx512f_permvarv16si;
else
gcc_unreachable ();
+
+ emit_insn (gen (target, op0, vperm));
}
else
{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+
op1 = gen_lowpart (vmode, d->op1);
- emit_insn (gen_xop_pperm (target, op0, op1, vperm));
+
+ if (vmode == V8QImode)
+ gen = gen_mmx_ppermv64;
+ else if (vmode == V16QImode)
+ gen = gen_xop_pperm;
+ else
+ gcc_unreachable ();
+
+ emit_insn (gen (target, op0, op1, vperm));
}
+
if (target != d->target)
emit_move_insn (d->target, gen_lowpart (d->vmode, target));