diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-06-23 16:14:31 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-06-23 16:16:18 +0200 |
commit | 37e93925366676201b526624e9f8dc32d82b4ff2 (patch) | |
tree | ed32b90a49693afd597530f0cb5be3c673437b5c /gcc/config/i386/i386-expand.c | |
parent | 371c1992624c9269e2d5747561a8b27b30e485ee (diff) | |
download | gcc-37e93925366676201b526624e9f8dc32d82b4ff2.tar.gz |
i386: Add PPERM two-operand 64bit vector permutation [PR89021]
Add emulation of V8QI PPERM permutations for TARGET_XOP target. Similar
to PSHUFB, the permutation is performed with V16QI PPERM instruction,
where selector is defined in V16QI mode with inactive elements set to 0x80.
Specific to two operand permutations is the remapping of elements from
the second operand (e.g. e[8] -> e[16]), as we have to account for the
inactive elements from the first operand.
2021-06-23 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/89021
* config/i386/i386-expand.c (expand_vec_perm_pshufb):
Handle 64bit modes for TARGET_XOP. Use indirect gen_* functions.
* config/i386/mmx.md (mmx_ppermv64): New insn pattern.
* config/i386/i386.md (unspec): Move UNSPEC_XOP_PERMUTE from ...
* config/i386/sse.md (unspec): ... here.
Diffstat (limited to 'gcc/config/i386/i386-expand.c')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 75 |
1 files changed, 61 insertions, 14 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2986b49065c..9c922bf1bf1 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -17467,10 +17467,23 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (!d->one_operand_p) { - if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) + if (GET_MODE_SIZE (d->vmode) == 8) + { + if (!TARGET_XOP) + return false; + vmode = V8QImode; + } + else if (GET_MODE_SIZE (d->vmode) == 16) + { + if (!TARGET_XOP) + return false; + } + else if (GET_MODE_SIZE (d->vmode) == 32) { - if (TARGET_AVX2 - && valid_perm_using_mode_p (V2TImode, d)) + if (!TARGET_AVX2) + return false; + + if (valid_perm_using_mode_p (V2TImode, d)) { if (d->testing_p) return true; @@ -17492,6 +17505,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) } return false; } + else + return false; } else { @@ -17651,8 +17666,22 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) { rtx m128 = GEN_INT (-128); + /* Remap elements from the second operand, as we have to + account for inactive top 8 elements from the first operand. */ + if (!d->one_operand_p) + for (i = 0; i < nelt; ++i) + { + int ival = INTVAL (rperm[i]); + if (ival >= 8) + ival += 8; + rperm[i] = GEN_INT (ival); + } + + /* V8QI is emulated with V16QI instruction, fill inactive + elements in the top 8 positions with zeros. */ for (i = nelt; i < 16; ++i) rperm[i] = m128; + vpmode = V16QImode; } @@ -17660,36 +17689,54 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) gen_rtvec_v (GET_MODE_NUNITS (vpmode), rperm)); vperm = force_reg (vpmode, vperm); - target = d->target; - if (d->vmode != vmode) + if (vmode == d->vmode) + target = d->target; + else target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, d->op0); + if (d->one_operand_p) { + rtx (*gen) (rtx, rtx, rtx); + if (vmode == V8QImode) - emit_insn (gen_mmx_pshufbv8qi3 (target, op0, vperm)); + gen = gen_mmx_pshufbv8qi3; else if (vmode == V16QImode) - emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); + gen = gen_ssse3_pshufbv16qi3; else if (vmode == V32QImode) - emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + gen = gen_avx2_pshufbv32qi3; else if (vmode == V64QImode) - emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); + gen = gen_avx512bw_pshufbv64qi3; else if (vmode == V8SFmode) - emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); + gen = gen_avx2_permvarv8sf; else if (vmode == V8SImode) - emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); + gen = gen_avx2_permvarv8si; else if (vmode == V16SFmode) - emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); + gen = gen_avx512f_permvarv16sf; else if (vmode == V16SImode) - emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); + gen = gen_avx512f_permvarv16si; else gcc_unreachable (); + + emit_insn (gen (target, op0, vperm)); } else { + rtx (*gen) (rtx, rtx, rtx, rtx); + op1 = gen_lowpart (vmode, d->op1); - emit_insn (gen_xop_pperm (target, op0, op1, vperm)); + + if (vmode == V8QImode) + gen = gen_mmx_ppermv64; + else if (vmode == V16QImode) + gen = gen_xop_pperm; + else + gcc_unreachable (); + + emit_insn (gen (target, op0, op1, vperm)); } + if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); |