summaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--gcc/config/i386/i386.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index af58f7cf681..b8e63965bb2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -36021,6 +36021,8 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
return ok;
}
+static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
a two vector permutation into a single vector permutation by using
an interleave operation to merge the vectors. */
@@ -36047,6 +36049,17 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
/* For 32-byte modes allow even d->op0 == d->op1.
The lack of cross-lane shuffling in some instructions
might prevent a single insn shuffle. */
+ dfinal = *d;
+ dfinal.testing_p = true;
+ /* If expand_vec_perm_interleave3 can expand this into
+ a 3 insn sequence, give up and let it be expanded as
+ 3 insn sequence. While that is one insn longer,
+ it doesn't need a memory operand and in the common
+ case that both interleave low and high permutations
+ with the same operands are adjacent needs 4 insns
+ for both after CSE. */
+ if (expand_vec_perm_interleave3 (&dfinal))
+ return false;
}
else
return false;
@@ -36886,18 +36899,23 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
stopping once we have promoted to V4SImode and then use pshufd. */
do
{
- optab otab = vec_interleave_low_optab;
+ rtx dest;
+ rtx (*gen) (rtx, rtx, rtx)
+ = vmode == V16QImode ? gen_vec_interleave_lowv16qi
+ : gen_vec_interleave_lowv8hi;
if (elt >= nelt2)
{
- otab = vec_interleave_high_optab;
+ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
+ : gen_vec_interleave_highv8hi;
elt -= nelt2;
}
nelt2 /= 2;
- op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
+ dest = gen_reg_rtx (vmode);
+ emit_insn (gen (dest, op0, op0));
vmode = get_mode_wider_vector (vmode);
- op0 = gen_lowpart (vmode, op0);
+ op0 = gen_lowpart (vmode, dest);
}
while (vmode != V4SImode);