diff options
author | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-04 14:06:27 +0000 |
---|---|---|
committer | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-04 14:06:27 +0000 |
commit | 6125d36e20e2e67352407e309fe547a17d74c800 (patch) | |
tree | b2e4e6b910488050c7e03244e2c4e4818c2eb571 | |
parent | 6df5adf114787b3b2eb8d210872d711a45f33663 (diff) | |
download | gcc-6125d36e20e2e67352407e309fe547a17d74c800.tar.gz |
gcc/
* config/i386/i386.c (ix86_expand_vec_perm): Handle one-operand
permutation for TARGET_AVX512F.
(ix86_expand_vec_one_operand_perm_avx512): New function.
(expand_vec_perm_1): Invoke introduced function.
* tree-vect-loop.c (vect_transform_loop): Clear the safelen value since
it may no longer be valid after vectorization.
gcc/testsuite/
* gcc.target/i386/avx512f-vect-perm-1.c: New test.
* gcc.target/i386/avx512f-vect-perm-2.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237982 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 77 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c | 14 |
5 files changed, 119 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c021b34409b..76911ff6b8c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-07-04 Yuri Rumyantsev <ysrumyan@gmail.com> + + * config/i386/i386.c (ix86_expand_vec_perm): Add handle one-operand + permutation for TARGET_AVX512F. + (ix86_expand_vec_one_operand_perm_avx512): New function. + (expand_vec_perm_1): Invoke introduced function. + * tree-vect-loop.c (vect_transform_loop): Clear-up safelen value since + it may be not valid after vectorization. + 2016-07-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> PR target/63874 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 70b13c81bc8..9eaf4144965 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24307,6 +24307,33 @@ ix86_expand_vec_perm (rtx operands[]) e = GET_MODE_UNIT_SIZE (mode); gcc_assert (w <= 64); + if (TARGET_AVX512F && one_operand_shuffle) + { + rtx (*gen) (rtx, rtx, rtx) = NULL; + switch (mode) + { + case V16SImode: + gen =gen_avx512f_permvarv16si; + break; + case V16SFmode: + gen = gen_avx512f_permvarv16sf; + break; + case V8DImode: + gen = gen_avx512f_permvarv8di; + break; + case V8DFmode: + gen = gen_avx512f_permvarv8df; + break; + default: + break; + } + if (gen != NULL) + { + emit_insn (gen (target, op0, mask)); + return; + } + } + if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL)) return; @@ -50444,6 +50471,52 @@ canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, return true; } +/* Try to expand one-operand permutation with constant mask. 
*/ + +static bool +ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d) +{ + machine_mode mode = GET_MODE (d->op0); + machine_mode maskmode = mode; + rtx (*gen) (rtx, rtx, rtx) = NULL; + rtx target, op0, mask; + rtx vec[64]; + + if (!rtx_equal_p (d->op0, d->op1)) + return false; + + if (!TARGET_AVX512F) + return false; + + switch (mode) + { + case V16SImode: + gen = gen_avx512f_permvarv16si; + break; + case V16SFmode: + gen = gen_avx512f_permvarv16sf; + maskmode = V16SImode; + break; + case V8DImode: + gen = gen_avx512f_permvarv8di; + break; + case V8DFmode: + gen = gen_avx512f_permvarv8df; + maskmode = V8DImode; + break; + default: + return false; + } + + target = d->target; + op0 = d->op0; + for (int i = 0; i < d->nelt; ++i) + vec[i] = GEN_INT (d->perm[i]); + mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); + emit_insn (gen (target, op0, force_reg (maskmode, mask))); + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D in a single instruction. */ @@ -50611,6 +50684,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_palignr (d, true)) return true; + /* Try the AVX512F vperm{s,d} instructions. */ + if (ix86_expand_vec_one_operand_perm_avx512 (d)) + return true; + /* Try the AVX512F vpermi2 instructions. */ if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 23e010ed834..95d850ae69d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-07-04 Yuri Rumyantsev <ysrumyan@gmail.com> + + * gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c: New test. + * gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c: New test. 
+ 2016-07-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> PR target/63874 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c new file mode 100644 index 00000000000..ea6760d481c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#define N 1024
+float f1[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+float f2[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+void foo () /* Accumulate f2, read back to front, into f1 element-wise. */
+{
+ int j;
+ for (j=0; j<N; j++)
+ f1[j] += f2[N-1-j]; /* N-1-j keeps the reversed index within f2[0..N-1]. */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c new file mode 100644 index 00000000000..29d00d75ac9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#define N 1024
+double d1[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+double d2[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+void foo () /* Accumulate d2, read back to front, into d1 element-wise. */
+{
+ int j;
+ for (j=0; j<N; j++)
+ d1[j] += d2[N-1-j]; /* N-1-j keeps the reversed index within d2[0..N-1]. */
+}
|