summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-02 14:36:41 +0000
committerkyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-02 14:36:41 +0000
commit8f83f53edc2df6bd7b54cd18bec03167538ec542 (patch)
tree898925099dd89d15bbea12571dbdd71caf97f402
parentaa1be4c786208d1c0419bcf248e64a30d9f57741 (diff)
downloadgcc-8f83f53edc2df6bd7b54cd18bec03167538ec542.tar.gz
Update extract_even_odd w/ AVX-512BW insns.
gcc/ * config/i386/i386.c (expand_vec_perm_even_odd_trunc): New. (expand_vec_perm_even_odd_1): Handle V64QImode. (ix86_expand_vec_perm_const_1): Try expansion with expand_vec_perm_even_odd_trunc as well. * config/i386/sse.md (VI124_AVX512F): Rename to ... (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend to V64QI. (define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ... (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend to V32HI and V16SI. (define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name. (define_expand "vec_pack_trunc_<mode>"): Update iterator name. (define_expand "vec_unpacks_lo_<mode>"): Ditto. (define_expand "vec_unpacks_hi_<mode>"): Ditto. (define_expand "vec_unpacku_lo_<mode>"): Ditto. (define_expand "vec_unpacku_hi_<mode>"): Ditto. gcc/testsuite/ * gcc.target/i386/vect-pack-trunc-1.c: New test. * gcc.target/i386/vect-pack-trunc-2.c: Ditto. * gcc.target/i386/vect-perm-even-1.c: Ditto. * gcc.target/i386/vect-perm-odd-1.c: Ditto. * gcc.target/i386/vect-unpack-1.c: Ditto. * gcc.target/i386/vect-unpack-2.c: Ditto. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228394 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/config/i386/i386.c61
-rw-r--r--gcc/config/i386/sse.md24
-rw-r--r--gcc/testsuite/ChangeLog9
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-perm-even-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-unpack-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-unpack-2.c27
10 files changed, 288 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3adc4cfb928..152872207da 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,24 @@
2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com>
+ * config/i386/i386.c (expand_vec_perm_even_odd_trunc): New.
+ (expand_vec_perm_even_odd_1): Handle V64QImode.
+ (ix86_expand_vec_perm_const_1): Try expansion with
+ expand_vec_perm_even_odd_trunc as well.
+ * config/i386/sse.md (VI124_AVX512F): Rename to ...
+ (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend
+ to V64QI.
+ (define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ...
+ (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend
+ to V32HI and V16SI.
+ (define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name.
+ (define_expand "vec_pack_trunc_<mode>"): Update iterator name.
+ (define_expand "vec_unpacks_lo_<mode>"): Ditto.
+ (define_expand "vec_unpacks_hi_<mode>"): Ditto.
+ (define_expand "vec_unpacku_lo_<mode>"): Ditto.
+ (define_expand "vec_unpacku_hi_<mode>"): Ditto.
+
+2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com>
+
* doc/invoke.texi: Mention -mavx512vl, -mavx512bw, -mavx512dq,
-mavx521vbmi, -mavx512ifma. Add missing opindex-es.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cfeba76e8f2..1ccc33e525c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50171,6 +50171,62 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
return true;
}
+/* A subroutine of expand_vec_perm_even_odd_1, also tried directly from
+ ix86_expand_vec_perm_const_1. Implement extract-even and extract-odd
+ permutations of two V64QI operands: "odd" uses two "shifts", two "truncs"
+ and one "concat" insn; "even" uses two "truncs" and one "concat" insn.
+ Have already failed all two instruction sequences. */
+
+static bool
+expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
+{
+ rtx t1, t2, t3, t4;
+ unsigned i, odd, nelt = d->nelt;
+
+ if (!TARGET_AVX512BW
+ || d->one_operand_p
+ || d->vmode != V64QImode)
+ return false; /* Only two-operand V64QI with AVX-512BW is handled. */
+
+ /* Check that permutation is even or odd. */
+ odd = d->perm[0]; /* 0 means extract-even, 1 means extract-odd. */
+ if (odd > 1)
+ return false;
+
+ for (i = 1; i < nelt; ++i)
+ if (d->perm[i] != 2 * i + odd) /* Every index must follow the stride-2 pattern. */
+ return false;
+
+ if (d->testing_p)
+ return true; /* Pattern matched; return before emitting any insn. */
+
+
+ if (odd)
+ {
+ t1 = gen_reg_rtx (V32HImode);
+ t2 = gen_reg_rtx (V32HImode);
+ emit_insn (gen_lshrv32hi3 (t1, /* Shift each HImode element right by 8 ... */
+ gen_lowpart (V32HImode, d->op0),
+ GEN_INT (8))); /* ... so its upper byte becomes the low byte. */
+ emit_insn (gen_lshrv32hi3 (t2, /* Same shift for the second operand. */
+ gen_lowpart (V32HImode, d->op1),
+ GEN_INT (8)));
+ }
+ else
+ {
+ t1 = gen_lowpart (V32HImode, d->op0); /* Even: reuse the operands as V32HI, no shift. */
+ t2 = gen_lowpart (V32HImode, d->op1);
+ }
+
+ t3 = gen_reg_rtx (V32QImode);
+ t4 = gen_reg_rtx (V32QImode);
+ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); /* V32HI -> V32QI truncation. */
+ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
+ emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); /* Join both halves into the V64QI target. */
+
+ return true;
+}
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
and extract-odd permutations. */
@@ -50273,6 +50329,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
case V32QImode:
return expand_vec_perm_even_odd_pack (d);
+ case V64QImode:
+ return expand_vec_perm_even_odd_trunc (d);
+
case V4DImode:
if (!TARGET_AVX2)
{
@@ -50734,6 +50793,8 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Try sequences of four instructions. */
+ if (expand_vec_perm_even_odd_trunc (d))
+ return true;
if (expand_vec_perm_vpshufb2_vpermq (d))
return true;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4eefb4529b8..013681ca918 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -381,8 +381,8 @@
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI124_AVX512F
- [(V32QI "TARGET_AVX2") V16QI
+(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
@@ -398,9 +398,9 @@
[(V8SI "TARGET_AVX2") V4SI
(V4DI "TARGET_AVX2") V2DI])
-(define_mode_iterator VI248_AVX2_8_AVX512F
- [(V16HI "TARGET_AVX2") V8HI
- (V8SI "TARGET_AVX2") V4SI
+(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX512BW_AVX512VL
@@ -8749,7 +8749,7 @@
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
-(define_insn "*avx512bw_<code>v32hiv32qi2"
+(define_insn "avx512bw_<code>v32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
(any_truncate:V32QI
(match_operand:V32HI 1 "register_operand" "v,v")))]
@@ -11331,8 +11331,8 @@
(define_expand "vec_pack_trunc_<mode>"
[(match_operand:<ssepackmode> 0 "register_operand")
- (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
- (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
+ (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
+ (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
"TARGET_SSE2"
{
rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
@@ -13221,25 +13221,25 @@
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX512F 1 "register_operand")]
+ (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
(define_expand "vec_unpacks_hi_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX512F 1 "register_operand")]
+ (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
(define_expand "vec_unpacku_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX512F 1 "register_operand")]
+ (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
(define_expand "vec_unpacku_hi_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX512F 1 "register_operand")]
+ (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ced7d33bfaf..efea8e42a49 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com>
+
+ * gcc.target/i386/vect-pack-trunc-1.c: New test.
+ * gcc.target/i386/vect-pack-trunc-2.c: Ditto.
+ * gcc.target/i386/vect-perm-even-1.c: Ditto.
+ * gcc.target/i386/vect-perm-odd-1.c: Ditto.
+ * gcc.target/i386/vect-unpack-1.c: Ditto.
+ * gcc.target/i386/vect-unpack-2.c: Ditto.
+
2015-10-02 Marek Polacek <polacek@redhat.com>
PR c/64249
diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c
new file mode 100644
index 00000000000..774d4bd055a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned char yy[10000];
+
+void
+__attribute__ ((noinline)) foo (unsigned short s) /* Fill yy[0 .. s-1]. */
+{
+ unsigned short i;
+ for (i = 0; i < s; i++)
+ yy[i] = (unsigned char) i; /* Narrow the unsigned short index to unsigned char. */
+}
+
+void
+avx512bw_test () /* Run foo over N elements and verify each stored value. */
+{
+ unsigned short i;
+ foo (N);
+
+ for (i = 0; i < N; i++)
+ if ( (unsigned char)i != yy [i] ) /* Expected value is the index truncated to unsigned char. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c
new file mode 100644
index 00000000000..a1a075f41a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned short yy[10000];
+
+void
+__attribute__ ((noinline)) foo (unsigned int s) /* Fill yy[0 .. s-1]. */
+{
+ unsigned int i;
+ for (i = 0; i < s; i++)
+ yy[i] = (unsigned short) i; /* Narrow the unsigned int index to unsigned short. */
+}
+
+void
+avx512bw_test () /* Run foo over N elements and verify each stored value. */
+{
+ unsigned int i;
+ foo (N);
+ for (i = 0; i < N; i++)
+ if ( (unsigned short)i != yy [i] ) /* Expected value is the index truncated to unsigned short. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c
new file mode 100644
index 00000000000..a2ff73d2382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned char yy[10000];
+unsigned char xx[10000];
+
+void
+__attribute__ ((noinline)) foo (unsigned short s) /* Copy s odd-indexed bytes of xx into yy. */
+{
+ unsigned short i;
+ for (i = 0; i < s; i++)
+ yy[i] = xx [i*2 + 1]; /* NOTE(review): reads odd elements although the file is named "even". */
+}
+
+void
+avx512bw_test () /* Initialise xx, run foo, then verify yy. */
+{
+ unsigned short i;
+ unsigned char j = 0;
+ for (i = 0; i < 2 * N + 1; i++, j++) /* Covers every index that foo (N) reads. */
+ xx [i] = j; /* j wraps as an unsigned char, so xx[i] == (unsigned char) i. */
+
+ foo (N);
+
+ for (i = 0; i < N; i++)
+ if ( (unsigned char)(2*i+1) != yy [i] ) /* yy[i] must equal xx[2*i+1]. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c
new file mode 100644
index 00000000000..65f1a80c04b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+
+typedef struct /* Pair of byte fields; foo reads both per element. */
+{
+ unsigned char real; /* First byte of each pair. */
+ unsigned char imag; /* Second byte of each pair. */
+} complex8_t;
+
+void
+__attribute__ ((noinline)) foo (unsigned char *a, /* a: output array of len bytes. */
+ complex8_t *x, unsigned len) /* x: input array of len pairs. */
+{
+ unsigned i;
+ for (i = 0; i < len; i++)
+ a[i] = x[i].imag + x[i].real; /* Sum is truncated to unsigned char on store. */
+}
+
+void
+avx512bw_test () /* Initialise x, run foo, then verify a. */
+{
+ unsigned short i;
+ unsigned char j = 0;
+ complex8_t x [N];
+ unsigned char a [N];
+
+ for (i = 0; i < N; i++, j++) /* j wraps as an unsigned char while i counts to N. */
+ {
+ x [i].real = j;
+ x [i].imag = j;
+ }
+
+ foo (a, x, N);
+
+ j = 0; /* Restart j so it replays the values stored above. */
+ for (i = 0; i < N; i++, j++)
+ if ( a[i] != (unsigned char)(j+j) ) /* real + imag == j + j, truncated to a byte. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-1.c b/gcc/testsuite/gcc.target/i386/vect-unpack-1.c
new file mode 100644
index 00000000000..eedca475d02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-unpack-1.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 255
+unsigned int yy[10000];
+
+void
+__attribute__ ((noinline)) foo (unsigned char s) /* Fill yy[0 .. s-1]. */
+{
+ unsigned char i;
+ for (i = 0; i < s; i++)
+ yy[i] = (unsigned int) i; /* Widen the unsigned char index to unsigned int. */
+}
+
+void
+avx512bw_test () /* Run foo over N elements and verify each stored value. */
+{
+ unsigned char i;
+ foo (N);
+ for (i = 0; i < N; i++) /* N is 255, so the unsigned char counter stops at 254. */
+ if ( (unsigned int)i != yy [i] ) /* Expected value is the zero-extended index. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
new file mode 100644
index 00000000000..b825f0c02ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 120
+signed int yy[10000];
+
+void
+__attribute__ ((noinline)) foo (signed char s) /* Fill yy[0 .. s-1]. */
+{
+ signed char i;
+ for (i = 0; i < s; i++)
+ yy[i] = (signed int) i; /* Widen the signed char index to signed int. */
+}
+
+void
+avx512bw_test () /* Run foo over N elements and verify each stored value. */
+{
+ signed char i;
+ foo (N);
+ for (i = 0; i < N; i++) /* N is 120, which fits in a signed char. */
+ if ( (signed int)i != yy [i] ) /* Expected value is the sign-extended index. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */