diff options
author | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-02 14:36:41 +0000 |
---|---|---|
committer | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-02 14:36:41 +0000 |
commit | 8f83f53edc2df6bd7b54cd18bec03167538ec542 (patch) | |
tree | 898925099dd89d15bbea12571dbdd71caf97f402 | |
parent | aa1be4c786208d1c0419bcf248e64a30d9f57741 (diff) | |
download | gcc-8f83f53edc2df6bd7b54cd18bec03167538ec542.tar.gz |
Update extract_even_odd w/ AVX-512BW insns.
gcc/
* config/i386/i386.c (expand_vec_perm_even_odd_trunc): New.
(expand_vec_perm_even_odd_1): Handle V64QImode.
(ix86_expand_vec_perm_const_1): Try expansion with
expand_vec_perm_even_odd_trunc as well.
* config/i386/sse.md (VI124_AVX512F): Rename to ...
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend
to V64QI.
(define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ...
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend
to V32HI and V16SI.
(define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name.
(define_expand "vec_pack_trunc_<mode>"): Update iterator name.
(define_expand "vec_unpacks_lo_<mode>"): Ditto.
(define_expand "vec_unpacks_hi_<mode>"): Ditto.
(define_expand "vec_unpacku_lo_<mode>"): Ditto.
(define_expand "vec_unpacku_hi_<mode>"): Ditto.
gcc/testsuite/
* gcc.target/i386/vect-pack-trunc-1.c: New test.
* gcc.target/i386/vect-pack-trunc-2.c: Ditto.
* gcc.target/i386/vect-perm-even-1.c: Ditto.
* gcc.target/i386/vect-perm-odd-1.c: Ditto.
* gcc.target/i386/vect-unpack-1.c: Ditto.
* gcc.target/i386/vect-unpack-2.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228394 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 61 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 24 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c | 28 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c | 27 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-perm-even-1.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c | 45 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-unpack-1.c | 27 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-unpack-2.c | 27 |
10 files changed, 288 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3adc4cfb928..152872207da 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,24 @@ 2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com> + * config/i386/i386.c (expand_vec_perm_even_odd_trunc): New. + (expand_vec_perm_even_odd_1): Handle V64QImode. + (ix86_expand_vec_perm_const_1): Try expansion with + expand_vec_perm_even_odd_trunc as well. + * config/i386/sse.md (VI124_AVX512F): Rename to ... + (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend + to V54QI. + (define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ... + (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend + to V32HI and V16SI. + (define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name. + (define_expand "vec_pack_trunc_<mode>"): Update iterator name. + (define_expand "vec_unpacks_lo_<mode>"): Ditto. + (define_expand "vec_unpacks_hi_<mode>"): Ditto. + (define_expand "vec_unpacku_lo_<mode>"): Ditto. + (define_expand "vec_unpacku_hi_<mode>"): Ditto. + +2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com> + * doc/invoke.texi: Mention -mavx512vl, -mavx512bw, -mavx512dq, -mavx521vbmi, -mavx512ifma. Add missing opindex-es. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cfeba76e8f2..1ccc33e525c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -50171,6 +50171,62 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) return true; } +/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even + and extract-odd permutations of two V64QI operands + with two "shifts", two "truncs" and one "concat" insns for "odd" + and two "truncs" and one concat insn for "even." + Have already failed all two instruction sequences. 
*/ + +static bool +expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) +{ + rtx t1, t2, t3, t4; + unsigned i, odd, nelt = d->nelt; + + if (!TARGET_AVX512BW + || d->one_operand_p + || d->vmode != V64QImode) + return false; + + /* Check that permutation is even or odd. */ + odd = d->perm[0]; + if (odd > 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + if (d->testing_p) + return true; + + + if (odd) + { + t1 = gen_reg_rtx (V32HImode); + t2 = gen_reg_rtx (V32HImode); + emit_insn (gen_lshrv32hi3 (t1, + gen_lowpart (V32HImode, d->op0), + GEN_INT (8))); + emit_insn (gen_lshrv32hi3 (t2, + gen_lowpart (V32HImode, d->op1), + GEN_INT (8))); + } + else + { + t1 = gen_lowpart (V32HImode, d->op0); + t2 = gen_lowpart (V32HImode, d->op1); + } + + t3 = gen_reg_rtx (V32QImode); + t4 = gen_reg_rtx (V32QImode); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); + emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); + + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even and extract-odd permutations. */ @@ -50273,6 +50329,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) case V32QImode: return expand_vec_perm_even_odd_pack (d); + case V64QImode: + return expand_vec_perm_even_odd_trunc (d); + case V4DImode: if (!TARGET_AVX2) { @@ -50734,6 +50793,8 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* Try sequences of four instructions. 
*/ + if (expand_vec_perm_even_odd_trunc (d)) + return true; if (expand_vec_perm_vpshufb2_vpermq (d)) return true; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4eefb4529b8..013681ca918 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -381,8 +381,8 @@ [(V16HI "TARGET_AVX2") V8HI (V8SI "TARGET_AVX2") V4SI]) -(define_mode_iterator VI124_AVX512F - [(V32QI "TARGET_AVX2") V16QI +(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) @@ -398,9 +398,9 @@ [(V8SI "TARGET_AVX2") V4SI (V4DI "TARGET_AVX2") V2DI]) -(define_mode_iterator VI248_AVX2_8_AVX512F - [(V16HI "TARGET_AVX2") V8HI - (V8SI "TARGET_AVX2") V4SI +(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX512BW_AVX512VL @@ -8749,7 +8749,7 @@ (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512F") -(define_insn "*avx512bw_<code>v32hiv32qi2" +(define_insn "avx512bw_<code>v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI (match_operand:V32HI 1 "register_operand" "v,v")))] @@ -11331,8 +11331,8 @@ (define_expand "vec_pack_trunc_<mode>" [(match_operand:<ssepackmode> 0 "register_operand") - (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand") - (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")] + (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand") + (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")] "TARGET_SSE2" { rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); @@ -13221,25 +13221,25 @@ (define_expand "vec_unpacks_lo_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI124_AVX512F 1 
"register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") (define_expand "vec_unpacks_hi_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") (define_expand "vec_unpacku_lo_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") (define_expand "vec_unpacku_hi_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ced7d33bfaf..efea8e42a49 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2015-10-02 Kirill Yukhin <kirill.yukhin@intel.com> + + * gcc.target/i386/vect-pack-trunc-1.c: New test. + * gcc.target/i386/vect-pack-trunc-2.c: Ditto. + * gcc.target/i386/vect-perm-even-1.c: Ditto. + * gcc.target/i386/vect-perm-odd-1.c: Ditto. + * gcc.target/i386/vect-unpack-1.c: Ditto. + * gcc.target/i386/vect-unpack-2.c: Ditto. 
+ 2015-10-02 Marek Polacek <polacek@redhat.com> PR c/64249 diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c new file mode 100644 index 00000000000..774d4bd055a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include "avx512bw-check.h" + +#define N 400 +unsigned char yy[10000]; + +void +__attribute__ ((noinline)) foo (unsigned short s) +{ + unsigned short i; + for (i = 0; i < s; i++) + yy[i] = (unsigned char) i; +} + +void +avx512bw_test () +{ + unsigned short i; + foo (N); + + for (i = 0; i < N; i++) + if ( (unsigned char)i != yy [i] ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c new file mode 100644 index 00000000000..a1a075f41a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include "avx512bw-check.h" + +#define N 400 +unsigned short yy[10000]; + +void +__attribute__ ((noinline)) foo (unsigned int s) +{ + unsigned int i; + for (i = 0; i < s; i++) + yy[i] = (unsigned short) i; +} + +void +avx512bw_test () +{ + unsigned int i; + foo (N); + for (i = 0; i < N; i++) + if ( (unsigned short)i != yy [i] ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c new file mode 100644 index 00000000000..a2ff73d2382 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include 
"avx512bw-check.h" + +#define N 400 +unsigned char yy[10000]; +unsigned char xx[10000]; + +void +__attribute__ ((noinline)) foo (unsigned short s) +{ + unsigned short i; + for (i = 0; i < s; i++) + yy[i] = xx [i*2 + 1]; +} + +void +avx512bw_test () +{ + unsigned short i; + unsigned char j = 0; + for (i = 0; i < 2 * N + 1; i++, j++) + xx [i] = j; + + foo (N); + + for (i = 0; i < N; i++) + if ( (unsigned char)(2*i+1) != yy [i] ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c new file mode 100644 index 00000000000..65f1a80c04b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include "avx512bw-check.h" + +#define N 400 + +typedef struct +{ + unsigned char real; + unsigned char imag; +} complex8_t; + +void +__attribute__ ((noinline)) foo (unsigned char *a, + complex8_t *x, unsigned len) +{ + unsigned i; + for (i = 0; i < len; i++) + a[i] = x[i].imag + x[i].real; +} + +void +avx512bw_test () +{ + unsigned short i; + unsigned char j = 0; + complex8_t x [N]; + unsigned char a [N]; + + for (i = 0; i < N; i++, j++) + { + x [i].real = j; + x [i].imag = j; + } + + foo (a, x, N); + + j = 0; + for (i = 0; i < N; i++, j++) + if ( a[i] != (unsigned char)(j+j) ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-1.c b/gcc/testsuite/gcc.target/i386/vect-unpack-1.c new file mode 100644 index 00000000000..eedca475d02 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-1.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include "avx512bw-check.h" + +#define N 255 +unsigned int yy[10000]; + +void +__attribute__ 
((noinline)) foo (unsigned char s) +{ + unsigned char i; + for (i = 0; i < s; i++) + yy[i] = (unsigned int) i; +} + +void +avx512bw_test () +{ + unsigned char i; + foo (N); + for (i = 0; i < N; i++) + if ( (unsigned int)i != yy [i] ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c new file mode 100644 index 00000000000..b825f0c02ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */ + +#include "avx512bw-check.h" + +#define N 120 +signed int yy[10000]; + +void +__attribute__ ((noinline)) foo (signed char s) +{ + signed char i; + for (i = 0; i < s; i++) + yy[i] = (signed int) i; +} + +void +avx512bw_test () +{ + signed char i; + foo (N); + for (i = 0; i < N; i++) + if ( (signed int)i != yy [i] ) + abort (); +} + +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */ |