diff options
author | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-22 16:14:13 +0000 |
---|---|---|
committer | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-22 16:14:13 +0000 |
commit | e737dbc0227e88c49ac122c17191e51c9091c6e0 (patch) | |
tree | 50dbe47fb6ff3e2cfbdee9bb2ea846a97f4f11d1 | |
parent | 2d77189206294ccdefcb104013b2a82802dedb79 (diff) | |
download | gcc-e737dbc0227e88c49ac122c17191e51c9091c6e0.tar.gz |
2007-05-22 H.J. Lu <hongjiu.lu@intel.com>
* gcc.dg/i386-cpuid.h (bit_SSE4_1): New.
(bit_SSE4_2): Likewise.
(bit_POPCNT): Likewise.
* gcc.target/i386/i386.exp (check_effective_target_sse4): New.
Check if assembler supports SSE4 instructions.
* gcc.target/i386/sse4_1-blendpd.c: New file.
* gcc.target/i386/sse4_1-blendps.c: Likewise.
* gcc.target/i386/sse4_1-blendvpd.c: Likewise.
* gcc.target/i386/sse4_1-blendvps.c: Likewise.
* gcc.target/i386/sse4_1-check.h: Likewise.
* gcc.target/i386/sse4_1-dppd-1.c: Likewise.
* gcc.target/i386/sse4_1-dppd-2.c: Likewise.
* gcc.target/i386/sse4_1-dpps-1.c: Likewise.
* gcc.target/i386/sse4_1-dpps-2.c: Likewise.
* gcc.target/i386/sse4_1-extractps.c: Likewise.
* gcc.target/i386/sse4_1-insertps-1.c: Likewise.
* gcc.target/i386/sse4_1-insertps-2.c: Likewise.
* gcc.target/i386/sse4_1-movntdqa.c: Likewise.
* gcc.target/i386/sse4_1-mpsadbw.c: Likewise.
* gcc.target/i386/sse4_1-packusdw.c: Likewise.
* gcc.target/i386/sse4_1-pblendvb.c: Likewise.
* gcc.target/i386/sse4_1-pblendw.c: Likewise.
* gcc.target/i386/sse4_1-pcmpeqq.c: Likewise.
* gcc.target/i386/sse4_1-pextrb.c: Likewise.
* gcc.target/i386/sse4_1-pextrd.c: Likewise.
* gcc.target/i386/sse4_1-pextrq.c: Likewise.
* gcc.target/i386/sse4_1-pextrw.c: Likewise.
* gcc.target/i386/sse4_1-phminposuw.c: Likewise.
* gcc.target/i386/sse4_1-pinsrb.c: Likewise.
* gcc.target/i386/sse4_1-pinsrd.c: Likewise.
* gcc.target/i386/sse4_1-pinsrq.c: Likewise.
* gcc.target/i386/sse4_1-pmaxsb.c: Likewise.
* gcc.target/i386/sse4_1-pmaxsd.c: Likewise.
* gcc.target/i386/sse4_1-pmaxud.c: Likewise.
* gcc.target/i386/sse4_1-pmaxuw.c: Likewise.
* gcc.target/i386/sse4_1-pminsb.c: Likewise.
* gcc.target/i386/sse4_1-pminsd.c: Likewise.
* gcc.target/i386/sse4_1-pminud.c: Likewise.
* gcc.target/i386/sse4_1-pminuw.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxbd.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxbq.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxbw.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxdq.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxwd.c: Likewise.
* gcc.target/i386/sse4_1-pmovsxwq.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxbd.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxbq.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxbw.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxdq.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxwd.c: Likewise.
* gcc.target/i386/sse4_1-pmovzxwq.c: Likewise.
* gcc.target/i386/sse4_1-pmuldq.c: Likewise.
* gcc.target/i386/sse4_1-pmulld.c: Likewise.
* gcc.target/i386/sse4_1-ptest-1.c: Likewise.
* gcc.target/i386/sse4_1-ptest-2.c: Likewise.
* gcc.target/i386/sse4_1-ptest-3.c: Likewise.
* gcc.target/i386/sse4_1-round.h: Likewise.
* gcc.target/i386/sse4_1-roundpd-1.c: Likewise.
* gcc.target/i386/sse4_1-roundpd-2.c: Likewise.
* gcc.target/i386/sse4_1-roundpd-3.c: Likewise.
* gcc.target/i386/sse4_1-roundps-1.c: Likewise.
* gcc.target/i386/sse4_1-roundps-2.c: Likewise.
* gcc.target/i386/sse4_1-roundps-3.c: Likewise.
* gcc.target/i386/sse4_1-roundsd-1.c: Likewise.
* gcc.target/i386/sse4_1-roundsd-2.c: Likewise.
* gcc.target/i386/sse4_1-roundsd-3.c: Likewise.
* gcc.target/i386/sse4_1-roundsd-4.c: Likewise.
* gcc.target/i386/sse4_1-roundss-1.c: Likewise.
* gcc.target/i386/sse4_1-roundss-2.c: Likewise.
* gcc.target/i386/sse4_1-roundss-3.c: Likewise.
* gcc.target/i386/sse4_1-roundss-4.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@124947 138bc75d-0d04-0410-961f-82ee72b054a4
69 files changed, 3383 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c27bbe1a1c9..a74038d80e0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,79 @@ +2007-05-22 H.J. Lu <hongjiu.lu@intel.com> + + * gcc.dg/i386-cpuid.h (bit_SSE4_1): New. + (bit_SSE4_2): Likewise. + (bit_POPCNT): Likewise. + + * gcc.target/i386/i386.exp (check_effective_target_sse4): New. + Check if assembler supports SSE4 instructions. + + * gcc.target/i386/sse4_1-blendpd.c: New file. + * gcc.target/i386/sse4_1-blendps.c: Likewise. + * gcc.target/i386/sse4_1-blendvpd.c: Likewise. + * gcc.target/i386/sse4_1-blendvps.c: Likewise. + * gcc.target/i386/sse4_1-check.h: Likewise. + * gcc.target/i386/sse4_1-dppd-1.c: Likewise. + * gcc.target/i386/sse4_1-dppd-2.c: Likewise. + * gcc.target/i386/sse4_1-dpps-1.c: Likewise. + * gcc.target/i386/sse4_1-dpps-2.c: Likewise. + * gcc.target/i386/sse4_1-extractps.c: Likewise. + * gcc.target/i386/sse4_1-insertps-1.c: Likewise. + * gcc.target/i386/sse4_1-insertps-2.c: Likewise. + * gcc.target/i386/sse4_1-movntdqa.c: Likewise. + * gcc.target/i386/sse4_1-mpsadbw.c: Likewise. + * gcc.target/i386/sse4_1-packusdw.c: Likewise. + * gcc.target/i386/sse4_1-pblendvb.c: Likewise. + * gcc.target/i386/sse4_1-pblendw.c: Likewise. + * gcc.target/i386/sse4_1-pcmpeqq.c: Likewise. + * gcc.target/i386/sse4_1-pextrb.c: Likewise. + * gcc.target/i386/sse4_1-pextrd.c: Likewise. + * gcc.target/i386/sse4_1-pextrq.c: Likewise. + * gcc.target/i386/sse4_1-pextrw.c: Likewise. + * gcc.target/i386/sse4_1-phminposuw.c: Likewise. + * gcc.target/i386/sse4_1-pinsrb.c: Likewise. + * gcc.target/i386/sse4_1-pinsrd.c: Likewise. + * gcc.target/i386/sse4_1-pinsrq.c: Likewise. + * gcc.target/i386/sse4_1-pmaxsb.c: Likewise. + * gcc.target/i386/sse4_1-pmaxsd.c: Likewise. + * gcc.target/i386/sse4_1-pmaxud.c: Likewise. + * gcc.target/i386/sse4_1-pmaxuw.c: Likewise. + * gcc.target/i386/sse4_1-pminsb.c: Likewise. + * gcc.target/i386/sse4_1-pminsd.c: Likewise. 
+ * gcc.target/i386/sse4_1-pminud.c: Likewise. + * gcc.target/i386/sse4_1-pminuw.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxbd.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxbq.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxbw.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxdq.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxwd.c: Likewise. + * gcc.target/i386/sse4_1-pmovsxwq.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxbd.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxbq.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxbw.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxdq.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxwd.c: Likewise. + * gcc.target/i386/sse4_1-pmovzxwq.c: Likewise. + * gcc.target/i386/sse4_1-pmuldq.c: Likewise. + * gcc.target/i386/sse4_1-pmulld.c: Likewise. + * gcc.target/i386/sse4_1-ptest-1.c: Likewise. + * gcc.target/i386/sse4_1-ptest-2.c: Likewise. + * gcc.target/i386/sse4_1-ptest-3.c: Likewise. + * gcc.target/i386/sse4_1-round.h: Likewise. + * gcc.target/i386/sse4_1-roundpd-1.c: Likewise. + * gcc.target/i386/sse4_1-roundpd-2.c: Likewise. + * gcc.target/i386/sse4_1-roundpd-3.c: Likewise. + * gcc.target/i386/sse4_1-roundps-1.c: Likewise. + * gcc.target/i386/sse4_1-roundps-2.c: Likewise. + * gcc.target/i386/sse4_1-roundps-3.c: Likewise. + * gcc.target/i386/sse4_1-roundsd-1.c: Likewise. + * gcc.target/i386/sse4_1-roundsd-2.c: Likewise. + * gcc.target/i386/sse4_1-roundsd-3.c: Likewise. + * gcc.target/i386/sse4_1-roundsd-4.c: Likewise. + * gcc.target/i386/sse4_1-roundss-1.c: Likewise. + * gcc.target/i386/sse4_1-roundss-2.c: Likewise. + * gcc.target/i386/sse4_1-roundss-3.c: Likewise. + * gcc.target/i386/sse4_1-roundss-4.c: Likewise. 
+ 2007-05-22 Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> PR fortran/31627 diff --git a/gcc/testsuite/gcc.dg/i386-cpuid.h b/gcc/testsuite/gcc.dg/i386-cpuid.h index c7b999c7fdf..8c7a39f45a9 100644 --- a/gcc/testsuite/gcc.dg/i386-cpuid.h +++ b/gcc/testsuite/gcc.dg/i386-cpuid.h @@ -5,6 +5,9 @@ /* %ecx */ #define bit_SSE3 (1 << 0) #define bit_SSSE3 (1 << 9) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_POPCNT (1 << 23) /* %edx */ #define bit_CMOV (1 << 15) diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index c2e1336e1f3..f982045b35c 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -37,6 +37,20 @@ proc check_effective_target_ssse3 { } { } "-O2 -mssse3" ] } +# Return 1 if sse4 instructions can be compiled. +proc check_effective_target_sse4 { } { + return [check_no_compiler_messages sse4.1 object { + typedef long long __m128i __attribute__ ((__vector_size__ (16))); + typedef int __v4si __attribute__ ((__vector_size__ (16))); + + __m128i _mm_mullo_epi32 (__m128i __X, __m128i __Y) + { + return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, + (__v4si)__Y); + } + } "-O2 -msse4.1" ] +} + # Return 1 if sse4a instructions can be compiled. 
proc check_effective_target_sse4a { } { return [check_no_compiler_messages sse4a object { diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendpd.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendpd.c new file mode 100644 index 00000000000..8f51a99cd34 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendpd.c @@ -0,0 +1,81 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#ifndef MASK +#define MASK 0x03 +#endif + +static void +init_blendpd (double *src1, double *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 2; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendpd (__m128d *dst, double *src1, double *src2) +{ + double tmp[2]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + + for(j = 0; j < 2; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128d x, y; + union + { + __m128d x[NUM]; + double d[NUM * 2]; + } dst, src1, src2; + union + { + __m128d x; + double d[2]; + } src3; + int i; + + init_blendpd (src1.d, src2.d); + + /* Check blendpd imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_pd (src1.x[i], src2.x[i], MASK); + if (check_blendpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2])) + abort (); + } + + /* Check blendpd imm8, xmm, xmm */ + src3.x = _mm_setzero_pd (); + + x = _mm_blend_pd (dst.x[2], src3.x, MASK); + y = _mm_blend_pd (src3.x, dst.x[2], MASK); + + if (check_blendpd (&x, &dst.d[4], &src3.d[0])) + abort (); + + if (check_blendpd (&y, &src3.d[0], &dst.d[4])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendps.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendps.c new file mode 100644 index 00000000000..6e0d3d1528c --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/sse4_1-blendps.c @@ -0,0 +1,78 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#ifndef MASK +#define MASK 0x0f +#endif + +static void +init_blendps (float *src1, float *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 4; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendps (__m128 *dst, float *src1, float *src2) +{ + float tmp[4]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 4; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128 x, y; + union + { + __m128 x[NUM]; + float f[NUM * 4]; + } dst, src1, src2; + union + { + __m128 x; + float f[4]; + } src3; + int i; + + init_blendps (src1.f, src2.f); + + /* Check blendps imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) + abort (); + } + + /* Check blendps imm8, xmm, xmm */ + x = _mm_blend_ps (dst.x[2], src3.x, MASK); + y = _mm_blend_ps (src3.x, dst.x[2], MASK); + + if (check_blendps (&x, &dst.f[8], &src3.f[0])) + abort (); + + if (check_blendps (&y, &src3.f[0], &dst.f[8])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendvpd.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendvpd.c new file mode 100644 index 00000000000..3892f826b71 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendvpd.c @@ -0,0 +1,65 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +static void +init_blendvpd (double *src1, double 
*src2, double *mask) +{ + int i, msk, sign = 1; + + msk = -1; + for (i = 0; i < NUM * 2; i++) + { + if((i % 2) == 0) + msk++; + src1[i] = i* (i + 1) * sign; + src2[i] = (i + 20) * sign; + mask[i] = (i + 120) * i; + if( (msk & (1 << (i % 2)))) + mask[i] = -mask[i]; + sign = -sign; + } +} + +static int +check_blendvpd (__m128d *dst, double *src1, double *src2, + double *mask) +{ + double tmp[2]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 2; j++) + if (mask [j] < 0.0) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + union + { + __m128d x[NUM]; + double d[NUM * 2]; + } dst, src1, src2, mask; + int i; + + init_blendvpd (src1.d, src2.d, mask.d); + + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blendv_pd (src1.x[i], src2.x[i], mask.x[i]); + if (check_blendvpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2], + &mask.d[i * 2])) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendvps.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendvps.c new file mode 100644 index 00000000000..6fff7766595 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendvps.c @@ -0,0 +1,65 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +static void +init_blendvps (float *src1, float *src2, float *mask) +{ + int i, msk, sign = 1; + + msk = -1; + for (i = 0; i < NUM * 4; i++) + { + if((i % 4) == 0) + msk++; + src1[i] = i* (i + 1) * sign; + src2[i] = (i + 20) * sign; + mask[i] = (i + 120) * i; + if( (msk & (1 << (i % 4)))) + mask[i] = -mask[i]; + sign = -sign; + } +} + +static int +check_blendvps (__m128 *dst, float *src1, float *src2, + float *mask) +{ + float tmp[4]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 4; j++) + if (mask [j] < 0.0) + tmp[j] = src2[j]; + + return memcmp (dst, 
&tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + union + { + __m128 x[NUM]; + float f[NUM * 4]; + } dst, src1, src2, mask; + int i; + + init_blendvps (src1.f, src2.f, mask.f); + + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blendv_ps (src1.x[i], src2.x[i], mask.x[i]); + if (check_blendvps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4], + &mask.f[i * 4])) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-check.h b/gcc/testsuite/gcc.target/i386/sse4_1-check.h new file mode 100644 index 00000000000..78e80511c3c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-check.h @@ -0,0 +1,22 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "../auto-host.h" + +#include "../../gcc.dg/i386-cpuid.h" + +static void sse4_1_test (void); + +int +main () +{ + unsigned long cpu_facilities; + + cpu_facilities = i386_cpuid_ecx (); + + /* Run SSE4.1 test only if host has SSE4.1 support. */ + if ((cpu_facilities & bit_SSE4_1)) + sse4_1_test (); + + exit (0); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-dppd-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-dppd-1.c new file mode 100644 index 00000000000..0a150542c60 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-dppd-1.c @@ -0,0 +1,63 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define lmskN 0x00 +#define lmsk0 0x01 +#define lmsk1 0x02 +#define lmsk01 0x03 + +#define hmskA 0x30 +#define hmsk0 0x10 +#define hmsk1 0x20 +#define hmsk01 0x30 +#define hmskN 0x00 + +#ifndef HIMASK +#define HIMASK hmskA +#endif + +static void +sse4_1_test (void) +{ + union + { + __m128d x; + double d[2]; + } val1, val2, res[4]; + int masks[4]; + int i, j; + + val1.d[0] = 2.; + val1.d[1] = 3.; + + val2.d[0] = 10.; + val2.d[1] = 100.; + + res[0].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmskN); + res[1].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk0); + res[2].x 
= _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk1); + res[3].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk01); + + masks[0] = HIMASK | lmskN; + masks[1] = HIMASK | lmsk0; + masks[2] = HIMASK | lmsk1; + masks[3] = HIMASK | lmsk01; + + for (i = 0; i < 4; i++) + { + double tmp = 0.; + + for (j = 0; j < 2; j++) + if (HIMASK & (0x10 << j)) + tmp = tmp + (val1.d[j] * val2.d[j]); + + for (j = 0; j < 2; j++) + if ((masks[i] & (1 << j)) && res[i].d[j] != tmp) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-dppd-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-dppd-2.c new file mode 100644 index 00000000000..2a8a0bcdcf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-dppd-2.c @@ -0,0 +1,64 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define lmskN 0x00 +#define lmsk0 0x01 +#define lmsk1 0x02 +#define lmsk01 0x03 + +#define hmskA 0x30 +#define hmsk0 0x10 +#define hmsk1 0x20 +#define hmsk01 0x30 +#define hmskN 0x00 + +#ifndef HIMASK +#define HIMASK hmskA +#endif + +#ifndef LOMASK +#define LOMASK lmsk01 +#endif + +static void +sse4_1_test (void) +{ + union + { + __m128d x; + double d[2]; + } val1[4], val2[4], res[4], chk[4]; + int i, j; + double tmp; + + for (i = 0; i < 4; i++) + { + val1[i].d [0] = 2.; + val1[i].d [1] = 3.; + + val2[i].d [0] = 10.; + val2[i].d [1] = 100.; + + tmp = 0.; + for (j = 0; j < 2; j++) + if ((HIMASK & (0x10 << j))) + tmp += val1[i].d [j] * val2[i].d [j]; + + for (j = 0; j < 2; j++) + if ((LOMASK & (1 << j))) + chk[i].d[j] = tmp; + } + + for (i = 0; i < 4; i++) + { + res[i].x = _mm_dp_pd (val1[i].x, val2[i].x, HIMASK | LOMASK); + if (memcmp (&res[i], &chk[i], sizeof (chk[i]))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c new file mode 100644 index 00000000000..08c008399a0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c @@ -0,0 +1,106 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define lmskN 0x00 +#define lmsk0 0x01 +#define lmsk1 0x02 +#define lmsk2 0x04 +#define lmsk3 0x08 +#define lmsk01 0x03 +#define lmsk02 0x05 +#define lmsk03 0x09 +#define lmsk12 0x06 +#define lmsk13 0x0A +#define lmsk23 0x0C +#define lmskA 0x0F + +#define hmskN 0x00 +#define hmskA 0xF0 +#define hmsk0 0x10 +#define hmsk1 0x20 +#define hmsk2 0x40 +#define hmsk3 0x80 +#define hmsk01 0x30 +#define hmsk02 0x50 +#define hmsk03 0x90 +#define hmsk12 0x60 +#define hmsk13 0xA0 +#define hmsk23 0xC0 + +#ifndef HIMASK +#define HIMASK hmskA +#endif + +static void +sse4_1_test (void) +{ + union + { + __m128 x; + float f[4]; + } val1, val2, res[16]; + int masks[16]; + int i, j; + + val1.f[0] = 2.; + val1.f[1] = 3.; + val1.f[2] = 4.; + val1.f[3] = 5.; + + val2.f[0] = 10.; + val2.f[1] = 100.; + val2.f[2] = 1000.; + val2.f[3] = 10000.; + + res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0); + res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1); + res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2); + res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3); + res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01); + res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02); + res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03); + res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12); + res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13); + res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23); + res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0)); + res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1)); + res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2)); + res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3)); + res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN); + 
res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA); + + masks[0] = HIMASK | lmsk0; + masks[1] = HIMASK | lmsk1; + masks[2] = HIMASK | lmsk2; + masks[3] = HIMASK | lmsk3; + masks[4] = HIMASK | lmsk01; + masks[5] = HIMASK | lmsk02; + masks[6] = HIMASK | lmsk03; + masks[7] = HIMASK | lmsk12; + masks[8] = HIMASK | lmsk13; + masks[9] = HIMASK | lmsk23; + masks[10] = HIMASK | (0x0F & ~lmsk0); + masks[11] = HIMASK | (0x0F & ~lmsk1); + masks[12] = HIMASK | (0x0F & ~lmsk2); + masks[13] = HIMASK | (0x0F & ~lmsk3); + masks[14] = HIMASK | lmskN; + masks[15] = HIMASK | lmskA; + + for (i = 0; i <= 15; i++) + { + float tmp = 0.; + + for (j = 0; j < 4; j++) + if ((HIMASK & (0x10 << j))) + tmp += val1.f[j] * val2.f[j]; + + for (j = 0; j < 4; j++) + if ((masks[i] & (1 << j)) && res[i].f[j] != tmp) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-dpps-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-2.c new file mode 100644 index 00000000000..1c9a7c9342e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-2.c @@ -0,0 +1,83 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define lmskN 0x00 +#define lmsk0 0x01 +#define lmsk1 0x02 +#define lmsk2 0x04 +#define lmsk3 0x08 +#define lmsk01 0x03 +#define lmsk02 0x05 +#define lmsk03 0x09 +#define lmsk12 0x06 +#define lmsk13 0x0A +#define lmsk23 0x0C +#define lmskA 0x0F + +#define hmskN 0x00 +#define hmskA 0xF0 +#define hmsk0 0x10 +#define hmsk1 0x20 +#define hmsk2 0x40 +#define hmsk3 0x80 +#define hmsk01 0x30 +#define hmsk02 0x50 +#define hmsk03 0x90 +#define hmsk12 0x60 +#define hmsk13 0xA0 +#define hmsk23 0xC0 + +#ifndef HIMASK +#define HIMASK hmskA +#endif + +#ifndef LOMASK +#define LOMASK lmskA +#endif + +static void +sse4_1_test (void) +{ + union + { + __m128 x; + float f[4]; + } val1[16], val2[16], res[16], chk[16]; + int i,j; + float tmp; 
+ + for (i = 0; i < 16; i++) + { + val1[i].f[0] = 2.; + val1[i].f[1] = 3.; + val1[i].f[2] = 4.; + val1[i].f[3] = 5.; + + val2[i].f[0] = 10.; + val2[i].f[1] = 100.; + val2[i].f[2] = 1000.; + val2[i].f[3] = 10000.; + + tmp = 0.; + for (j = 0; j < 4; j++) + if ((HIMASK & (0x10 << j))) + tmp += val1[i].f [j] * val2[i].f [j]; + + for (j = 0; j < 4; j++) + if ((LOMASK & (1 << j))) + chk[i].f[j] = tmp; + } + + for (i = 0; i < 16; i++) + { + res[i].x = _mm_dp_ps (val1[i].x, val2[i].x, HIMASK | LOMASK); + if (memcmp (&res[i], &chk[i], sizeof (chk[i]))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-extractps.c b/gcc/testsuite/gcc.target/i386/sse4_1-extractps.c new file mode 100644 index 00000000000..03a18dd1be4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-extractps.c @@ -0,0 +1,64 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +int masks[4]; + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 + +static void +sse4_1_test (void) +{ + union + { + __m128 x; + float f[4]; + } val1, val2; + union + { + int i; + float f; + } res[4]; + float resm[4]; + int i; + + val1.f[0] = 10.; + val1.f[1] = 2.; + val1.f[2] = 3.; + val1.f[3] = 40.; + + val2.f[0] = 77.; + val2.f[1] = 21.; + val2.f[2] = 34.; + val2.f[3] = 49.; + + res[0].i = _mm_extract_ps (val1.x, msk0); + res[1].i = _mm_extract_ps (val1.x, msk1); + res[2].i = _mm_extract_ps (val1.x, msk2); + res[3].i = _mm_extract_ps (val1.x, msk3); + + _MM_EXTRACT_FLOAT (resm[0], val2.x, msk0); + _MM_EXTRACT_FLOAT (resm[1], val2.x, msk1); + _MM_EXTRACT_FLOAT (resm[2], val2.x, msk2); + _MM_EXTRACT_FLOAT (resm[3], val2.x, msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for( i=0; i < 4; i++ ) + { + if (res[i].f != val1.f[masks[i]]) + abort (); + if (resm[i] != val2.f[masks[i]]) + abort (); + } +} diff 
--git a/gcc/testsuite/gcc.target/i386/sse4_1-insertps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-1.c new file mode 100644 index 00000000000..eaaaf0d8544 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-1.c @@ -0,0 +1,71 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0x01 +#define msk1 0x10 +#define msk2 0x29 +#define msk3 0x30 + +#define msk4 0xFC +#define msk5 0x05 +#define msk6 0x0A +#define msk7 0x0F + +static void +sse4_1_test (void) +{ + union + { + __m128 x; + float f[4]; + } res[8], val1, val2, tmp; + int masks[8]; + int i, j; + + val2.f[0] = 55.0; + val2.f[1] = 55.0; + val2.f[2] = 55.0; + val2.f[3] = 55.0; + + val1.f[0] = 1.; + val1.f[1] = 2.; + val1.f[2] = 3.; + val1.f[3] = 4.; + + res[0].x = _mm_insert_ps (val2.x, val1.x, msk0); + res[1].x = _mm_insert_ps (val2.x, val1.x, msk1); + res[2].x = _mm_insert_ps (val2.x, val1.x, msk2); + res[3].x = _mm_insert_ps (val2.x, val1.x, msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for (i = 0; i < 4; i++) + res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4); + + masks[4] = msk4; + masks[5] = msk4; + masks[6] = msk4; + masks[7] = msk4; + + for (i=0; i < 8; i++) + { + tmp = val2; + tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6]; + + for (j = 0; j < 4; j++) + if (masks[i] & (0x1 << j)) + tmp.f[j] = 0.f; + + if (memcmp (&res[i], &tmp, sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insertps-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-2.c new file mode 100644 index 00000000000..fa9e277b8ea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-2.c @@ -0,0 +1,44 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include 
"sse4_1-check.h" + +#include <smmintrin.h> + +static void +sse4_1_test (void) +{ + union + { + __m128 x; + float f[4]; + } vals[4], val; + int i, j; + + val.f[0]= 1.; + val.f[1]= 2.; + val.f[2]= 3.; + val.f[3]= 4.; + + vals[0].x = _MM_PICK_OUT_PS (val.x, 0); + vals[1].x = _MM_PICK_OUT_PS (val.x, 1); + vals[2].x = _MM_PICK_OUT_PS (val.x, 2); + vals[3].x = _MM_PICK_OUT_PS (val.x, 3); + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + if ((j != 0 && vals[i].f[j] != 0) + || (j == 0 && vals[i].f[j] != val.f[i])) + abort (); + + if (_MM_MK_INSERTPS_NDX(0, 0, 0x1) != 0x01 + || _MM_MK_INSERTPS_NDX(0, 1, 0x2) != 0x12 + || _MM_MK_INSERTPS_NDX(0, 2, 0x3) != 0x23 + || _MM_MK_INSERTPS_NDX(0, 3, 0x4) != 0x34 + || _MM_MK_INSERTPS_NDX(1, 0, 0x5) != 0x45 + || _MM_MK_INSERTPS_NDX(1, 1, 0x6) != 0x56 + || _MM_MK_INSERTPS_NDX(2, 2, 0x7) != 0xA7 + || _MM_MK_INSERTPS_NDX(3, 3, 0x8) != 0xF8) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-movntdqa.c b/gcc/testsuite/gcc.target/i386/sse4_1-movntdqa.c new file mode 100644 index 00000000000..a08e317c95a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-movntdqa.c @@ -0,0 +1,43 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +static void +init_movntdqa (int *src) +{ + int i, j, sign = 1; + + for (i = 0; i < NUM; i++) + for (j = 0; j < 4; j++) + { + src[i * 4 + j] = j * i * i * sign; + sign = -sign; + } +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM]; + int i[NUM * 4]; + } dst, src; + int i; + + init_movntdqa (src.i); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_stream_load_si128 (&src.x[i]); + + for (i = 0; i < NUM; i++) + if (memcmp (&dst.x[i], &src.x[i], sizeof(src.x[i]))) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-mpsadbw.c b/gcc/testsuite/gcc.target/i386/sse4_1-mpsadbw.c new file mode 
100644 index 00000000000..f327c145b27 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-mpsadbw.c @@ -0,0 +1,122 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0xC0 +#define msk1 0x01 +#define msk2 0xF2 +#define msk3 0x03 +#define msk4 0x84 +#define msk5 0x05 +#define msk6 0xE6 +#define msk7 0x67 + +static __m128i +compute_mpsadbw (unsigned char *v1, unsigned char *v2, int mask) +{ + union + { + __m128i x; + unsigned short s[8]; + } ret; + unsigned char s[4]; + int i, j; + int offs1, offs2; + + offs2 = 4 * (mask & 3); + for (i = 0; i < 4; i++) + s[i] = v2[offs2 + i]; + + offs1 = 4 * ((mask & 4) >> 2); + for (j = 0; j < 8; j++) + { + ret.s[j] = 0; + for (i = 0; i < 4; i++) + ret.s[j] += abs (v1[offs1 + j + i] - s[i]); + } + + return ret.x; +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned char c[16]; + } val1, val2, val3 [8]; + __m128i res[8], tmp; + unsigned char masks[8]; + int i; + + val1.i[0] = 0x35251505; + val1.i[1] = 0x75655545; + val1.i[2] = 0xB5A59585; + val1.i[3] = 0xF5E5D5C5; + + val2.i[0] = 0x31211101; + val2.i[1] = 0x71615141; + val2.i[2] = 0xB1A19181; + val2.i[3] = 0xF1E1D1C1; + + for (i=0; i < 8; i++) + switch (i % 3) + { + case 1: + val3[i].i[0] = 0xF1E1D1C1; + val3[i].i[1] = 0xB1A19181; + val3[i].i[2] = 0x71615141; + val3[i].i[3] = 0x31211101; + break; + default: + val3[i].x = val2.x; + break; + } + + /* Check mpsadbw imm8, xmm, xmm. 
*/ + res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0); + res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1); + res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2); + res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3); + res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4); + res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5); + res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6); + res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + + for (i=0; i < 8; i++) + { + tmp = compute_mpsadbw (val1.c, val2.c, masks[i]); + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check mpsadbw imm8, m128, xmm. */ + for (i=0; i < 8; i++) + { + res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4); + masks[i] = msk4; + } + + for (i=0; i < 8; i++) + { + tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]); + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-packusdw.c b/gcc/testsuite/gcc.target/i386/sse4_1-packusdw.c new file mode 100644 index 00000000000..d21f4463863 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-packusdw.c @@ -0,0 +1,65 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static unsigned short +int_to_ushort (int iVal) +{ + unsigned short sVal; + + if (iVal < 0) + sVal = 0; + else if (iVal > 0xffff) + sVal = 0xffff; + else sVal = iVal; + + return sVal; +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } src1, src2; + union + { + __m128i x[NUM / 4]; + unsigned short s[NUM * 2]; + } dst; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + 
dst.x[i / 4] = _mm_packus_epi32 (src1.x [i / 4], src2.x [i / 4]); + + for (i = 0; i < NUM; i ++) + { + int dstIndex; + unsigned short sVal; + + sVal = int_to_ushort (src1.i[i]); + dstIndex = (i % 4) + (i / 4) * 8; + if (sVal != dst.s[dstIndex]) + abort (); + + sVal = int_to_ushort (src2.i[i]); + dstIndex += 4; + if (sVal != dst.s[dstIndex]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pblendvb.c b/gcc/testsuite/gcc.target/i386/sse4_1-pblendvb.c new file mode 100644 index 00000000000..fe93a783b6a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pblendvb.c @@ -0,0 +1,62 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +static void +init_pblendvb (unsigned char *src1, unsigned char *src2, + unsigned char *mask) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 16; i++) + { + src1[i] = i* i * sign; + src2[i] = (i + 20) * sign; + mask[i] = (i % 3) + ((i * (14 + sign)) + ^ (src1[i] | src2[i] | (i*3))); + sign = -sign; + } +} + +static int +check_pblendvb (__m128i *dst, unsigned char *src1, + unsigned char *src2, unsigned char *mask) +{ + unsigned char tmp[16]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 16; j++) + if (mask [j] & 0x80) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM]; + unsigned char c[NUM * 16]; + } dst, src1, src2, mask; + int i; + + init_pblendvb (src1.c, src2.c, mask.c); + + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blendv_epi8 (src1.x[i], src2.x[i], mask.x[i]); + if (check_pblendvb (&dst.x[i], &src1.c[i * 16], &src2.c[i * 16], + &mask.c[i * 16])) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pblendw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw.c new file mode 100644 index 00000000000..07e59e8a24d 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw.c @@ -0,0 +1,80 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#ifndef MASK +#define MASK 0x0f +#endif + +static void +init_pblendw (short *src1, short *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 8; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_pblendw (__m128i *dst, short *src1, short *src2) +{ + short tmp[8]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 8; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128i x, y; + union + { + __m128i x[NUM]; + short s[NUM * 8]; + } dst, src1, src2; + union + { + __m128i x; + short s[8]; + } src3; + int i; + + init_pblendw (src1.s, src2.s); + + /* Check pblendw imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK); + if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8])) + abort (); + } + + /* Check pblendw imm8, xmm, xmm */ + src3.x = _mm_setzero_si128 (); + + x = _mm_blend_epi16 (dst.x[2], src3.x, MASK); + y = _mm_blend_epi16 (src3.x, dst.x[2], MASK); + + if (check_pblendw (&x, &dst.s[16], &src3.s[0])) + abort (); + + if (check_pblendw (&y, &src3.s[0], &dst.s[16])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pcmpeqq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pcmpeqq.c new file mode 100644 index 00000000000..34653c4f4c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pcmpeqq.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + 
+static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst, src1, src2; + int i, sign=1; + long long is_eq; + + for (i = 0; i < NUM; i++) + { + src1.ll[i] = i * i * sign; + src2.ll[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cmpeq_epi64(src1.x [i / 2], src2.x [i / 2]); + + for (i = 0; i < NUM; i++) + { + is_eq = src1.ll[i] == src2.ll[i] ? 0xffffffffffffffffLL : 0LL; + if (is_eq != dst.ll[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pextrb.c b/gcc/testsuite/gcc.target/i386/sse4_1-pextrb.c new file mode 100644 index 00000000000..dc07d02b93d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pextrb.c @@ -0,0 +1,80 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define msk0 0 +#define msk1 1 +#define msk2 2 +#define msk3 3 +#define msk4 4 +#define msk5 5 +#define msk6 6 +#define msk7 7 +#define msk8 8 +#define msk9 9 +#define msk10 10 +#define msk11 11 +#define msk12 12 +#define msk13 13 +#define msk14 14 +#define msk15 15 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + int i[4]; + char c[16]; + } val1; + int res[16], masks[16]; + int i; + + val1.i[0] = 0x04030201; + val1.i[1] = 0x08070605; + val1.i[2] = 0x0C0B0A09; + val1.i[3] = 0x100F0E0D; + + res[0] = _mm_extract_epi8 (val1.x, msk0); + res[1] = _mm_extract_epi8 (val1.x, msk1); + res[2] = _mm_extract_epi8 (val1.x, msk2); + res[3] = _mm_extract_epi8 (val1.x, msk3); + res[4] = _mm_extract_epi8 (val1.x, msk4); + res[5] = _mm_extract_epi8 (val1.x, msk5); + res[6] = _mm_extract_epi8 (val1.x, msk6); + res[7] = _mm_extract_epi8 (val1.x, msk7); + res[8] = _mm_extract_epi8 (val1.x, msk8); + res[9] = _mm_extract_epi8 (val1.x, msk9); + res[10] = _mm_extract_epi8 (val1.x, msk10); + res[11] = _mm_extract_epi8 (val1.x, msk11); + res[12] = 
_mm_extract_epi8 (val1.x, msk12); + res[13] = _mm_extract_epi8 (val1.x, msk13); + res[14] = _mm_extract_epi8 (val1.x, msk14); + res[15] = _mm_extract_epi8 (val1.x, msk15); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + masks[8] = msk8; + masks[9] = msk9; + masks[10] = msk10; + masks[11] = msk11; + masks[12] = msk12; + masks[13] = msk13; + masks[14] = msk14; + masks[15] = msk15; + + for (i = 0; i < 16; i++) + if (res[i] != val1.c [masks[i]]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pextrd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pextrd.c new file mode 100644 index 00000000000..b078ca26af6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pextrd.c @@ -0,0 +1,43 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define msk0 0 +#define msk1 1 +#define msk2 2 +#define msk3 3 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + int i[4]; + } val1; + int res[4], masks[4]; + int i; + + val1.i[0] = 0x04030201; + val1.i[1] = 0x08070605; + val1.i[2] = 0x0C0B0A09; + val1.i[3] = 0x100F0E0D; + + res[0] = _mm_extract_epi32 (val1.x, msk0); + res[1] = _mm_extract_epi32 (val1.x, msk1); + res[2] = _mm_extract_epi32 (val1.x, msk2); + res[3] = _mm_extract_epi32 (val1.x, msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for (i = 0; i < 4; i++) + if (res[i] != val1.i [masks[i]]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pextrq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pextrq.c new file mode 100644 index 00000000000..999ed8f0634 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pextrq.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options 
"-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define msk0 0 +#define msk1 1 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + long long ll[2]; + } val1; + long long res[2]; + int masks[2]; + int i; + + val1.ll[0] = 0x0807060504030201LL; + val1.ll[1] = 0x100F0E0D0C0B0A09LL; + + res[0] = _mm_extract_epi64 (val1.x, msk0); + res[1] = _mm_extract_epi64 (val1.x, msk1); + + masks[0] = msk0; + masks[1] = msk1; + + for (i = 0; i < 2; i++) + if (res[i] != val1.ll [masks[i]]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pextrw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pextrw.c new file mode 100644 index 00000000000..ebfac1a6ad3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pextrw.c @@ -0,0 +1,56 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define msk0 0 +#define msk1 1 +#define msk2 2 +#define msk3 3 +#define msk4 4 +#define msk5 5 +#define msk6 6 +#define msk7 7 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + int i[4]; + short s[8]; + } val1; + int res[8], masks[8]; + int i; + + val1.i[0] = 0x04030201; + val1.i[1] = 0x08070605; + val1.i[2] = 0x0C0B0A09; + val1.i[3] = 0x100F0E0D; + + res[0] = _mm_extract_epi16 (val1.x, msk0); + res[1] = _mm_extract_epi16 (val1.x, msk1); + res[2] = _mm_extract_epi16 (val1.x, msk2); + res[3] = _mm_extract_epi16 (val1.x, msk3); + res[4] = _mm_extract_epi16 (val1.x, msk4); + res[5] = _mm_extract_epi16 (val1.x, msk5); + res[6] = _mm_extract_epi16 (val1.x, msk6); + res[7] = _mm_extract_epi16 (val1.x, msk7); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + + for (i = 0; i < 8; i++) + if (res[i] != val1.s [masks[i]]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c 
b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c new file mode 100644 index 00000000000..43f1cade760 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c @@ -0,0 +1,49 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM/8]; + unsigned short s[NUM]; + } src; + unsigned short minVal[NUM/8]; + int minInd[NUM/8]; + unsigned short minValScalar, minIndScalar; + int i, j, res; + + for (i = 0; i < NUM; i++) + src.s[i] = i * i / (i + i / 3.14 + 1.0); + + for (i = 0, j = 0; i < NUM; i += 8, j++) + { + res = _mm_cvtsi128_si32 (_mm_minpos_epu16 (src.x [i/8])); + minVal[j] = res & 0xffff; /* Minimum word is returned in bits 15:0. */ + minInd[j] = (res >> 16) & 0x7; /* Index of the minimum is a 3-bit field in bits 18:16. */ + } + + for (i = 0; i < NUM; i += 8) + { + minValScalar = src.s[i]; + minIndScalar = 0; + + for (j = i + 1; j < i + 8; j++) + if (minValScalar > src.s[j]) /* Strict compare keeps the lowest index on ties. */ + { + minValScalar = src.s[j]; + minIndScalar = j - i; + } + + if (minValScalar != minVal[i/8] || minIndScalar != minInd[i/8]) /* Fail when either the value or the index is wrong. */ + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pinsrb.c b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrb.c new file mode 100644 index 00000000000..2e886c59d92 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrb.c @@ -0,0 +1,102 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 +#define msk4 0x04 +#define msk5 0x05 +#define msk6 0x06 +#define msk7 0x07 +#define msk8 0x08 +#define msk9 0x09 +#define mskA 0x0A +#define mskB 0x0B +#define mskC 0x0C +#define mskD 0x0D +#define mskE 0x0E +#define mskF 0x0F + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int 
i[4]; + unsigned char c[16]; + } res [16], val, tmp; + int masks[16]; + unsigned char ins[4] = { 3, 4, 5, 6 }; + int i; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + /* Check pinsrb imm8, r32, xmm. */ + res[0].x = _mm_insert_epi8 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi8 (val.x, ins[0], msk1); + res[2].x = _mm_insert_epi8 (val.x, ins[0], msk2); + res[3].x = _mm_insert_epi8 (val.x, ins[0], msk3); + res[4].x = _mm_insert_epi8 (val.x, ins[0], msk4); + res[5].x = _mm_insert_epi8 (val.x, ins[0], msk5); + res[6].x = _mm_insert_epi8 (val.x, ins[0], msk6); + res[7].x = _mm_insert_epi8 (val.x, ins[0], msk7); + res[8].x = _mm_insert_epi8 (val.x, ins[0], msk8); + res[9].x = _mm_insert_epi8 (val.x, ins[0], msk9); + res[10].x = _mm_insert_epi8 (val.x, ins[0], mskA); + res[11].x = _mm_insert_epi8 (val.x, ins[0], mskB); + res[12].x = _mm_insert_epi8 (val.x, ins[0], mskC); + res[13].x = _mm_insert_epi8 (val.x, ins[0], mskD); + res[14].x = _mm_insert_epi8 (val.x, ins[0], mskE); + res[15].x = _mm_insert_epi8 (val.x, ins[0], mskF); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + masks[8] = msk8; + masks[9] = msk9; + masks[10] = mskA; + masks[11] = mskB; + masks[12] = mskC; + masks[13] = mskD; + masks[14] = mskE; + masks[15] = mskF; + + for (i = 0; i < 16; i++) + { + tmp.x = val.x; + tmp.c[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrb imm8, m8, xmm. 
*/ + for (i = 0; i < 16; i++) + { + res[i].x = _mm_insert_epi8 (val.x, ins[i % 4], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 16; i++) + { + tmp.x = val.x; + tmp.c[masks[i]] = ins[i % 4]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pinsrd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrd.c new file mode 100644 index 00000000000..85c39a011f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrd.c @@ -0,0 +1,65 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } res [4], val, tmp; + static unsigned int ins[4] = { 3, 4, 5, 6 }; + int masks[4]; + int i; + + val.i[0] = 55; + val.i[1] = 55; + val.i[2] = 55; + val.i[3] = 55; + + /* Check pinsrd imm8, r32, xmm. */ + res[0].x = _mm_insert_epi32 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi32 (val.x, ins[0], msk1); + res[2].x = _mm_insert_epi32 (val.x, ins[0], msk2); + res[3].x = _mm_insert_epi32 (val.x, ins[0], msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for (i = 0; i < 4; i++) + { + tmp.x = val.x; + tmp.i[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrd imm8, m32, xmm. 
*/ + for (i = 0; i < 4; i++) + { + res[i].x = _mm_insert_epi32 (val.x, ins[i], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 4; i++) + { + tmp.x = val.x; + tmp.i[masks[i]] = ins[i]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pinsrq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrq.c new file mode 100644 index 00000000000..2aa25a0bed5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pinsrq.c @@ -0,0 +1,58 @@ +/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0x00 +#define msk1 0x01 + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned long long ll[2]; + } res [4], val, tmp; + int masks[4]; + static unsigned long long ins[2] = + { 0xAABBAABBAABBAABBLL, 0xCCDDCCDDCCDDCCDDLL }; + int i; + + val.ll[0] = 0x0807060504030201LL; + val.ll[1] = 0x100F0E0D0C0B0A09LL; + + /* Check pinsrq imm8, r64, xmm. */ + res[0].x = _mm_insert_epi64 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi64 (val.x, ins[0], msk1); + + masks[0] = msk0; + masks[1] = msk1; + + for (i = 0; i < 2; i++) + { + tmp.x = val.x; + tmp.ll[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrq imm8, m64, xmm. 
*/ + for (i = 0; i < 2; i++) + { + res[i].x = _mm_insert_epi64 (val.x, ins[i], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 2; i++) + { + tmp.x = val.x; + tmp.ll[masks[i]] = ins[i]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsb.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsb.c new file mode 100644 index 00000000000..116f164a6c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsb.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 1024 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 16]; + char i[NUM]; + } dst, src1, src2; + int i, sign = 1; + char max; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 16) + dst.x[i / 16] = _mm_max_epi8 (src1.x[i / 16], src2.x[i / 16]); + + for (i = 0; i < NUM; i++) + { + max = src1.i[i] <= src2.i[i] ? 
src2.i[i] : src1.i[i]; + if (max != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsd.c new file mode 100644 index 00000000000..2f8dfedc2b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxsd.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } dst, src1, src2; + int i, sign = 1; + int max; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_max_epi32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i]; + if (max != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmaxud.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxud.c new file mode 100644 index 00000000000..d8f5d9a090e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxud.c @@ -0,0 +1,39 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + } dst, src1, src2; + int i; + unsigned int max; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i; + src2.i[i] = i + 20; + if ((i % 4)) + src2.i[i] |= 0x80000000; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_max_epu32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + max = src1.i[i] <= src2.i[i] ? 
src2.i[i] : src1.i[i]; + if (max != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmaxuw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxuw.c new file mode 100644 index 00000000000..b0330cbb0f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmaxuw.c @@ -0,0 +1,39 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 8]; + unsigned short i[NUM]; + } dst, src1, src2; + int i; + unsigned short max; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i; + src2.i[i] = i + 20; + if ((i % 8)) + src2.i[i] |= 0x8000; + } + + for (i = 0; i < NUM; i += 8) + dst.x[i / 8] = _mm_max_epu16 (src1.x[i / 8], src2.x[i / 8]); + + for (i = 0; i < NUM; i++) + { + max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i]; + if (max != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pminsb.c b/gcc/testsuite/gcc.target/i386/sse4_1-pminsb.c new file mode 100644 index 00000000000..35ad78f82c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pminsb.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 1024 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 16]; + char i[NUM]; + } dst, src1, src2; + int i, sign = 1; + char min; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 16) + dst.x[i / 16] = _mm_min_epi8 (src1.x[i / 16], src2.x[i / 16]); + + for (i = 0; i < NUM; i++) + { + min = src1.i[i] >= src2.i[i] ? 
src2.i[i] : src1.i[i]; + if (min != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pminsd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pminsd.c new file mode 100644 index 00000000000..62485c4b81e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pminsd.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } dst, src1, src2; + int i, sign = 1; + int min; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_min_epi32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i]; + if (min != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pminud.c b/gcc/testsuite/gcc.target/i386/sse4_1-pminud.c new file mode 100644 index 00000000000..82e1012bca8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pminud.c @@ -0,0 +1,39 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + } dst, src1, src2; + int i; + unsigned int min; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i; + src2.i[i] = i + 20; + if ((i % 4)) + src2.i[i] |= 0x80000000; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_min_epu32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + min = src1.i[i] >= src2.i[i] ? 
src2.i[i] : src1.i[i]; + if (min != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pminuw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pminuw.c new file mode 100644 index 00000000000..0edff3c17f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pminuw.c @@ -0,0 +1,39 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 8]; + unsigned short i[NUM]; + } dst, src1, src2; + int i; + unsigned short min; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i; + src2.i[i] = i + 20; + if ((i % 8)) + src2.i[i] |= 0x8000; + } + + for (i = 0; i < NUM; i += 8) + dst.x[i / 8] = _mm_min_epu16 (src1.x[i / 8], src2.x[i / 8]); + + for (i = 0; i < NUM; i++) + { + min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i]; + if (min != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbd.c new file mode 100644 index 00000000000..9443ecfc941 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbd.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + char c[NUM * 4]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 4) + (i / 4) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbq.c new 
file mode 100644 index 00000000000..11c5c82ac4c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbq.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + char c[NUM * 8]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 2) + (i / 2) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbw.c new file mode 100644 index 00000000000..6c6504c19f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbw.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 8]; + short s[NUM]; + char c[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 8) + (i / 8) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 8) + dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxdq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxdq.c new file mode 100644 index 00000000000..f27b369ff23 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxdq.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { 
dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + int i[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.i[(i % 2) + (i / 2) * 4] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwd.c new file mode 100644 index 00000000000..fbd0e5f6323 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwd.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + short s[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 4) + (i / 4) * 8] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwq.c new file mode 100644 index 00000000000..67ab24c2f3c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwq.c @@ -0,0 +1,34 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + short s[NUM * 4]; + } dst, src; + int i, 
sign = 1; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 2) + (i / 2) * 8] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbd.c new file mode 100644 index 00000000000..1319b524508 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbd.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + unsigned char c[NUM * 4]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 4) + (i / 4) * 16] = i * i; + if ((i % 4)) + src.c[(i % 4) + (i / 4) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbq.c new file mode 100644 index 00000000000..53a880ca622 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbq.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + unsigned long long ll[NUM]; + unsigned char c[NUM * 8]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 2) + (i / 2) * 16] = i * i; + if ((i % 2)) + src.c[(i % 2) + (i / 2) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] 
= _mm_cvtepu8_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbw.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbw.c new file mode 100644 index 00000000000..8f9f9f18aee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbw.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 8]; + unsigned short s[NUM]; + unsigned char c[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 8) + (i / 8) * 16] = i * i; + if ((i % 4)) + src.c[(i % 8) + (i / 8) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 8) + dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxdq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxdq.c new file mode 100644 index 00000000000..e6df123e191 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxdq.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + unsigned long long ll[NUM]; + unsigned int i[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.i[(i % 2) + (i / 2) * 4] = i * i; + if ((i % 2)) + src.i[(i % 2) + (i / 2) * 4] |= 0x80000000; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i]) + abort (); +} diff --git 
a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwd.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwd.c new file mode 100644 index 00000000000..840c2fc8196 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwd.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + unsigned short s[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 4) + (i / 4) * 8] = i * i; + if ((i % 4)) + src.s[(i % 4) + (i / 4) * 8] |= 0x8000; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwq.c new file mode 100644 index 00000000000..2f9a9525185 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwq.c @@ -0,0 +1,35 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 128 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + unsigned long long ll[NUM]; + unsigned short s[NUM * 4]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 2) + (i / 2) * 8] = i * i; + if ((i % 2)) + src.s[(i % 2) + (i / 2) * 8] |= 0x8000; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmuldq.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmuldq.c new file mode 100644 index 00000000000..cd85a17a71c --- 
/dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmuldq.c @@ -0,0 +1,43 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst; + union + { + __m128i x[NUM / 2]; + int i[NUM * 2]; + } src1, src2; + int i, sign = 1; + long long value; + + for (i = 0; i < NUM; i += 2) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x[i / 2] = _mm_mul_epi32 (src1.x[i / 2], src2.x[i / 2]); + + for (i = 0; i < NUM; i++) + { + value = (long long) src1.i[i * 2] * (long long) src2.i[i * 2]; + if (value != dst.ll[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pmulld.c b/gcc/testsuite/gcc.target/i386/sse4_1-pmulld.c new file mode 100644 index 00000000000..136b712956e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pmulld.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +#define NUM 64 + +static void +sse4_1_test (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } dst, src1, src2; + int i, sign = 1; + int value; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_mullo_epi32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + value = src1.i[i] * src2.i[i]; + if (value != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-1.c new file mode 100644 index 00000000000..246caeecb15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-1.c @@ -0,0 
+1,109 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +static int +make_ptestz (__m128i m, __m128i v) +{ + union + { + __m128i x; + unsigned char c[16]; + } val, mask; + int i, z; + + mask.x = m; + val.x = v; + + z = 1; + for (i = 0; i < 16; i++) + if ((mask.c[i] & val.c[i])) + { + z = 0; + break; + } + return z; +} + +static int +make_ptestc (__m128i m, __m128i v) +{ + union + { + __m128i x; + unsigned char c[16]; + } val, mask; + int i, c; + + mask.x = m; + val.x = v; + + c = 1; + for (i = 0; i < 16; i++) + if ((val.c[i] & ~mask.c[i])) + { + c = 0; + break; + } + return c; +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } val[4]; + int i, j, l; + int res[32]; + + val[0].i[0] = 0x11111111; + val[0].i[1] = 0x00000000; + val[0].i[2] = 0x00000000; + val[0].i[3] = 0x11111111; + + val[1].i[0] = 0x00000000; + val[1].i[1] = 0x11111111; + val[1].i[2] = 0x11111111; + val[1].i[3] = 0x00000000; + + val[2].i[0] = 0; + val[2].i[1] = 0; + val[2].i[2] = 0; + val[2].i[3] = 0; + + val[3].i[0] = 0xffffffff; + val[3].i[1] = 0xffffffff; + val[3].i[2] = 0xffffffff; + val[3].i[3] = 0xffffffff; + + l = 0; + for(i = 0; i < 4; i++) + for(j = 0; j < 4; j++) + { + res[l++] = _mm_testz_si128 (val[j].x, val[i].x); + res[l++] = _mm_testc_si128 (val[j].x, val[i].x); + } + + l = 0; + for(i = 0; i < 4; i++) + for(j = 0; j < 4; j++) + { + if (res[l++] != make_ptestz (val[j].x, val[i].x)) + abort (); + if (res[l++] != make_ptestc (val[j].x, val[i].x)) + abort (); + } + + if (res[2] != _mm_testz_si128 (val[1].x, val[0].x)) + abort (); + + if (res[3] != _mm_testc_si128 (val[1].x, val[0].x)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-2.c new file mode 100644 index 00000000000..8f12217680f --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-2.c @@ -0,0 +1,88 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +static int +make_ptestnzc (__m128i m, __m128i v) +{ + union + { + __m128i x; + unsigned char c[16]; + } val, mask; + int i, z, c; + + mask.x = m; + val.x = v; + + z = c = 1; + for (i = 0; i < 16; i++) + { + if ((mask.c[i] & val.c[i])) + z = 0; + if ((~mask.c[i] & val.c[i])) + c = 0; + } + + return (z == 0 && c == 0) ? 1 : 0; +} + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } val[4]; + int i, j, l; + int res[32]; + + val[0].i[0] = 0x11111111; + val[0].i[1] = 0x00000000; + val[0].i[2] = 0x00000000; + val[0].i[3] = 0x11111111; + + val[1].i[0] = 0x00000000; + val[1].i[1] = 0x11111111; + val[1].i[2] = 0x11111111; + val[1].i[3] = 0x00000000; + + val[2].i[0] = 0; + val[2].i[1] = 0; + val[2].i[2] = 0; + val[2].i[3] = 0; + + val[3].i[0] = 0xffffffff; + val[3].i[1] = 0xffffffff; + val[3].i[2] = 0xffffffff; + val[3].i[3] = 0xffffffff; + + l = 0; + for(i = 0; i < 4; i++) + for(j = 0; j < 4; j++) + { + res[l++] = _mm_testnzc_si128 (val[j].x, val[i].x); + res[l++] = _mm_testnzc_si128 (val[j].x, val[i].x); + } + + l = 0; + for(i = 0; i < 4; i++) + for(j = 0; j < 4; j++) + { + if (res[l++] != make_ptestnzc (val[j].x, val[i].x)) + abort (); + if (res[l++] != make_ptestnzc (val[j].x, val[i].x)) + abort (); + } + + if (res[2] != _mm_testnzc_si128 (val[1].x, val[0].x)) + abort (); + + if (res[3] != _mm_testnzc_si128 (val[1].x, val[0].x)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-3.c new file mode 100644 index 00000000000..65602d9ddf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-3.c @@ -0,0 +1,77 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { 
dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> + +static void +sse4_1_test (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } val[4]; + int correct_zeros[4]; + int correct_ones[4]; + int correct_mixed[4]; + int zeros[4]; + int ones[4]; + int mixed[4]; + int i; + __m128i v; + + val[0].i[0] = 0x11111111; + val[0].i[1] = 0x00000000; + val[0].i[2] = 0x00000000; + val[0].i[3] = 0x11111111; + correct_zeros[0] = 0; + correct_ones[0] = 0; + correct_mixed[0] = 1; + + val[1].i[0] = 0x00000000; + val[1].i[1] = 0x11111111; + val[1].i[2] = 0x11111111; + val[1].i[3] = 0x00000000; + correct_zeros[1] = 0; + correct_ones[1] = 0; + correct_mixed[1] = 1; + + val[2].i[0] = 0; + val[2].i[1] = 0; + val[2].i[2] = 0; + val[2].i[3] = 0; + correct_zeros[2] = 1; + correct_ones[2] = 0; + correct_mixed[2] = 0; + + val[3].i[0] = 0xffffffff; + val[3].i[1] = 0xffffffff; + val[3].i[2] = 0xffffffff; + val[3].i[3] = 0xffffffff; + correct_zeros[3] = 0; + correct_ones[3] = 1; + correct_mixed[3] = 0; + + for (i=0; i < 4; i++) + zeros[i] = _mm_test_all_zeros (val[i].x, val[i].x); + + for( i=0; i < 4; i++ ) + ones[i] = _mm_test_all_ones (val[i].x); + + v = _mm_cmpeq_epi32 (val[0].x, val[0].x); + for( i=0; i < 4; i++ ) + mixed[i] = _mm_test_mix_ones_zeros (val[i].x, v); + + for( i=0; i < 4; i++ ) + { + if (zeros[i] != correct_zeros[i]) + abort (); + if (ones[i] != correct_ones[i]) + abort (); + if (mixed[i] != correct_mixed[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round.h b/gcc/testsuite/gcc.target/i386/sse4_1-round.h new file mode 100644 index 00000000000..0210ac130b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-round.h @@ -0,0 +1,95 @@ +#include <smmintrin.h> +#include <math.h> + +#define NUM 64 + +static void +init_round (FP_T *src) +{ + int i, sign = 1; + FP_T f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1)* f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + 
else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI *sign); + sign = -sign; + } +} + +static FP_T +do_round (FP_T f, int type) +{ + short saved_cw, new_cw, clr_mask; + FP_T ret; + + if ((type & 4)) + { + type = 0; + clr_mask = 0xFFFF; + } + else + { + type = 0x003F | ((type & 3) << 10); + clr_mask = ~0x0C3F; + } + + __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f)); + + __asm__ ("fstcw %0" : "=m" (*&saved_cw)); + new_cw = saved_cw & clr_mask; + new_cw |= type; + __asm__ ("fldcw %0" : : "m" (*&new_cw)); + + __asm__ ("frndint\n" + "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret)); + __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + return ret; +} + +static void +sse4_1_test (void) +{ + int i; + FP_T f; + union + { + VEC_T x[NUM / LOOP_INCREMENT]; + FP_T f[NUM]; + } dst, src; + + init_round (src.f); + + for (i = 0; i < NUM / LOOP_INCREMENT; i++) + dst.x[i] = ROUND_INTRIN (src.x[i], ROUND_MODE); + + for (i = 0; i < NUM; i += CHECK_LOOP_INCREMENT) + { + f = do_round (src.f[i], CHECK_ROUND_MODE); + if (f != dst.f[i]) + abort (); + } + + if (_MM_FROUND_TO_NEAREST_INT != 0x00 + || _MM_FROUND_TO_NEG_INF != 0x01 + || _MM_FROUND_TO_POS_INF != 0x02 + || _MM_FROUND_TO_ZERO != 0x03 + || _MM_FROUND_CUR_DIRECTION != 0x04 + || _MM_FROUND_RAISE_EXC != 0x00 + || _MM_FROUND_NO_EXC != 0x08 + || _MM_FROUND_NINT != 0x00 + || _MM_FROUND_FLOOR != 0x01 + || _MM_FROUND_CEIL != 0x02 + || _MM_FROUND_TRUNC != 0x03 + || _MM_FROUND_RINT != 0x04 + || _MM_FROUND_NEARBYINT != 0x0C) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-1.c new file mode 100644 index 00000000000..d39b16e636e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-1.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define 
ASM_SUFFIX "l" + +#define ROUND_INTRIN(x, mode) _mm_ceil_pd(x) +#define ROUND_MODE _MM_FROUND_CEIL +#define CHECK_ROUND_MODE 0x02 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-2.c new file mode 100644 index 00000000000..48fb90f7922 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-2.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define ASM_SUFFIX "l" + +#define ROUND_INTRIN _mm_round_pd +#define ROUND_MODE _MM_FROUND_NINT +#define CHECK_ROUND_MODE 0x00 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-3.c new file mode 100644 index 00000000000..6753977ad85 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundpd-3.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define ASM_SUFFIX "l" + +#define ROUND_INTRIN(x, mode) _mm_floor_pd(x) +#define ROUND_MODE _MM_FROUND_FLOOR +#define CHECK_ROUND_MODE 0x01 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-1.c new file mode 100644 index 00000000000..370ee13508d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-1.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + 
+#define VEC_T __m128 +#define FP_T float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN(x, mode) _mm_ceil_ps(x) +#define ROUND_MODE _MM_FROUND_CEIL +#define CHECK_ROUND_MODE 0x02 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundps-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-2.c new file mode 100644 index 00000000000..b8b15b8c0f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-2.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128 +#define FP_T float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN _mm_round_ps +#define ROUND_MODE _MM_FROUND_NINT +#define CHECK_ROUND_MODE 0x00 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundps-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-3.c new file mode 100644 index 00000000000..00ce877f724 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundps-3.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128 +#define FP_T float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN(x, mode) _mm_floor_ps(x) +#define ROUND_MODE _MM_FROUND_FLOOR +#define CHECK_ROUND_MODE 0x01 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 1 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-1.c new file mode 100644 index 00000000000..45810528193 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-1.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 
-msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define ASM_SUFFIX "l" + +#define ROUND_INTRIN(x, mode) _mm_ceil_sd(x, x) +#define ROUND_MODE _MM_FROUND_CEIL +#define CHECK_ROUND_MODE 0x02 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 2 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-2.c new file mode 100644 index 00000000000..48bcbc321a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-2.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define ASM_SUFFIX "l" + +#define ROUND_INTRIN(x, mode) _mm_round_sd(x, x, mode) +#define ROUND_MODE _MM_FROUND_NINT +#define CHECK_ROUND_MODE 0x00 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 2 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-3.c new file mode 100644 index 00000000000..bc5390cce8c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-3.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128d +#define FP_T double +#define ASM_SUFFIX "l" + +#define ROUND_INTRIN(x, mode) _mm_floor_sd(x, x) +#define ROUND_MODE _MM_FROUND_FLOOR +#define CHECK_ROUND_MODE 0x01 + +#define LOOP_INCREMENT 2 +#define CHECK_LOOP_INCREMENT 2 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-4.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-4.c new file mode 100644 index 00000000000..f63c4d49470 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundsd-4.c @@ -0,0 +1,91 @@ +/* { dg-do run { target i?86-*-* 
x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <math.h> +#include <string.h> + +#define NUM 64 + +static void +init_round (double *src) +{ + int i, sign = 1; + double d = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1)* d * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + d = d * src[i]; + } + else if (i == (NUM / 2)) + d = rand (); + else if ((i % 6) == 0) + d = 1 / (d * (i + 1) * src[i] * M_PI *sign); + sign = -sign; + } +} + +static double +do_round (double f, int type) +{ + short saved_cw, new_cw, clr_mask; + double ret; + + if ((type & 4)) + { + type = 0; + clr_mask = 0xFFFF; + } + else + { + type = 0x003F | ((type & 3) << 10); + clr_mask = ~0x0C3F; + } + + __asm__ ("fldl %0" : : "m" (*&f)); + + __asm__ ("fstcw %0" : "=m" (*&saved_cw)); + new_cw = saved_cw & clr_mask; + new_cw |= type; + __asm__ ("fldcw %0" : : "m" (*&new_cw)); + + __asm__ ("frndint\n" + "fstpl %0\n" : "=m" (*&ret)); + __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + return ret; +} + +static void +sse4_1_test (void) +{ + int i; + double f; + union + { + __m128d x[NUM / 2]; + double d[NUM]; + } dst, src; + + init_round (src.d); + memset (&dst, 0, NUM * sizeof(double)); + + for (i = 0; i < NUM / 2 ; i++) + dst.x[i] = _mm_round_sd (dst.x[i], src.x[i], _MM_FROUND_TRUNC); + + for (i = 0; i < NUM; i += 2) + { + if (dst.d[i + 1] != 0.0) + abort (); + + f = do_round (src.d[i], 0x03); + if (f != dst.d[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundss-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-1.c new file mode 100644 index 00000000000..68fba94a8b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-1.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128 +#define FP_T 
float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN(x, mode) _mm_ceil_ss(x, x) +#define ROUND_MODE _MM_FROUND_CEIL +#define CHECK_ROUND_MODE 0x02 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 4 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundss-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-2.c new file mode 100644 index 00000000000..77f791c4752 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-2.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128 +#define FP_T float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN(x, mode) _mm_round_ss(x, x, mode) +#define ROUND_MODE _MM_FROUND_NINT +#define CHECK_ROUND_MODE 0x00 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 4 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundss-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-3.c new file mode 100644 index 00000000000..8be6af1caef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-3.c @@ -0,0 +1,18 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#define VEC_T __m128 +#define FP_T float +#define ASM_SUFFIX "s" + +#define ROUND_INTRIN(x, mode) _mm_floor_ss(x, x) +#define ROUND_MODE _MM_FROUND_FLOOR +#define CHECK_ROUND_MODE 0x01 + +#define LOOP_INCREMENT 4 +#define CHECK_LOOP_INCREMENT 4 + +#include "sse4_1-round.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundss-4.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-4.c new file mode 100644 index 00000000000..a42d39ba377 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundss-4.c @@ -0,0 +1,106 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 
-msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <math.h> +#include <string.h> + +#define NUM 64 + +static void +init_round (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1)* f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI *sign); + sign = -sign; + } +} + +static float +do_round (float f, int type) +{ + short saved_cw, new_cw, clr_mask; + float ret; + + if ((type & 4)) + { + type = 0; + clr_mask = 0xFFFF; + } + else + { + type = 0x003F | ((type & 3) << 10); + clr_mask = ~0x0C3F; + } + + __asm__ ("flds %0" : : "m" (*&f)); + + __asm__ ("fstcw %0" : "=m" (*&saved_cw)); + new_cw = saved_cw & clr_mask; + new_cw |= type; + __asm__ ("fldcw %0" : : "m" (*&new_cw)); + + __asm__ ("frndint\n" + "fstps %0\n" : "=m" (*&ret)); + __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + return ret; +} + +static void +sse4_1_test (void) +{ + int i, j; + float f; + union + { + __m128 x[NUM / 4]; + float f[NUM]; + } dst, src; + + init_round (src.f); + memset (&dst, 0, NUM * sizeof(float)); + + for (i = 0; i < NUM / 4 ; i++) + dst.x[i] = _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_RINT); + + for (i = 0; i < NUM; i += 4) + { + for (j = 0; j < 3; j++) + if (dst.f[i + j + 1] != 0.0) + abort (); + + f = do_round (src.f[i], 0x04); + if (f != dst.f[i]) + abort (); + } + + for (i = 0; i < NUM / 4 ; i++) + dst.x[i] = _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_NEARBYINT); + + for (i = 0; i < NUM; i += 4) + { + for (j = 0; j < 3; j++) + if (dst.f[i + j + 1] != 0.0) + abort (); + + f = do_round (src.f[i], 0x0c); + if (f != dst.f[i]) + abort (); + } +} |