Add AMD SSE5 support; Add iterator over function arguments; Add stdarg_p, prototype_p, function_args_count functions

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128455 138bc75d-0d04-0410-961f-82ee72b054a4
author: meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-09-13 02:17:51 +0000
committer: meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-09-13 02:17:51 +0000
commit: 448e99f5e858735f9de2ab5cf614dd2b171cee5d (patch)
tree: 2550bf2be428ffb45e9bcb30a6c3186b44ebdc0d /gcc/testsuite/gcc.target/i386
parent: 3e66c14e753471249291f2403f35ebcbc436d353 (diff)
download: gcc-448e99f5e858735f9de2ab5cf614dd2b171cee5d.tar.gz
18 files changed, 1521 insertions, 10 deletions
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index de6a9bcc33c..7cf13ab7443 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -64,6 +64,21 @@ proc check_effective_target_sse4a { } {
     } "-O2 -msse4a" ]
 }
 
+# Return 1 if sse5 instructions can be compiled.
+proc check_effective_target_sse5 { } {
+    return [check_no_compiler_messages sse5 object {
+	typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+	typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+	__m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+	{
+	    return (__m128i) __builtin_ia32_pmacssww ((__v2di)__A,
+						      (__v2di)__B,
+						      (__v2di)__C);
+	}
+    } "-O2 -msse5" ]
+}
+
 # If a testcase doesn't have special options, use these.
 global DEFAULT_CFLAGS
 if ![info exists DEFAULT_CFLAGS] then {
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 48c4e7d43f6..395cdf7ed71 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -1,9 +1,9 @@
 /* Test that {,x,e,p,t,s,a}mmintrin.h, mm3dnow.h and mm_malloc.h are
    usable with -O -std=c89 -pedantic-errors.  */
-/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -msse4.1 -msse4a" } */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -msse4.1 -msse5" } */
 
-#include <ammintrin.h>
+#include <bmmintrin.h>
 #include <smmintrin.h>
 #include <mm3dnow.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 139d927cf3b..b6c34e2447c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,8 +1,8 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -msse4.1 -msse4a" } */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse4.1 -msse5 " } */
 
 /* Test that the intrinsics compile with optimization.  All of them are
-   defined as inline functions in {,x,e,p,t,s,a}mmintrin.h that reference
+   defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h that reference
    the proper builtin functions.  Defining away "static" and "__inline"
    results in all of them being compiled as proper functions.  */
 
@@ -66,5 +66,11 @@
 #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
 #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
 
-#include <ammintrin.h>
+/* bmmintrin.h */
+#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1)
+#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1)
+#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
+#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
+
+#include <bmmintrin.h>
 #include <smmintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 8c3a2d2e636..bb51c20ddcc 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O0 -msse4.1 -msse4a" } */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O0 -msse4.1 -msse5" } */
 
 /* Test that the intrinsics compile without optimization.  All of them are
    defined as inline functions in {,x,e,p,t,s,a}mmintrin.h that reference
@@ -9,5 +9,5 @@
 #define static
 #define __inline
 
-#include <ammintrin.h>
+#include <bmmintrin.h>
 #include <smmintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse5-check.h b/gcc/testsuite/gcc.target/i386/sse5-check.h
new file mode 100644
index 00000000000..e133ed884fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-check.h
@@ -0,0 +1,20 @@
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void sse5_test (void);
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  /* Run SSE5 test only if host has SSE5 support.  */
+  if (ecx & bit_SSE5)
+    sse5_test ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-fma-vector.c b/gcc/testsuite/gcc.target/i386/sse5-fma-vector.c
new file mode 100644
index 00000000000..59dc7651568
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-fma-vector.c
@@ -0,0 +1,92 @@
+/* Test that the compiler properly vectorizes floating point multiply and add
+   instructions into fmaddps on SSE5 systems.  */
+
+/* { dg-do compile { target x86_64-*-*} } */
+/* { dg-options "-O2 -msse5 -mfused-madd -ftree-vectorize" } */
+
+extern void exit (int);
+
+typedef float     __m128  __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double    __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+#define SIZE 10240
+
+union {
+  __m128 f_align;
+  __m128d d_align;
+  float f[SIZE];
+  double d[SIZE];
+} a, b, c, d;
+
+void
+flt_mul_add (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
+}
+
+void
+dbl_mul_add (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
+}
+
+void
+flt_mul_sub (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
+}
+
+void
+dbl_mul_sub (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
+}
+
+void
+flt_neg_mul_add (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
+}
+
+void
+dbl_neg_mul_add (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
+}
+
+int main ()
+{
+  flt_mul_add ();
+  flt_mul_sub ();
+  flt_neg_mul_add ();
+
+  dbl_mul_add ();
+  dbl_mul_sub ();
+  dbl_neg_mul_add ();
+  exit (0);
+}
+
+/* { dg-final { scan-assembler "fmaddps" } } */
+/* { dg-final { scan-assembler "fmaddpd" } } */
+/* { dg-final { scan-assembler "fmsubps" } } */
+/* { dg-final { scan-assembler "fmsubpd" } } */
+/* { dg-final { scan-assembler "fnmaddps" } } */
+/* { dg-final { scan-assembler "fnmaddpd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse5-fma.c b/gcc/testsuite/gcc.target/i386/sse5-fma.c
new file mode 100644
index 00000000000..598cda03c04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-fma.c
@@ -0,0 +1,81 @@
+/* Test that the compiler properly optimizes floating point multiply and add
+   instructions into fmaddss, fmsubss, fnmaddss, fnmsubss on SSE5 systems.  */
+
+/* { dg-do compile { target x86_64-*-*} } */
+/* { dg-options "-O2 -msse5 -mfused-madd" } */
+
+extern void exit (int);
+
+float
+flt_mul_add (float a, float b, float c)
+{
+  return (a * b) + c;
+}
+
+double
+dbl_mul_add (double a, double b, double c)
+{
+  return (a * b) + c;
+}
+
+float
+flt_mul_sub (float a, float b, float c)
+{
+  return (a * b) - c;
+}
+
+double
+dbl_mul_sub (double a, double b, double c)
+{
+  return (a * b) - c;
+}
+
+float
+flt_neg_mul_add (float a, float b, float c)
+{
+  return (-(a * b)) + c;
+}
+
+double
+dbl_neg_mul_add (double a, double b, double c)
+{
+  return (-(a * b)) + c;
+}
+
+float
+flt_neg_mul_sub (float a, float b, float c)
+{
+  return (-(a * b)) - c;
+}
+
+double
+dbl_neg_mul_sub (double a, double b, double c)
+{
+  return (-(a * b)) - c;
+}
+
+float  f[10] = { 2, 3, 4 };
+double d[10] = { 2, 3, 4 };
+
+int main ()
+{
+  f[3] = flt_mul_add (f[0], f[1], f[2]);
+  f[4] = flt_mul_sub (f[0], f[1], f[2]);
+  f[5] = flt_neg_mul_add (f[0], f[1], f[2]);
+  f[6] = flt_neg_mul_sub (f[0], f[1], f[2]);
+
+  d[3] = dbl_mul_add (d[0], d[1], d[2]);
+  d[4] = dbl_mul_sub (d[0], d[1], d[2]);
+  d[5] = dbl_neg_mul_add (d[0], d[1], d[2]);
+  d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]);
+  exit (0);
+}
+
+/* { dg-final { scan-assembler "fmaddss" } } */
+/* { dg-final { scan-assembler "fmaddsd" } } */
+/* { dg-final { scan-assembler "fmsubss" } } */
+/* { dg-final { scan-assembler "fmsubsd" } } */
+/* { dg-final { scan-assembler "fnmaddss" } } */
+/* { dg-final { scan-assembler "fnmaddsd" } } */
+/* { dg-final { scan-assembler "fnmsubss" } } */
+/* { dg-final { scan-assembler "fnmsubsd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse5-haddX.c b/gcc/testsuite/gcc.target/i386/sse5-haddX.c
new file mode 100644
index 00000000000..e605e070c12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-haddX.c
@@ -0,0 +1,208 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 10
+
+union
+{
+  __m128i x[NUM];
+  int8_t ssi[NUM * 16];
+  int16_t si[NUM * 8];
+  int32_t li[NUM * 4];
+  int64_t lli[NUM * 2];
+} dst, res, src1;
+
+static void
+init_sbyte ()
+{
+  int i;
+  for (i=0; i < NUM * 16; i++)
+    src1.ssi[i] = i;
+}
+
+static void
+init_sword ()
+{
+  int i;
+  for (i=0; i < NUM * 8; i++)
+    src1.si[i] = i;
+}
+
+
+static void
+init_sdword ()
+{
+  int i;
+  for (i=0; i < NUM * 4; i++)
+    src1.li[i] = i;
+}
+
+/* Recompute the byte->word horizontal add in C and count mismatches with dst.  */
+static int
+check_sbyte2word ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    for (j = 0; j < 8; j++)
+      {
+	t = i + (2 * j);	/* First source byte of the pair.  */
+	s = (i / 2) + j;	/* Destination word.  */
+	res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
+	if (res.si[s] != dst.si[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static int 
+check_sbyte2dword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    {
+      for (j = 0; j < 4; j++)
+	{
+	  t = i + (4 * j);
+	  s = (i / 4) + j;
+	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
+	              + src1.ssi[t + 3]); 
+	  if (res.li[s] != dst.li[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+static int
+check_sbyte2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    {
+      for (j = 0; j < 2; j++)
+	{
+	  t = i + (8 * j);
+	  s = (i / 8) + j;
+	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 
+		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
+	               + (src1.ssi[t + 6] + src1.ssi[t + 7])); 
+	  if (res.lli[s] != dst.lli[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+/* Recompute the word->dword horizontal add in C and count mismatches with dst.  */
+static int
+check_sword2dword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 8); i = i + 8)
+    for (j = 0; j < 4; j++)
+      {
+	t = i + (2 * j);	/* First source word of the pair.  */
+	s = (i / 2) + j;	/* Destination dword.  */
+	res.li[s] = src1.si[t] + src1.si[t + 1];
+	if (res.li[s] != dst.li[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static int 
+check_sword2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 8; i = i + 8)
+    {
+      for (j = 0; j < 2; j++)
+	{
+	  t = i + (4 * j);
+	  s = (i / 4) + j;
+	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
+	               + src1.si[t + 3]); 
+	  if (res.lli[s] != dst.lli[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+/* Recompute the dword->qword horizontal add in C and count mismatches with dst.  */
+static int
+check_dword2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 4); i = i + 4)
+    for (j = 0; j < 2; j++)
+      {
+	t = i + (2 * j);	/* First source dword of the pair.  */
+	s = (i / 2) + j;	/* Destination qword.  */
+	res.lli[s] = src1.li[t] + src1.li[t + 1];
+	if (res.lli[s] != dst.lli[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check haddbw */
+  init_sbyte ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddw_epi8 (src1.x[i]);
+  
+  if (check_sbyte2word())
+  abort ();
+  
+  /* Check haddbd */
+  for (i = 0; i < (NUM ); i++)
+    dst.x[i] = _mm_haddd_epi8 (src1.x[i]);
+  
+  if (check_sbyte2dword())
+    abort (); 
+  
+  /* Check haddbq */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epi8 (src1.x[i]);
+  
+  if (check_sbyte2qword())
+    abort ();
+
+  /* Check haddwd */
+  init_sword ();
+
+  for (i = 0; i < (NUM ); i++)
+    dst.x[i] = _mm_haddd_epi16 (src1.x[i]);
+  
+  if (check_sword2dword())
+    abort (); 
+   
+  /* Check haddwq */
+ 
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epi16 (src1.x[i]);
+  
+  if (check_sword2qword())
+    abort ();
+ 
+  /* Check hadddq */
+  init_sdword ();
+
+    for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epi32 (src1.x[i]);
+  
+  if (check_dword2qword())
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-hadduX.c b/gcc/testsuite/gcc.target/i386/sse5-hadduX.c
new file mode 100644
index 00000000000..a55fb8a527d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-hadduX.c
@@ -0,0 +1,207 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 10
+
+union
+{
+  __m128i x[NUM];
+  unsigned char  ssi[NUM * 16];
+  unsigned short si[NUM * 8];
+  unsigned int li[NUM * 4];
+  unsigned long long  lli[NUM * 2];
+} dst, res, src1;
+
+static void
+init_byte ()
+{
+  int i;
+  for (i=0; i < NUM * 16; i++)
+    src1.ssi[i] = i;
+}
+
+static void
+init_word ()
+{
+  int i;
+  for (i=0; i < NUM * 8; i++)
+    src1.si[i] = i;
+}
+
+
+static void
+init_dword ()
+{
+  int i;
+  for (i=0; i < NUM * 4; i++)
+    src1.li[i] = i;
+}
+
+/* Recompute the byte->word horizontal add in C and count mismatches with dst.  */
+static int
+check_byte2word ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    for (j = 0; j < 8; j++)
+      {
+	t = i + (2 * j);	/* First source byte of the pair.  */
+	s = (i / 2) + j;	/* Destination word.  */
+	res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
+	if (res.si[s] != dst.si[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static int 
+check_byte2dword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    {
+      for (j = 0; j < 4; j++)
+	{
+	  t = i + (4 * j);
+	  s = (i / 4) + j;
+	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
+	              + src1.ssi[t + 3]); 
+	  if (res.li[s] != dst.li[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+static int
+check_byte2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    {
+      for (j = 0; j < 2; j++)
+	{
+	  t = i + (8 * j);
+	  s = (i / 8) + j;
+	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 
+		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
+	               + (src1.ssi[t + 6] + src1.ssi[t + 7])); 
+	  if (res.lli[s] != dst.lli[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+/* Recompute the word->dword horizontal add in C and count mismatches with dst.  */
+static int
+check_word2dword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 8); i = i + 8)
+    for (j = 0; j < 4; j++)
+      {
+	t = i + (2 * j);	/* First source word of the pair.  */
+	s = (i / 2) + j;	/* Destination dword.  */
+	res.li[s] = src1.si[t] + src1.si[t + 1];
+	if (res.li[s] != dst.li[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static int 
+check_word2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 8; i = i + 8)
+    {
+      for (j = 0; j < 2; j++)
+	{
+	  t = i + (4 * j);
+	  s = (i / 4) + j;
+	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
+	               + src1.si[t + 3]); 
+	  if (res.lli[s] != dst.lli[s]) 
+	    check_fails++;
+	}
+    }
+  return check_fails++;
+}
+
+/* Recompute the dword->qword horizontal add in C and count mismatches with dst.  */
+static int
+check_dword2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 4); i = i + 4)
+    for (j = 0; j < 2; j++)
+      {
+	t = i + (2 * j);	/* First source dword of the pair.  */
+	s = (i / 2) + j;	/* Destination qword.  */
+	res.lli[s] = src1.li[t] + src1.li[t + 1];
+	if (res.lli[s] != dst.lli[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check haddubw */
+  init_byte ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddw_epu8 (src1.x[i]);
+  
+  if (check_byte2word())
+  abort ();
+  
+  /* Check haddubd */
+  for (i = 0; i < (NUM ); i++)
+    dst.x[i] = _mm_haddd_epu8 (src1.x[i]);
+  
+  if (check_byte2dword())
+    abort (); 
+  
+  /* Check haddubq */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epu8 (src1.x[i]);
+  
+  if (check_byte2qword())
+    abort ();
+
+  /* Check hadduwd */
+  init_word ();
+
+  for (i = 0; i < (NUM ); i++)
+    dst.x[i] = _mm_haddd_epu16 (src1.x[i]);
+  
+  if (check_word2dword())
+    abort (); 
+   
+  /* Check hadduwq */
+ 
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epu16 (src1.x[i]);
+  
+  if (check_word2qword())
+    abort ();
+ 
+  /* Check haddudq */
+  init_dword ();
+    for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_haddq_epu32 (src1.x[i]);
+  
+  if (check_dword2qword())
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-hsubX.c b/gcc/testsuite/gcc.target/i386/sse5-hsubX.c
new file mode 100644
index 00000000000..03c7f79084e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-hsubX.c
@@ -0,0 +1,128 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 10
+
+union
+{
+  __m128i x[NUM];
+  int8_t ssi[NUM * 16];
+  int16_t si[NUM * 8];
+  int32_t li[NUM * 4];
+  int64_t lli[NUM * 2];
+} dst, res, src1;
+
+static void
+init_sbyte ()
+{
+  int i;
+  for (i=0; i < NUM * 16; i++)
+    src1.ssi[i] = i;
+}
+
+static void
+init_sword ()
+{
+  int i;
+  for (i=0; i < NUM * 8; i++)
+    src1.si[i] = i;
+}
+
+
+static void
+init_sdword ()
+{
+  int i;
+  for (i=0; i < NUM * 4; i++)
+    src1.li[i] = i;
+}
+
+/* Recompute the byte->word horizontal subtract in C; count mismatches with dst.  */
+static int
+check_sbyte2word ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < NUM * 16; i = i + 16)
+    for (j = 0; j < 8; j++)
+      {
+	t = i + (2 * j);	/* First source byte of the pair.  */
+	s = (i / 2) + j;	/* Destination word.  */
+	res.si[s] = src1.ssi[t] - src1.ssi[t + 1];
+	if (res.si[s] != dst.si[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+/* Recompute the word->dword horizontal subtract in C; count mismatches with dst.  */
+static int
+check_sword2dword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 8); i = i + 8)
+    for (j = 0; j < 4; j++)
+      {
+	t = i + (2 * j);	/* First source word of the pair.  */
+	s = (i / 2) + j;	/* Destination dword.  */
+	res.li[s] = src1.si[t] - src1.si[t + 1];
+	if (res.li[s] != dst.li[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+/* Recompute the dword->qword horizontal subtract in C; count mismatches with dst.  */
+static int
+check_dword2qword ()
+{
+  int i, j, s, t, check_fails = 0;
+  for (i = 0; i < (NUM * 4); i = i + 4)
+    for (j = 0; j < 2; j++)
+      {
+	t = i + (2 * j);	/* First source dword of the pair.  */
+	s = (i / 2) + j;	/* Destination qword.  */
+	res.lli[s] = src1.li[t] - src1.li[t + 1];
+	if (res.lli[s] != dst.lli[s])
+	  check_fails++;
+      }
+  return check_fails;	/* Callers abort on a nonzero count.  */
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check hsubbw */
+  init_sbyte ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_hsubw_epi8 (src1.x[i]);
+  
+  if (check_sbyte2word())
+  abort ();
+  
+
+  /* Check hsubwd */
+  init_sword ();
+
+  for (i = 0; i < (NUM ); i++)
+    dst.x[i] = _mm_hsubd_epi16 (src1.x[i]);
+  
+  if (check_sword2dword())
+    abort (); 
+   
+   /* Check hsubdq */
+  init_sdword ();
+    for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_hsubq_epi32 (src1.x[i]);
+  
+  if (check_dword2qword())
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-ima-vector.c b/gcc/testsuite/gcc.target/i386/sse5-ima-vector.c
new file mode 100644
index 00000000000..260291d2985
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-ima-vector.c
@@ -0,0 +1,33 @@
+/* Test that the compiler properly vectorizes 32-bit integer multiply and
+   add instructions into pmacsdd on SSE5 systems.  */
+
+/* { dg-do compile { target x86_64-*-*} } */
+/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
+
+extern void exit (int);
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+#define SIZE 10240
+
+union {
+  __m128i align;
+  int i[SIZE];
+} a, b, c, d;
+
+void
+int_mul_add (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    a.i[i] = (b.i[i] * c.i[i]) + d.i[i];
+}
+
+int main ()
+{
+  int_mul_add ();
+  exit (0);
+}
+
+/* { dg-final { scan-assembler "pmacsdd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse5-maccXX.c b/gcc/testsuite/gcc.target/i386/sse5-maccXX.c
new file mode 100644
index 00000000000..9603d53ea68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-maccXX.c
@@ -0,0 +1,140 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+union
+{
+  __m128 x[NUM];
+  float f[NUM * 4];
+  __m128d y[NUM];
+  double d[NUM * 2];
+} dst, res, src1, src2, src3;
+
+
+/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate
+   product is not rounded, only the addition is rounded. */
+
+static void
+init_maccps ()
+{
+  int i;
+  for (i = 0; i < NUM * 4; i++)
+    {
+      src1.f[i] = i;
+      src2.f[i] = i + 10;
+      src3.f[i] = i + 20;
+    }
+}
+
+static void
+init_maccpd ()
+{
+  int i;
+  for (i = 0; i < NUM * 2; i++)	/* d[] has NUM * 2 elements.  */
+    {
+      src1.d[i] = i;
+      src2.d[i] = i + 10;
+      src3.d[i] = i + 20;
+    }
+}
+
+static int
+check_maccps ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    for (j = 0; j < 4; j++)
+      {
+	res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j];
+	if (dst.f[i + j] != res.f[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+static int
+check_maccpd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    for (j = 0; j < 2; j++)
+      {
+	res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j];
+	if (dst.d[i + j] != res.d[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+
+static int
+check_maccss ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i= i + 4)
+    {
+      res.f[i] = (src1.f[i] * src2.f[i]) + src3.f[i];
+      if (dst.f[i] != res.f[i]) 
+	check_fails++;
+    }	
+  return check_fails++;
+}
+
+static int
+check_maccsd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    {
+      res.d[i] = (src1.d[i] * src2.d[i]) + src3.d[i];
+      if (dst.d[i] != res.d[i]) 
+	check_fails++;
+    }
+  return check_fails++;
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check maccps */
+  init_maccps ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_macc_ps (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_maccps ()) 
+    abort ();
+  
+  /* check maccss */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_macc_ss (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_maccss ()) 
+    abort ();
+  
+  /* Check maccpd */
+  init_maccpd ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_macc_pd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_maccpd ()) 
+    abort ();
+  
+  /* Check maccsd */
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_macc_sd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_maccsd ()) 
+    abort ();
+  
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-msubXX.c b/gcc/testsuite/gcc.target/i386/sse5-msubXX.c
new file mode 100644
index 00000000000..151e8c6e51f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-msubXX.c
@@ -0,0 +1,139 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+union
+{
+  __m128 x[NUM];
+  float f[NUM * 4];
+  __m128d y[NUM];
+  double d[NUM * 2];
+} dst, res, src1, src2, src3;
+
+/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate
+   product is not rounded, only the addition is rounded. */
+
+static void
+init_msubps ()
+{
+  int i;
+  for (i = 0; i < NUM * 4; i++)
+    {
+      src1.f[i] = i;
+      src2.f[i] = i + 10;
+      src3.f[i] = i + 20;
+    }
+}
+
+static void
+init_msubpd ()
+{
+  int i;
+  for (i = 0; i < NUM * 2; i++)	/* d[] has NUM * 2 elements.  */
+    {
+      src1.d[i] = i;
+      src2.d[i] = i + 10;
+      src3.d[i] = i + 20;
+    }
+}
+
+static int
+check_msubps ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    for (j = 0; j < 4; j++)
+      {
+	res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j];
+	if (dst.f[i + j] != res.f[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+static int
+check_msubpd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    for (j = 0; j < 2; j++)
+      {
+	res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j];
+	if (dst.d[i + j] != res.d[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+
+static int
+check_msubss ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    {
+      res.f[i] = (src1.f[i] * src2.f[i]) - src3.f[i];
+      if (dst.f[i] != res.f[i]) 
+	check_fails++;
+    }	
+  return check_fails++;
+}
+
+static int
+check_msubsd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    {
+      res.d[i] = (src1.d[i] * src2.d[i]) - src3.d[i];
+      if (dst.d[i] != res.d[i]) 
+	check_fails++;
+    }
+  return check_fails++;
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check msubps */
+  init_msubps ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_msub_ps (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_msubps ()) 
+    abort ();
+  
+  /* check msubss */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_msub_ss (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_msubss ()) 
+    abort ();
+  
+  /* Check msubpd */
+  init_msubpd ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_msub_pd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_msubpd ()) 
+    abort ();
+  
+  /* Check msubsd */
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_msub_sd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_msubsd ()) 
+    abort ();
+  
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-nmaccXX.c b/gcc/testsuite/gcc.target/i386/sse5-nmaccXX.c
new file mode 100644
index 00000000000..c5ca2bf7d1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-nmaccXX.c
@@ -0,0 +1,139 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+union
+{
+  __m128 x[NUM];
+  float f[NUM * 4];
+  __m128d y[NUM];
+  double d[NUM * 2];
+} dst, res, src1, src2, src3;
+
+/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate
+   product is not rounded, only the addition is rounded. */
+
+static void
+init_nmaccps ()
+{
+  int i;
+  for (i = 0; i < NUM * 4; i++)
+    {
+      src1.f[i] = i;
+      src2.f[i] = i + 10;
+      src3.f[i] = i + 20;
+    }
+}
+
+static void
+init_nmaccpd ()
+{
+  int i;
+  for (i = 0; i < NUM * 2; i++)	/* d[] has NUM * 2 elements.  */
+    {
+      src1.d[i] = i;
+      src2.d[i] = i + 10;
+      src3.d[i] = i + 20;
+    }
+}
+
+static int
+check_nmaccps ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    for (j = 0; j < 4; j++)
+      {
+	res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j];
+	if (dst.f[i + j] != res.f[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+static int
+check_nmaccpd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    for (j = 0; j < 2; j++)
+      {
+	res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j];
+	if (dst.d[i + j] != res.d[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+
+static int
+check_nmaccss ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    {
+      res.f[i] = - (src1.f[i] * src2.f[i]) + src3.f[i];
+      if (dst.f[i] != res.f[i]) 
+	check_fails++;
+    }	
+  return check_fails++;
+}
+
+static int
+check_nmaccsd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    {
+      res.d[i] = - (src1.d[i] * src2.d[i]) + src3.d[i];
+      if (dst.d[i] != res.d[i]) 
+	check_fails++;
+    }
+  return check_fails++;
+}
+
+static void
+sse5_test (void)
+{
+  int i;
+  
+  /* Check nmaccps */
+  init_nmaccps ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_nmacc_ps (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_nmaccps ()) 
+    abort ();
+  
+  /* check nmaccss */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_nmacc_ss (src1.x[i], src2.x[i], src3.x[i]);
+  
+  if (check_nmaccss ()) 
+    abort ();
+  
+  /* Check nmaccpd */
+  init_nmaccpd ();
+  
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_nmacc_pd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_nmaccpd ()) 
+    abort ();
+  
+  /* Check nmaccsd */
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_nmacc_sd (src1.y[i], src2.y[i], src3.y[i]);
+  
+  if (check_nmaccsd ()) 
+    abort ();
+  
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-nmsubXX.c b/gcc/testsuite/gcc.target/i386/sse5-nmsubXX.c
new file mode 100644
index 00000000000..acf19f9742d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-nmsubXX.c
@@ -0,0 +1,139 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+union
+{
+  __m128 x[NUM];
+  float f[NUM * 4];
+  __m128d y[NUM];
+  double d[NUM * 2];
+} dst, res, src1, src2, src3;
+
+/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate
+   product is not rounded, only the addition is rounded. */
+
+static void
+init_nmsubps ()
+{
+  int i;
+  for (i = 0; i < NUM * 4; i++)
+    {
+      src1.f[i] = i;
+      src2.f[i] = i + 10;
+      src3.f[i] = i + 20;
+    }
+}
+
+static void
+init_nmsubpd ()
+{
+  int i;
+  for (i = 0; i < NUM * 2; i++)	/* d[] has NUM * 2 elements.  */
+    {
+      src1.d[i] = i;
+      src2.d[i] = i + 10;
+      src3.d[i] = i + 20;
+    }
+}
+
+static int
+check_nmsubps ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    for (j = 0; j < 4; j++)
+      {
+	res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j];
+	if (dst.f[i + j] != res.f[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+static int
+check_nmsubpd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    for (j = 0; j < 2; j++)
+      {
+	res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j];
+	if (dst.d[i + j] != res.d[i + j]) 
+	  check_fails++;
+      }
+  return check_fails++;
+}
+
+
+static int
+check_nmsubss ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 4; i = i + 4)
+    {
+      res.f[i] = - (src1.f[i] * src2.f[i]) - src3.f[i];
+      if (dst.f[i] != res.f[i]) 
+	check_fails++;
+    }	
+  return check_fails++;
+}
+
+static int
+check_nmsubsd ()
+{
+  int i, j, check_fails = 0;
+  for (i = 0; i < NUM * 2; i = i + 2)
+    {
+      res.d[i] = - (src1.d[i] * src2.d[i]) - src3.d[i];
+      if (dst.d[i] != res.d[i]) 
+	check_fails++;
+    }
+  return check_fails++;
+}
+
+/* Run each nmsub intrinsic over all NUM vectors; abort on any mismatch.  */
+static void
+sse5_test (void)
+{
+  int i;
+
+  /* Check nmsubps */
+  init_nmsubps ();
+
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_nmsub_ps (src1.x[i], src2.x[i], src3.x[i]);
+  /* The checkers take no parameters; they compare the global arrays.  */
+  if (check_nmsubps ())
+    abort ();
+
+  /* Check nmsubss */
+  for (i = 0; i < NUM; i++)
+    dst.x[i] = _mm_nmsub_ss (src1.x[i], src2.x[i], src3.x[i]);
+
+  if (check_nmsubss ())
+    abort ();
+
+  /* Check nmsubpd */
+  init_nmsubpd ();
+
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_nmsub_pd (src1.y[i], src2.y[i], src3.y[i]);
+
+  if (check_nmsubpd ())
+    abort ();
+
+  /* Check nmsubsd */
+  for (i = 0; i < NUM; i++)
+    dst.y[i] = _mm_nmsub_sd (src1.y[i], src2.y[i], src3.y[i]);
+
+  if (check_nmsubsd ())
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse5-pcmov.c b/gcc/testsuite/gcc.target/i386/sse5-pcmov.c
new file mode 100644
index 00000000000..3bc2e5dbbda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-pcmov.c
@@ -0,0 +1,22 @@
+/* Test that the compiler properly optimizes conditional floating point moves
+   into the pcmov instruction on SSE5 systems.  */
+
+/* { dg-do compile { target x86_64-*-*} } */
+/* { dg-options "-O2 -msse5" } */
+
+extern void exit (int);
+
+double dbl_test (double a, double b, double c, double d)
+{
+  return (a > b) ? c : d;  /* The select that dg-final expects as "pcmov".  */
+}
+
+double dbl_a = 1, dbl_b = 2, dbl_c = 3, dbl_d = 4, dbl_e;  /* dbl_test operands; dbl_e takes the result.  */
+
+int main()  /* Driver for a dg-do compile test; only the assembly is scanned.  */
+{
+  dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d);  /* Operands and result are globals.  */
+  exit (0);
+}
+
+/* { dg-final { scan-assembler "pcmov" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse5-pcmov2.c b/gcc/testsuite/gcc.target/i386/sse5-pcmov2.c
new file mode 100644
index 00000000000..0bb366ce0c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-pcmov2.c
@@ -0,0 +1,22 @@
+/* Test that the compiler properly optimizes conditional floating point moves
+   into the pcmov instruction on SSE5 systems.  */
+
+/* { dg-do compile { target x86_64-*-*} } */
+/* { dg-options "-O2 -msse5" } */
+
+extern void exit (int);
+
+float flt_test (float a, float b, float c, float d)
+{
+  return (a > b) ? c : d;  /* The select that dg-final expects as "pcmov".  */
+}
+
+float flt_a = 1, flt_b = 2, flt_c = 3, flt_d = 4, flt_e;  /* flt_test operands; flt_e takes the result.  */
+
+int main()  /* Driver for a dg-do compile test; only the assembly is scanned.  */
+{
+  flt_e = flt_test (flt_a, flt_b, flt_c, flt_d);  /* Operands and result are globals.  */
+  exit (0);
+}
+
+/* { dg-final { scan-assembler "pcmov" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse5-permpX.c b/gcc/testsuite/gcc.target/i386/sse5-permpX.c
new file mode 100644
index 00000000000..d83aa58338e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse5-permpX.c
@@ -0,0 +1,120 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse5 } */
+/* { dg-options "-O2 -msse5" } */
+
+#include "sse5-check.h"
+
+#include <bmmintrin.h>
+#include <string.h>
+
+union  /* Test data, viewable as two vectors or as plain element arrays.  */
+{
+  __m128 x[2];
+  __m128d y[2];
+  __m128i z[2];
+  float f[8];  /* Element view used by the float tests.  */
+  double d[4];  /* Element view used by the double tests.  */
+  int i[8];
+  long long li[4];  /* Must be 64 bits wide even where `long' is 32.  */
+} dst, res, src1, src2, src3;
+
+
+/* Load the operands and expected results for the _mm_perm_pd test.  */
+static void
+init_ddata ()
+{
+  /* Per-lane values: control word stored in src3 and the double that
+     check_permpd later expects to find in dst.  */
+  static const int control[4] = { 3, 0, 1, 2 };
+  static const double expected[4] = { 3.0, 0.0, 3.0, 4.0 };
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    {
+      /* Inputs are simple ramps: src1 = 0..3, src2 = 2..5.  */
+      src1.d[lane] = lane;
+      src2.d[lane] = lane + 2;
+
+      src3.li[lane] = control[lane];
+      res.d[lane] = expected[lane];
+    }
+}
+
+
+/* Load the operands and expected results for the _mm_perm_ps test.  */
+static void
+init_fdata ()
+{
+  /* Per-lane values: control word stored in src3 and the float that
+     check_permps later expects to find in dst.  */
+  static const int control[8] = { 7, 5, 1, 2, 0, 4, 3, 6 };
+  static const float expected[8] =
+    {
+      5.0, 3.0, 1.0, 2.0,
+      4.0, 6.0, 7.0, 8.0
+    };
+  int lane;
+
+  for (lane = 0; lane < 8; lane++)
+    {
+      /* Inputs are simple ramps: src1 = 0..7, src2 = 2..9.  */
+      src1.f[lane] = lane;
+      src2.f[lane] = lane + 2;
+    }
+
+  /* Then the control words and the reference output.  */
+  for (lane = 0; lane < 8; lane++)
+    {
+      src3.i[lane] = control[lane];
+      res.f[lane] = expected[lane];
+    }
+}
+
+/* Compare the four doubles in dst against res.
+   Returns the number of elements that differ; 0 means the test passed.  */
+static int
+check_permpd ()
+{
+  int lane, mismatches = 0;
+
+  for (lane = 0; lane != 4; ++lane)
+    mismatches += (res.d[lane] != dst.d[lane]) ? 1 : 0;
+
+  return mismatches;
+}
+
+/* Compare the eight floats in dst against res.
+   Returns the number of elements that differ; 0 means the test passed.  */
+static int
+check_permps ()
+{
+  int lane, mismatches = 0;
+
+  for (lane = 0; lane != 8; ++lane)
+    mismatches += (res.f[lane] != dst.f[lane]) ? 1 : 0;
+
+  return mismatches;
+}
+
+/* Run the pd and ps permute tests in turn; abort () if either fails.  */
+static void
+sse5_test (void)
+{
+  int v;
+
+  /* Doubles: permute both vector pairs, then compare dst with res.  */
+  init_ddata ();
+  for (v = 0; v < 2; v++)
+    dst.y[v] = _mm_perm_pd (src1.y[v], src2.y[v], src3.z[v]);
+  if (check_permpd () != 0)
+    abort ();
+
+  /* Floats: same sequence with the single-precision data.  */
+  init_fdata ();
+  for (v = 0; v < 2; v++)
+    dst.x[v] = _mm_perm_ps (src1.x[v], src2.x[v], src3.z[v]);
+  if (check_permps () != 0)
+    abort ();
+}
+
+
author	meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-09-13 02:17:51 +0000
committer	meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-09-13 02:17:51 +0000
commit	448e99f5e858735f9de2ab5cf614dd2b171cee5d (patch)
tree	2550bf2be428ffb45e9bcb30a6c3186b44ebdc0d /gcc/testsuite/gcc.target/i386
parent	3e66c14e753471249291f2403f35ebcbc436d353 (diff)
download	gcc-448e99f5e858735f9de2ab5cf614dd2b171cee5d.tar.gz