diff options
author | Michael Meissner <meissner@gcc.gnu.org> | 2008-07-23 10:28:06 +0000 |
---|---|---|
committer | Michael Meissner <meissner@gcc.gnu.org> | 2008-07-23 10:28:06 +0000 |
commit | ab442df7fb453434d80a779844fe1a10c0c802ab (patch) | |
tree | efd8e61a3d2ff9dcff5eb5bf03e25922191f7df5 /gcc/testsuite | |
parent | 5295185c3150a8d31685dc44248aa058246bbe73 (diff) | |
download | gcc-ab442df7fb453434d80a779844fe1a10c0c802ab.tar.gz |
Add ability to set target options (ix86 only) and optimization options on a function-specific basis
From-SVN: r138075
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- | gcc/testsuite/gcc.target/i386/cold-1.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-1.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-2.c | 99 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-3.c | 66 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-4.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-5.c | 125 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-6.c | 71 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-7.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-8.c | 161 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-9.c | 36 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/hot-1.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/opt-1.c | 35 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/opt-2.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-22.c | 171 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 108 |
15 files changed, 1017 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/cold-1.c b/gcc/testsuite/gcc.target/i386/cold-1.c new file mode 100644 index 00000000000..bcdc471eb58 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cold-1.c @@ -0,0 +1,13 @@ +/* Test whether using attribute((cold)) really turns on -Os. Do this test + by checking whether strcpy calls the library function rather than doing + the move inline. */ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=k8" } */ +/* { dg-final { scan-assembler "(jmp|call)\t(.*)strcpy" } } */ + +void cold (char *) __attribute__((__cold__)); + +void cold (char *a) +{ + __builtin_strcpy (a, "testing 1.2.3 testing 1.2.3"); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-1.c b/gcc/testsuite/gcc.target/i386/funcspec-1.c new file mode 100644 index 00000000000..1ee43a0bbb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-1.c @@ -0,0 +1,34 @@ +/* Test whether using target specific options, we can generate SSE2 code on + 32-bit, which does not generate SSE2 by default, but still generate 387 code + for a function that doesn't use attribute((option)). 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O3 -ftree-vectorize -march=i386" } */ +/* { dg-final { scan-assembler "addps\[ \t\]" } } */ +/* { dg-final { scan-assembler "fsubs\[ \t\]" } } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +static float a[SIZE] __attribute__((__aligned__(16))); +static float b[SIZE] __attribute__((__aligned__(16))); +static float c[SIZE] __attribute__((__aligned__(16))); + +void sse_addnums (void) __attribute__ ((__option__ ("sse2"))); + +void +sse_addnums (void) +{ + int i = 0; + for (; i < SIZE; ++i) + a[i] = b[i] + c[i]; +} + +void +i387_subnums (void) +{ + int i = 0; + for (; i < SIZE; ++i) + a[i] = b[i] - c[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-2.c b/gcc/testsuite/gcc.target/i386/funcspec-2.c new file mode 100644 index 00000000000..eb6f48bae1f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-2.c @@ -0,0 +1,99 @@ +/* Test whether using target specific options, we can generate SSE5 code. 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -march=k8" } */ + +extern void exit (int); + +#define SSE5_ATTR __attribute__((__option__("sse5,fused-madd"))) +extern float flt_mul_add (float a, float b, float c) SSE5_ATTR; +extern float flt_mul_sub (float a, float b, float c) SSE5_ATTR; +extern float flt_neg_mul_add (float a, float b, float c) SSE5_ATTR; +extern float flt_neg_mul_sub (float a, float b, float c) SSE5_ATTR; + +extern double dbl_mul_add (double a, double b, double c) SSE5_ATTR; +extern double dbl_mul_sub (double a, double b, double c) SSE5_ATTR; +extern double dbl_neg_mul_add (double a, double b, double c) SSE5_ATTR; +extern double dbl_neg_mul_sub (double a, double b, double c) SSE5_ATTR; + +float +flt_mul_add (float a, float b, float c) +{ + return (a * b) + c; +} + +double +dbl_mul_add (double a, double b, double c) +{ + return (a * b) + c; +} + +float +flt_mul_sub (float a, float b, float c) +{ + return (a * b) - c; +} + +double +dbl_mul_sub (double a, double b, double c) +{ + return (a * b) - c; +} + +float +flt_neg_mul_add (float a, float b, float c) +{ + return (-(a * b)) + c; +} + +double +dbl_neg_mul_add (double a, double b, double c) +{ + return (-(a * b)) + c; +} + +float +flt_neg_mul_sub (float a, float b, float c) +{ + return (-(a * b)) - c; +} + +double +dbl_neg_mul_sub (double a, double b, double c) +{ + return (-(a * b)) - c; +} + +float f[10] = { 2, 3, 4 }; +double d[10] = { 2, 3, 4 }; + +int main () +{ + f[3] = flt_mul_add (f[0], f[1], f[2]); + f[4] = flt_mul_sub (f[0], f[1], f[2]); + f[5] = flt_neg_mul_add (f[0], f[1], f[2]); + f[6] = flt_neg_mul_sub (f[0], f[1], f[2]); + + d[3] = dbl_mul_add (d[0], d[1], d[2]); + d[4] = dbl_mul_sub (d[0], d[1], d[2]); + d[5] = dbl_neg_mul_add (d[0], d[1], d[2]); + d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]); + exit (0); +} + +/* { dg-final { scan-assembler "fmaddss" } } */ +/* { dg-final { scan-assembler "fmaddsd" } } */ +/* { dg-final { scan-assembler 
"fmsubss" } } */ +/* { dg-final { scan-assembler "fmsubsd" } } */ +/* { dg-final { scan-assembler "fnmaddss" } } */ +/* { dg-final { scan-assembler "fnmaddsd" } } */ +/* { dg-final { scan-assembler "fnmsubss" } } */ +/* { dg-final { scan-assembler "fnmsubsd" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_mul_sub" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_add" } } */ +/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_sub" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-3.c b/gcc/testsuite/gcc.target/i386/funcspec-3.c new file mode 100644 index 00000000000..80ec23da09f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-3.c @@ -0,0 +1,66 @@ +/* Test whether using target specific options, we can generate popcnt by + setting the architecture. 
*/ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -march=k8" } */ + +extern void exit (int); +extern void abort (void); + +#define SSE4A_ATTR __attribute__((__option__("arch=amdfam10"))) +#define SSE42_ATTR __attribute__((__option__("sse4.2"))) + +static int sse4a_pop_i (int a) SSE4A_ATTR; +static long sse42_pop_l (long a) SSE42_ATTR; +static int generic_pop_i (int a); +static long generic_pop_l (long a); + +static +int sse4a_pop_i (int a) +{ + return __builtin_popcount (a); +} + +static +long sse42_pop_l (long a) +{ + return __builtin_popcountl (a); +} + +static +int generic_pop_i (int a) +{ + return __builtin_popcount (a); +} + +static +long generic_pop_l (long a) +{ + return __builtin_popcountl (a); +} + +int five = 5; +long seven = 7; + +int main () +{ + if (sse4a_pop_i (five) != 2) + abort (); + + if (sse42_pop_l (seven) != 3L) + abort (); + + if (generic_pop_i (five) != 2) + abort (); + + if (generic_pop_l (seven) != 3L) + abort (); + + exit (0); +} + +/* { dg-final { scan-assembler "popcntl" } } */ +/* { dg-final { scan-assembler "popcntq" } } */ +/* { dg-final { scan-assembler "call\t(.*)sse4a_pop_i" } } */ +/* { dg-final { scan-assembler "call\t(.*)sse42_pop_l" } } */ +/* { dg-final { scan-assembler "call\t(.*)popcountdi2" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-4.c b/gcc/testsuite/gcc.target/i386/funcspec-4.c new file mode 100644 index 00000000000..71251c314bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-4.c @@ -0,0 +1,14 @@ +/* Test some error conditions with function specific options. 
*/ +/* { dg-do compile } */ + +/* no sse500 switch */ +extern void error1 (void) __attribute__((__option__("sse500"))); /* { dg-error "unknown" } */ + +/* Multiple arch switches */ +extern void error2 (void) __attribute__((__option__("arch=core2,arch=k8"))); /* { dg-error "already specified" } */ + +/* Unknown tune target */ +extern void error3 (void) __attribute__((__option__("tune=foobar"))); /* { dg-error "bad value" } */ + +/* option on a variable */ +extern int error4 __attribute__((__option__("sse2"))); /* { dg-warning "ignored" } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c new file mode 100644 index 00000000000..d4204bb1411 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c @@ -0,0 +1,125 @@ +/* Test whether all of the 32-bit function specific options are accepted + without error. */ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ + +extern void test_abm (void) __attribute__((__option__("abm"))); +extern void test_aes (void) __attribute__((__option__("aes"))); +extern void test_fused_madd (void) __attribute__((__option__("fused-madd"))); +extern void test_mmx (void) __attribute__((__option__("mmx"))); +extern void test_pclmul (void) __attribute__((__option__("pclmul"))); +extern void test_popcnt (void) __attribute__((__option__("popcnt"))); +extern void test_recip (void) __attribute__((__option__("recip"))); +extern void test_sse (void) __attribute__((__option__("sse"))); +extern void test_sse2 (void) __attribute__((__option__("sse2"))); +extern void test_sse3 (void) __attribute__((__option__("sse3"))); +extern void test_sse4 (void) __attribute__((__option__("sse4"))); +extern void test_sse4_1 (void) __attribute__((__option__("sse4.1"))); +extern void test_sse4_2 (void) __attribute__((__option__("sse4.2"))); +extern void test_sse4a (void) __attribute__((__option__("sse4a"))); +extern void test_sse5 (void) __attribute__((__option__("sse5"))); +extern void test_ssse3 
(void) __attribute__((__option__("ssse3"))); + +extern void test_no_abm (void) __attribute__((__option__("no-abm"))); +extern void test_no_aes (void) __attribute__((__option__("no-aes"))); +extern void test_no_fused_madd (void) __attribute__((__option__("no-fused-madd"))); +extern void test_no_mmx (void) __attribute__((__option__("no-mmx"))); +extern void test_no_pclmul (void) __attribute__((__option__("no-pclmul"))); +extern void test_no_popcnt (void) __attribute__((__option__("no-popcnt"))); +extern void test_no_recip (void) __attribute__((__option__("no-recip"))); +extern void test_no_sse (void) __attribute__((__option__("no-sse"))); +extern void test_no_sse2 (void) __attribute__((__option__("no-sse2"))); +extern void test_no_sse3 (void) __attribute__((__option__("no-sse3"))); +extern void test_no_sse4 (void) __attribute__((__option__("no-sse4"))); +extern void test_no_sse4_1 (void) __attribute__((__option__("no-sse4.1"))); +extern void test_no_sse4_2 (void) __attribute__((__option__("no-sse4.2"))); +extern void test_no_sse4a (void) __attribute__((__option__("no-sse4a"))); +extern void test_no_sse5 (void) __attribute__((__option__("no-sse5"))); +extern void test_no_ssse3 (void) __attribute__((__option__("no-ssse3"))); + +extern void test_arch_i386 (void) __attribute__((__option__("arch=i386"))); +extern void test_arch_i486 (void) __attribute__((__option__("arch=i486"))); +extern void test_arch_i586 (void) __attribute__((__option__("arch=i586"))); +extern void test_arch_pentium (void) __attribute__((__option__("arch=pentium"))); +extern void test_arch_pentium_mmx (void) __attribute__((__option__("arch=pentium-mmx"))); +extern void test_arch_winchip_c6 (void) __attribute__((__option__("arch=winchip-c6"))); +extern void test_arch_winchip2 (void) __attribute__((__option__("arch=winchip2"))); +extern void test_arch_c3 (void) __attribute__((__option__("arch=c3"))); +extern void test_arch_c3_2 (void) __attribute__((__option__("arch=c3-2"))); +extern void test_arch_i686 
(void) __attribute__((__option__("arch=i686"))); +extern void test_arch_pentiumpro (void) __attribute__((__option__("arch=pentiumpro"))); +extern void test_arch_pentium2 (void) __attribute__((__option__("arch=pentium2"))); +extern void test_arch_pentium3 (void) __attribute__((__option__("arch=pentium3"))); +extern void test_arch_pentium3m (void) __attribute__((__option__("arch=pentium3m"))); +extern void test_arch_pentium_m (void) __attribute__((__option__("arch=pentium-m"))); +extern void test_arch_pentium4 (void) __attribute__((__option__("arch=pentium4"))); +extern void test_arch_pentium4m (void) __attribute__((__option__("arch=pentium4m"))); +extern void test_arch_prescott (void) __attribute__((__option__("arch=prescott"))); +extern void test_arch_nocona (void) __attribute__((__option__("arch=nocona"))); +extern void test_arch_core2 (void) __attribute__((__option__("arch=core2"))); +extern void test_arch_geode (void) __attribute__((__option__("arch=geode"))); +extern void test_arch_k6 (void) __attribute__((__option__("arch=k6"))); +extern void test_arch_k6_2 (void) __attribute__((__option__("arch=k6-2"))); +extern void test_arch_k6_3 (void) __attribute__((__option__("arch=k6-3"))); +extern void test_arch_athlon (void) __attribute__((__option__("arch=athlon"))); +extern void test_arch_athlon_tbird (void) __attribute__((__option__("arch=athlon-tbird"))); +extern void test_arch_athlon_4 (void) __attribute__((__option__("arch=athlon-4"))); +extern void test_arch_athlon_xp (void) __attribute__((__option__("arch=athlon-xp"))); +extern void test_arch_athlon_mp (void) __attribute__((__option__("arch=athlon-mp"))); +extern void test_arch_k8 (void) __attribute__((__option__("arch=k8"))); +extern void test_arch_k8_sse3 (void) __attribute__((__option__("arch=k8-sse3"))); +extern void test_arch_opteron (void) __attribute__((__option__("arch=opteron"))); +extern void test_arch_opteron_sse3 (void) __attribute__((__option__("arch=opteron-sse3"))); +extern void 
test_arch_athlon64 (void) __attribute__((__option__("arch=athlon64"))); +extern void test_arch_athlon64_sse3 (void) __attribute__((__option__("arch=athlon64-sse3"))); +extern void test_arch_athlon_fx (void) __attribute__((__option__("arch=athlon-fx"))); +extern void test_arch_amdfam10 (void) __attribute__((__option__("arch=amdfam10"))); +extern void test_arch_barcelona (void) __attribute__((__option__("arch=barcelona"))); +extern void test_arch_foo (void) __attribute__((__option__("arch=foo"))); /* { dg-error "bad value" } */ + +extern void test_tune_i386 (void) __attribute__((__option__("tune=i386"))); +extern void test_tune_i486 (void) __attribute__((__option__("tune=i486"))); +extern void test_tune_i586 (void) __attribute__((__option__("tune=i586"))); +extern void test_tune_pentium (void) __attribute__((__option__("tune=pentium"))); +extern void test_tune_pentium_mmx (void) __attribute__((__option__("tune=pentium-mmx"))); +extern void test_tune_winchip_c6 (void) __attribute__((__option__("tune=winchip-c6"))); +extern void test_tune_winchip2 (void) __attribute__((__option__("tune=winchip2"))); +extern void test_tune_c3 (void) __attribute__((__option__("tune=c3"))); +extern void test_tune_c3_2 (void) __attribute__((__option__("tune=c3-2"))); +extern void test_tune_i686 (void) __attribute__((__option__("tune=i686"))); +extern void test_tune_pentiumpro (void) __attribute__((__option__("tune=pentiumpro"))); +extern void test_tune_pentium2 (void) __attribute__((__option__("tune=pentium2"))); +extern void test_tune_pentium3 (void) __attribute__((__option__("tune=pentium3"))); +extern void test_tune_pentium3m (void) __attribute__((__option__("tune=pentium3m"))); +extern void test_tune_pentium_m (void) __attribute__((__option__("tune=pentium-m"))); +extern void test_tune_pentium4 (void) __attribute__((__option__("tune=pentium4"))); +extern void test_tune_pentium4m (void) __attribute__((__option__("tune=pentium4m"))); +extern void test_tune_prescott (void) 
__attribute__((__option__("tune=prescott"))); +extern void test_tune_nocona (void) __attribute__((__option__("tune=nocona"))); +extern void test_tune_core2 (void) __attribute__((__option__("tune=core2"))); +extern void test_tune_geode (void) __attribute__((__option__("tune=geode"))); +extern void test_tune_k6 (void) __attribute__((__option__("tune=k6"))); +extern void test_tune_k6_2 (void) __attribute__((__option__("tune=k6-2"))); +extern void test_tune_k6_3 (void) __attribute__((__option__("tune=k6-3"))); +extern void test_tune_athlon (void) __attribute__((__option__("tune=athlon"))); +extern void test_tune_athlon_tbird (void) __attribute__((__option__("tune=athlon-tbird"))); +extern void test_tune_athlon_4 (void) __attribute__((__option__("tune=athlon-4"))); +extern void test_tune_athlon_xp (void) __attribute__((__option__("tune=athlon-xp"))); +extern void test_tune_athlon_mp (void) __attribute__((__option__("tune=athlon-mp"))); +extern void test_tune_k8 (void) __attribute__((__option__("tune=k8"))); +extern void test_tune_k8_sse3 (void) __attribute__((__option__("tune=k8-sse3"))); +extern void test_tune_opteron (void) __attribute__((__option__("tune=opteron"))); +extern void test_tune_opteron_sse3 (void) __attribute__((__option__("tune=opteron-sse3"))); +extern void test_tune_athlon64 (void) __attribute__((__option__("tune=athlon64"))); +extern void test_tune_athlon64_sse3 (void) __attribute__((__option__("tune=athlon64-sse3"))); +extern void test_tune_athlon_fx (void) __attribute__((__option__("tune=athlon-fx"))); +extern void test_tune_amdfam10 (void) __attribute__((__option__("tune=amdfam10"))); +extern void test_tune_barcelona (void) __attribute__((__option__("tune=barcelona"))); +extern void test_tune_generic (void) __attribute__((__option__("tune=generic"))); +extern void test_tune_foo (void) __attribute__((__option__("tune=foo"))); /* { dg-error "bad value" } */ + +extern void test_fpmath_sse (void) __attribute__((__option__("sse2,fpmath=sse"))); +extern 
void test_fpmath_387 (void) __attribute__((__option__("sse2,fpmath=387"))); +extern void test_fpmath_sse_387 (void) __attribute__((__option__("sse2,fpmath=sse+387"))); +extern void test_fpmath_387_sse (void) __attribute__((__option__("sse2,fpmath=387+sse"))); +extern void test_fpmath_both (void) __attribute__((__option__("sse2,fpmath=both"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c b/gcc/testsuite/gcc.target/i386/funcspec-6.c new file mode 100644 index 00000000000..0c915975894 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c @@ -0,0 +1,71 @@ +/* Test whether all of the 64-bit function specific options are accepted + without error. */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ + +extern void test_abm (void) __attribute__((__option__("abm"))); +extern void test_aes (void) __attribute__((__option__("aes"))); +extern void test_fused_madd (void) __attribute__((__option__("fused-madd"))); +extern void test_mmx (void) __attribute__((__option__("mmx"))); +extern void test_pclmul (void) __attribute__((__option__("pclmul"))); +extern void test_popcnt (void) __attribute__((__option__("popcnt"))); +extern void test_recip (void) __attribute__((__option__("recip"))); +extern void test_sse (void) __attribute__((__option__("sse"))); +extern void test_sse2 (void) __attribute__((__option__("sse2"))); +extern void test_sse3 (void) __attribute__((__option__("sse3"))); +extern void test_sse4 (void) __attribute__((__option__("sse4"))); +extern void test_sse4_1 (void) __attribute__((__option__("sse4.1"))); +extern void test_sse4_2 (void) __attribute__((__option__("sse4.2"))); +extern void test_sse4a (void) __attribute__((__option__("sse4a"))); +extern void test_sse5 (void) __attribute__((__option__("sse5"))); +extern void test_ssse3 (void) __attribute__((__option__("ssse3"))); + +extern void test_no_abm (void) __attribute__((__option__("no-abm"))); +extern void test_no_aes (void) __attribute__((__option__("no-aes"))); +extern void 
test_no_fused_madd (void) __attribute__((__option__("no-fused-madd"))); +extern void test_no_mmx (void) __attribute__((__option__("no-mmx"))); +extern void test_no_pclmul (void) __attribute__((__option__("no-pclmul"))); +extern void test_no_popcnt (void) __attribute__((__option__("no-popcnt"))); +extern void test_no_recip (void) __attribute__((__option__("no-recip"))); +extern void test_no_sse (void) __attribute__((__option__("no-sse"))); +extern void test_no_sse2 (void) __attribute__((__option__("no-sse2"))); +extern void test_no_sse3 (void) __attribute__((__option__("no-sse3"))); +extern void test_no_sse4 (void) __attribute__((__option__("no-sse4"))); +extern void test_no_sse4_1 (void) __attribute__((__option__("no-sse4.1"))); +extern void test_no_sse4_2 (void) __attribute__((__option__("no-sse4.2"))); +extern void test_no_sse4a (void) __attribute__((__option__("no-sse4a"))); +extern void test_no_sse5 (void) __attribute__((__option__("no-sse5"))); +extern void test_no_ssse3 (void) __attribute__((__option__("no-ssse3"))); + +extern void test_arch_nocona (void) __attribute__((__option__("arch=nocona"))); +extern void test_arch_core2 (void) __attribute__((__option__("arch=core2"))); +extern void test_arch_k8 (void) __attribute__((__option__("arch=k8"))); +extern void test_arch_k8_sse3 (void) __attribute__((__option__("arch=k8-sse3"))); +extern void test_arch_opteron (void) __attribute__((__option__("arch=opteron"))); +extern void test_arch_opteron_sse3 (void) __attribute__((__option__("arch=opteron-sse3"))); +extern void test_arch_athlon64 (void) __attribute__((__option__("arch=athlon64"))); +extern void test_arch_athlon64_sse3 (void) __attribute__((__option__("arch=athlon64-sse3"))); +extern void test_arch_athlon_fx (void) __attribute__((__option__("arch=athlon-fx"))); +extern void test_arch_amdfam10 (void) __attribute__((__option__("arch=amdfam10"))); +extern void test_arch_barcelona (void) __attribute__((__option__("arch=barcelona"))); +extern void test_arch_foo 
(void) __attribute__((__option__("arch=foo"))); /* { dg-error "bad value" } */ + +extern void test_tune_nocona (void) __attribute__((__option__("tune=nocona"))); +extern void test_tune_core2 (void) __attribute__((__option__("tune=core2"))); +extern void test_tune_k8 (void) __attribute__((__option__("tune=k8"))); +extern void test_tune_k8_sse3 (void) __attribute__((__option__("tune=k8-sse3"))); +extern void test_tune_opteron (void) __attribute__((__option__("tune=opteron"))); +extern void test_tune_opteron_sse3 (void) __attribute__((__option__("tune=opteron-sse3"))); +extern void test_tune_athlon64 (void) __attribute__((__option__("tune=athlon64"))); +extern void test_tune_athlon64_sse3 (void) __attribute__((__option__("tune=athlon64-sse3"))); +extern void test_tune_athlon_fx (void) __attribute__((__option__("tune=athlon-fx"))); +extern void test_tune_amdfam10 (void) __attribute__((__option__("tune=amdfam10"))); +extern void test_tune_barcelona (void) __attribute__((__option__("tune=barcelona"))); +extern void test_tune_generic (void) __attribute__((__option__("tune=generic"))); +extern void test_tune_foo (void) __attribute__((__option__("tune=foo"))); /* { dg-error "bad value" } */ + +extern void test_fpmath_sse (void) __attribute__((__option__("sse2,fpmath=sse"))); +extern void test_fpmath_387 (void) __attribute__((__option__("sse2,fpmath=387"))); +extern void test_fpmath_sse_387 (void) __attribute__((__option__("sse2,fpmath=sse+387"))); +extern void test_fpmath_387_sse (void) __attribute__((__option__("sse2,fpmath=387+sse"))); +extern void test_fpmath_both (void) __attribute__((__option__("sse2,fpmath=both"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-7.c b/gcc/testsuite/gcc.target/i386/funcspec-7.c new file mode 100644 index 00000000000..a65ae251978 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-7.c @@ -0,0 +1,13 @@ +/* Test whether using target specific options, we can generate the reciprocal + square root instruction. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mno-recip -mfpmath=sse -ffast-math" } */ + +float do_recip (float a) __attribute__((__option__("recip"))); +float do_normal (float a); + +float do_recip (float a) { return 1.0f / __builtin_sqrtf (a); } +float do_normal (float a) { return 1.0f / __builtin_sqrtf (a); } + +/* { dg-final { scan-assembler "sqrtss" } } */ +/* { dg-final { scan-assembler "rsqrtss" } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-8.c b/gcc/testsuite/gcc.target/i386/funcspec-8.c new file mode 100644 index 00000000000..115f60866a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-8.c @@ -0,0 +1,161 @@ +/* Test whether using target specific options, we can use the x86 builtin + functions in functions with the appropriate function specific options. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mfpmath=sse" } */ + +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef int __m128w __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + +#ifdef __SSE3__ +#error "-msse3 should not be set for this test" +#endif + +__m128d sse3_hsubpd (__m128d a, __m128d b) __attribute__((__option__("sse3"))); +__m128d generic_hsubpd (__m128d a, __m128d b); + +__m128d +sse3_hsubpd (__m128d a, __m128d b) +{ + return __builtin_ia32_hsubpd (a, b); +} + +__m128d +generic_hsubpd (__m128d a, __m128d b) +{ + return __builtin_ia32_hsubpd (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSSE3__ +#error "-mssse3 should not be set for this test" +#endif + +__m128w ssse3_psignd128 (__m128w a, __m128w b) __attribute__((__option__("ssse3"))); +__m128w generic_psignd (__m128w ab, __m128w b); + +__m128w +ssse3_psignd128 (__m128w a, __m128w b) +{ + return __builtin_ia32_psignd128 (a, b); +} + +__m128w +generic_psignd128 (__m128w a, 
__m128w b) +{ + return __builtin_ia32_psignd128 (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4_1__ +#error "-msse4.1 should not be set for this test" +#endif + +__m128d sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) __attribute__((__option__("sse4.1"))); +__m128d generic_blendvpd (__m128d a, __m128d b, __m128d c); + +__m128d +sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_blendvpd (a, b, c); +} + +__m128d +generic_blendvpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_blendvpd (a, b, c); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4_2__ +#error "-msse4.2 should not be set for this test" +#endif + +__m128i sse4_2_pcmpgtq (__m128i a, __m128i b) __attribute__((__option__("sse4.2"))); +__m128i generic_pcmpgtq (__m128i ab, __m128i b); + +__m128i +sse4_2_pcmpgtq (__m128i a, __m128i b) +{ + return __builtin_ia32_pcmpgtq (a, b); +} + +__m128i +generic_pcmpgtq (__m128i a, __m128i b) +{ + return __builtin_ia32_pcmpgtq (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE4A__ +#error "-msse4a should not be set for this test" +#endif + +__m128i sse4_2_insertq (__m128i a, __m128i b) __attribute__((__option__("sse4a"))); +__m128i generic_insertq (__m128i ab, __m128i b); + +__m128i +sse4_2_insertq (__m128i a, __m128i b) +{ + return __builtin_ia32_insertq (a, b); +} + +__m128i +generic_insertq (__m128i a, __m128i b) +{ + return __builtin_ia32_insertq (a, b); /* { dg-error "needs isa option" } */ +} + +#ifdef __SSE5__ +#error "-msse5 should not be set for this test" +#endif + +__m128d sse5_fmaddpd (__m128d a, __m128d b, __m128d c) __attribute__((__option__("sse5"))); +__m128d generic_fmaddpd (__m128d a, __m128d b, __m128d c); + +__m128d +sse5_fmaddpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_fmaddpd (a, b, c); +} + +__m128d +generic_fmaddpd (__m128d a, __m128d b, __m128d c) +{ + return __builtin_ia32_fmaddpd (a, b, c); /* { dg-error "needs isa option" } */ +} + +#ifdef 
__AES__ +#error "-maes should not be set for this test" +#endif + +__m128i aes_aesimc128 (__m128i a) __attribute__((__option__("aes"))); +__m128i generic_aesimc128 (__m128i a); + +__m128i +aes_aesimc128 (__m128i a) +{ + return __builtin_ia32_aesimc128 (a); +} + +__m128i +generic_aesimc128 (__m128i a) +{ + return __builtin_ia32_aesimc128 (a); /* { dg-error "needs isa option" } */ +} + +#ifdef __PCLMUL__ +#error "-mpclmul should not be set for this test" +#endif + +__m128i pclmul_pclmulqdq128 (__m128i a, __m128i b) __attribute__((__option__("pclmul"))); +__m128i generic_pclmulqdq128 (__m128i a, __m128i b); + +__m128i +pclmul_pclmulqdq128 (__m128i a, __m128i b) +{ + return __builtin_ia32_pclmulqdq128 (a, b, 5); +} + +__m128i +generic_pclmulqdq128 (__m128i a, __m128i b) +{ + return __builtin_ia32_pclmulqdq128 (a, b, 5); /* { dg-error "needs isa option" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-9.c b/gcc/testsuite/gcc.target/i386/funcspec-9.c new file mode 100644 index 00000000000..e6d19013101 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/funcspec-9.c @@ -0,0 +1,36 @@ +/* Test whether using target specific options, we can generate SSE5 code. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=k8 -mfpmath=sse -msse2" } */ + +extern void exit (int); + +#ifdef __SSE5__ +#warning "__SSE5__ should not be defined before #pragma GCC option." +#endif + +#pragma GCC option (push) +#pragma GCC option ("sse5,fused-madd") + +#ifndef __SSE5__ +#warning "__SSE5__ should have be defined after #pragma GCC option." +#endif + +float +flt_mul_add (float a, float b, float c) +{ + return (a * b) + c; +} + +#pragma GCC option (pop) +#ifdef __SSE5__ +#warning "__SSE5__ should not be defined after #pragma GCC pop option." 
+#endif + +double +dbl_mul_add (double a, double b, double c) +{ + return (a * b) + c; +} + +/* { dg-final { scan-assembler "fmaddss" } } */ +/* { dg-final { scan-assembler "\taddsd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/hot-1.c b/gcc/testsuite/gcc.target/i386/hot-1.c new file mode 100644 index 00000000000..608f52fd6eb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/hot-1.c @@ -0,0 +1,33 @@ +/* Test whether using attribute((hot)) really turns on -O3. Do this test + by checking whether we vectorize a simple loop. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 1024 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize: the hot attribute is expected to raise it to -O3. */ +void hot (void) __attribute__((__hot__)); + +void +hot (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +/* This should not vectorize: it is compiled with the file's -O1. */ +void +not_hot (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/opt-1.c b/gcc/testsuite/gcc.target/i386/opt-1.c new file mode 100644 index 00000000000..28e2ef38c34 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-1.c @@ -0,0 +1,35 @@ +/* Test that attribute((optimize)) really works. Do this test by checking + whether we vectorize a simple loop. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "prefetcht0" } } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 10240 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize. 
*/ +void opt3 (void) __attribute__((__optimize__(3,"unroll-all-loops,-fprefetch-loop-arrays"))); + +void +opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +/* This should not vectorize. */ +void +not_opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + diff --git a/gcc/testsuite/gcc.target/i386/opt-2.c b/gcc/testsuite/gcc.target/i386/opt-2.c new file mode 100644 index 00000000000..8d6ba6fe925 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-2.c @@ -0,0 +1,38 @@ +/* Test that #pragma GCC optimize (push/pop) really works. Do this test by + checking that only the loop inside the pragma region is vectorized. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -mfpmath=sse -march=k8" } */ +/* { dg-final { scan-assembler "prefetcht0" } } */ +/* { dg-final { scan-assembler "addps" } } */ +/* { dg-final { scan-assembler "subss" } } */ + +#define SIZE 10240 +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); + +/* This should vectorize. */ +#pragma GCC optimize push +#pragma GCC optimize (3, "unroll-all-loops", "-fprefetch-loop-arrays") + +void +opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +#pragma GCC optimize pop + +/* This should not vectorize. */ +void +not_opt3 (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c new file mode 100644 index 00000000000..a9b10333157 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -0,0 +1,171 @@ +/* Same as sse-14, except converted to use #pragma GCC option. */ +/* { dg-do compile } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration" } */ + +#include <mm_malloc.h> + +/* Test that the intrinsics compile without optimization. 
All of them are + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h + that reference the proper builtin functions. Defining away "extern" and + "__inline" results in all of them being compiled as proper functions. */ + +#define extern +#define __inline + +#define _CONCAT(x,y) x ## y + +#define test_1(func, type, op1_type, imm) \ + type _CONCAT(_,func) (op1_type A, int const I) \ + { return func (A, imm); } + +#define test_1x(func, type, op1_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, int const I, int const L) \ + { return func (A, imm1, imm2); } + +#define test_2(func, type, op1_type, op2_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \ + { return func (A, B, imm); } + +#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + { return func (A, B, imm1, imm2); } + +#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, op4_type D, int const I) \ + { return func (A, B, C, D, imm); } + + +#ifndef DIFFERENT_PRAGMAS +#pragma GCC option ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse5,aes,pclmul") +#endif + +/* Following intrinsics require immediate arguments. They + are defined as macros for non-optimized compilations. */ + +/* mmintrin.h (MMX). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("mmx") +#endif +#include <mmintrin.h> + +/* mm3dnow.h (3DNOW). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("3dnow") +#endif +#include <mm3dnow.h> + +/* xmmintrin.h (SSE). 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse") +#endif +#include <xmmintrin.h> +test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_pi16, int, __m64, 1) +test_1 (_m_pextrw, int, __m64, 1) +test_2 (_mm_insert_pi16, __m64, __m64, int, 1) +test_2 (_m_pinsrw, __m64, __m64, int, 1) +test_1 (_mm_shuffle_pi16, __m64, __m64, 1) +test_1 (_m_pshufw, __m64, __m64, 1) +test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) + +/* emmintrin.h (SSE2). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse2") +#endif +#include <emmintrin.h> +test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1) +test_1 (_mm_srli_si128, __m128i, __m128i, 1) +test_1 (_mm_slli_si128, __m128i, __m128i, 1) +test_1 (_mm_extract_epi16, int, __m128i, 1) +test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1) +test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1) +test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1) +test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1) + +/* pmmintrin.h (SSE3). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse3") +#endif +#include <pmmintrin.h> + +/* tmmintrin.h (SSSE3). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("ssse3") +#endif +#include <tmmintrin.h> +test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1) + +/* ammintrin.h (SSE4A). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse4a") +#endif +#include <ammintrin.h> +test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) +test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) + +/* smmintrin.h (SSE4.1). */ +/* nmmintrin.h (SSE4.2). */ +/* Note, nmmintrin.h includes smmintrin.h, and smmintrin.h checks for the + #ifdef. So just set the option to SSE4.2. 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse4.2") +#endif +#include <nmmintrin.h> +test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) +test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) +test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_dp_ps, __m128, __m128, __m128, 1) +test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_insert_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_ps, int, __m128, 1) +test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1) +test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1) +#ifdef __x86_64__ +test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1) +#endif +test_1 (_mm_extract_epi8, int, __m128i, 1) +test_1 (_mm_extract_epi32, int, __m128i, 1) +#ifdef __x86_64__ +test_1 (_mm_extract_epi64, long long, __m128i, 1) +#endif +test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistri, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1) +test_2 (_mm_cmpistra, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistro, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) + +/* bmmintrin.h (SSE5). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("sse5") +#endif +#include <bmmintrin.h> +test_1 (_mm_roti_epi8, __m128i, __m128i, 1) +test_1 (_mm_roti_epi16, __m128i, __m128i, 1) +test_1 (_mm_roti_epi32, __m128i, __m128i, 1) +test_1 (_mm_roti_epi64, __m128i, __m128i, 1) + +/* wmmintrin.h (AES/PCLMUL). 
*/ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC option ("aes,pclmul") +#endif +#include <wmmintrin.h> +test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1) +test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) + +/* mmintrin-common.h */ +test_1 (_mm_round_pd, __m128d, __m128d, 1) +test_1 (_mm_round_ps, __m128, __m128, 1) +test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_round_ss, __m128, __m128, __m128, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c new file mode 100644 index 00000000000..27b601452a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -0,0 +1,108 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8" } */ + +#include <mm_malloc.h> + +/* Test that the intrinsics compile with optimization. All of them are + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h + that reference the proper builtin functions. Defining away "extern" and + "__inline" results in all of them being compiled as proper functions. */ + +#define extern +#define __inline + +/* Following intrinsics require immediate arguments. 
*/ + +/* ammintrin.h */ +#define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) +#define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) + +/* wmmintrin.h */ +#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1) +#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1) + +/* mmintrin-common.h */ +#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1) +#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1) +#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1) +#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1) + +/* smmintrin.h */ +#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1) +#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1) +#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1) +#define __builtin_ia32_dpps(X, Y, M) __builtin_ia32_dpps(X, Y, 1) +#define __builtin_ia32_dppd(X, Y, M) __builtin_ia32_dppd(X, Y, 1) +#define __builtin_ia32_insertps128(D, S, N) __builtin_ia32_insertps128(D, S, 1) +#define __builtin_ia32_vec_ext_v4sf(X, N) __builtin_ia32_vec_ext_v4sf(X, 1) +#define __builtin_ia32_vec_set_v16qi(D, S, N) __builtin_ia32_vec_set_v16qi(D, S, 1) +#define __builtin_ia32_vec_set_v4si(D, S, N) __builtin_ia32_vec_set_v4si(D, S, 1) +#define __builtin_ia32_vec_set_v2di(D, S, N) __builtin_ia32_vec_set_v2di(D, S, 1) +#define __builtin_ia32_vec_ext_v16qi(X, N) __builtin_ia32_vec_ext_v16qi(X, 1) +#define __builtin_ia32_vec_ext_v4si(X, N) __builtin_ia32_vec_ext_v4si(X, 1) +#define __builtin_ia32_vec_ext_v2di(X, N) __builtin_ia32_vec_ext_v2di(X, 1) +#define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1) +#define __builtin_ia32_pcmpistrm128(X, Y, M) \ + __builtin_ia32_pcmpistrm128(X, Y, 1) +#define __builtin_ia32_pcmpistri128(X, Y, M) \ + __builtin_ia32_pcmpistri128(X, Y, 1) +#define __builtin_ia32_pcmpestrm128(X, LX, Y, 
LY, M) \ + __builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestri128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpistria128(X, Y, M) \ + __builtin_ia32_pcmpistria128(X, Y, 1) +#define __builtin_ia32_pcmpistric128(X, Y, M) \ + __builtin_ia32_pcmpistric128(X, Y, 1) +#define __builtin_ia32_pcmpistrio128(X, Y, M) \ + __builtin_ia32_pcmpistrio128(X, Y, 1) +#define __builtin_ia32_pcmpistris128(X, Y, M) \ + __builtin_ia32_pcmpistris128(X, Y, 1) +#define __builtin_ia32_pcmpistriz128(X, Y, M) \ + __builtin_ia32_pcmpistriz128(X, Y, 1) +#define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestria128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestric128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestris128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1) + +/* tmmintrin.h */ +#define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8) +#define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8) + +/* emmintrin.h */ +#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) +#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) +#define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0) +#define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0) +#define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0) +#define __builtin_ia32_vec_set_v8hi(A, D, N) \ + __builtin_ia32_vec_set_v8hi(A, D, 0) +#define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0) +#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) + +/* xmmintrin.h */ +#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) 
+#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) +#define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) +#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) + +/* bmmintrin.h: force the second (immediate) argument to the constant 1. */ +#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1) +#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1) +#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) +#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) + +/* Turn on the ISA options for the intrinsic headers included below. */ +#pragma GCC option ("3dnow,sse4,sse5,aes,pclmul") +#include <wmmintrin.h> +#include <bmmintrin.h> +#include <smmintrin.h> +#include <mm3dnow.h> |