summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author iverbin <iverbin@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-06-21 14:32:38 +0000
committer iverbin <iverbin@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-06-21 14:32:38 +0000
commit 49763db6f71429c61da1fda88f614585c3df6245 (patch)
tree b924121c2319170fff3ebaf0994fb98c6c11bdb2
parent ada61167d8e79b9a6f86faad584256ebe427102a (diff)
download gcc-49763db6f71429c61da1fda88f614585c3df6245.tar.gz
[AVX-512ER] vrcp28ps auto generation
gcc/
	* config/i386/i386.c (ix86_emit_swdivsf): Emit vrcp28ps.

gcc/testsuite/
	* gcc.target/i386/avx512er-vrcp28ps-3.c: New test.
	* gcc.target/i386/avx512er-vrcp28ps-4.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237648 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 4
-rw-r--r-- gcc/config/i386/i386.c 15
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c 50
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c 6
5 files changed, 78 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 26e8057db25..dacf8c16fe9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
+
+ * config/i386/i386.c (ix86_emit_swdivsf): Emit vrcp28ps.
+
2016-06-21 H.J. Lu <hongjiu.lu@intel.com>
Ilya Enkovich <ilya.enkovich@intel.com>
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 17664ff9f17..8139807512b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -48726,8 +48726,19 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
/* x0 = rcp(b) estimate */
if (mode == V16SFmode || mode == V8DFmode)
- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
- UNSPEC_RCP14)));
+ {
+ if (TARGET_AVX512ER)
+ {
+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP28)));
+ /* res = a * x0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
+ return;
+ }
+ else
+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP14)));
+ }
else
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index cfee40b7572..cff25cc1979 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
+
+ * gcc.target/i386/avx512er-vrcp28ps-3.c: New test.
+ * gcc.target/i386/avx512er-vrcp28ps-4.c: New test.
+
2016-06-21 H.J. Lu <hongjiu.lu@intel.com>
PR target/71549
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c
new file mode 100644
index 00000000000..e08bea41c3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
+
+#include "avx512er-check.h"
+
+#define MAX 1000
+#define EPS 0.00001
+
+__attribute__ ((noinline, optimize (0)))
+void static
+compute_rcp_ref (float *a, float *b, float *r)
+{
+ for (int i = 0; i < MAX; i++)
+ r[i] = a[i] / b[i];
+}
+
+__attribute__ ((noinline))
+void static
+compute_rcp_exp (float *a, float *b, float *r)
+{
+ for (int i = 0; i < MAX; i++)
+ r[i] = a[i] / b[i];
+}
+
+void static
+avx512er_test (void)
+{
+ float a[MAX];
+ float b[MAX];
+ float ref[MAX];
+ float exp[MAX];
+
+ for (int i = 0; i < MAX; i++)
+ {
+ a[i] = 179.345 - 6.5645 * i;
+ b[i] = 8765.987 - 8.6756 * i;
+ }
+
+ compute_rcp_ref (a, b, ref);
+ compute_rcp_exp (a, b, exp);
+
+ for (int i = 0; i < MAX; i++)
+ {
+ float rel_err = (ref[i] - exp[i]) / ref[i];
+ rel_err = rel_err > 0.0 ? rel_err : -rel_err;
+ if (rel_err > EPS)
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c
new file mode 100644
index 00000000000..2c76d967184
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
+
+#include "avx512er-vrcp28ps-3.c"
+
+/* { dg-final { scan-assembler-times "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */