Benchmark: Evaluate math performance on intervals

Functions to benchmark math functions on intervals. Tests: sin, cos, exp2, exp, exp10, log2, log, log10.
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
Reviewed-by: Ruiling Song <ruiling.song@intel.com>
author: Grigore Lupescu <grigore.lupescu at intel.com> 2016-07-26 14:24:01 +0100
committer: Yang Rong <rong.r.yang@intel.com> 2016-08-03 18:24:15 +0800
commit: 178a4f7510d1c6ad7d5f8df41a6b040157f3d37f (patch)
tree: ba31be9652f520ed2a0193cf2005d63083e8a1e9 /kernels
parent: 44f130d75c030788ea6f60948f5dfc6e1632672d (diff)
download: beignet-178a4f7510d1c6ad7d5f8df41a6b040157f3d37f.tar.gz
1 files changed, 272 insertions, 0 deletions
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl
new file mode 100644
index 00000000..8d85d519
--- /dev/null
+++ b/kernels/bench_math.cl
@@ -0,0 +1,272 @@
+//#define BENCHMARK_NATIVE 1
+//#define BENCHMARK_INTERNAL_FAST 2
+
+/* Benchmark pow()/native_powr(): each work-item re-raises its own value to `pwr`, `loop` times. */
+kernel void bench_math_pow(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* exponent applied on every iteration */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  while (loop-- > 0)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_powr(acc, pwr); /* native variant */
+#else
+    acc = pow(acc, pwr); /* internal slow variant */
+#endif
+  }
+  dst[gid] = acc;
+}
+
+/* Benchmark exp2 (native, per the original note): chains exp2 and scales each result by 0.1f. */
+kernel void bench_math_exp2(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  for (uint i = 0; i < loop; ++i)
+    acc = 0.1f * exp2(acc);
+  dst[gid] = acc;
+}
+
+/* Benchmark exp just above or just below -0x1.6p1; the sign of the acc * 0.1f term picks the side when acc > 0. */
+/* Per the original note, exp calls internal fast (native) if (x > -0x1.6p1 && x < 0x1.6p1). */
+kernel void bench_math_exp(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  while (loop-- > 0)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_exp(-0x1.6p1f - acc * 0.1f); /* calls native */
+#elif defined(BENCHMARK_INTERNAL_FAST)
+    acc = exp(-0x1.6p1f + acc * 0.1f); /* above -0x1.6p1: calls internal fast */
+#else
+    acc = exp(-0x1.6p1f - acc * 0.1f); /* below -0x1.6p1: calls internal slow */
+#endif
+  }
+  dst[gid] = acc;
+}
+
+/* Benchmark exp10; per the original note it calls internal fast (native) if */
+/* (x < -0x1.4p+5) || (x > +0x1.4p+5), so the -0x1.2p+5 variant exercises the internal slow path. */
+kernel void bench_math_exp10(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  while (loop-- > 0)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_exp10(-0x1.4p+5f - acc * 0.1f); /* calls native; negative side, since 10^(+40) overflows float to inf */
+#elif defined(BENCHMARK_INTERNAL_FAST)
+    acc = exp10(-0x1.4p+5f - acc * 0.1f); /* calls internal fast */
+#else
+    acc = exp10(-0x1.2p+5f - acc * 0.1f); /* calls internal slow */
+#endif
+  }
+  dst[gid] = acc;
+}
+
+/* Benchmark log2 just above or just below 0x1.1p0; the sign of the val * 0.0001f term picks the side when val > 0. */
+/* Per the original note, log2 calls internal fast (native) if (x > 0x1.1p0). */
+kernel void bench_math_log2(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float val = src[gid];
+
+  for (uint i = 0; i < loop; ++i)
+  {
+#if defined(BENCHMARK_NATIVE)
+    val = native_log2(0x1.1p0f + val * 0.0001f); /* calls native */
+#elif defined(BENCHMARK_INTERNAL_FAST)
+    val = log2(0x1.1p0f + val * 0.0001f); /* calls internal fast */
+#else
+    val = log2(0x1.1p0f - val * 0.0001f); /* calls internal slow */
+#endif
+  }
+  dst[gid] = val;
+}
+
+/* Benchmark log just above or just below 0x1.1p0; the sign of the val * 0.0001f term picks the side when val > 0. */
+/* Per the original note, log calls internal fast (native) if (x > 0x1.1p0). */
+kernel void bench_math_log(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float val = src[gid];
+
+  for (uint i = 0; i < loop; ++i)
+  {
+#if defined(BENCHMARK_NATIVE)
+    val = native_log(0x1.1p0f + val * 0.0001f); /* calls native */
+#elif defined(BENCHMARK_INTERNAL_FAST)
+    val = log(0x1.1p0f + val * 0.0001f); /* calls internal fast */
+#else
+    val = log(0x1.1p0f - val * 0.0001f); /* calls internal slow */
+#endif
+  }
+  dst[gid] = val;
+}
+
+/* Benchmark log10 just above or just below 0x1.1p0; the sign of the val * 0.0001f term picks the side when val > 0. */
+/* Per the original note, log10 calls internal fast (native) if (x > 0x1.1p0). */
+kernel void bench_math_log10(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float val = src[gid];
+
+  for (uint i = 0; i < loop; ++i)
+  {
+#if defined(BENCHMARK_NATIVE)
+    val = native_log10(0x1.1p0f + val * 0.0001f); /* calls native */
+#elif defined(BENCHMARK_INTERNAL_FAST)
+    val = log10(0x1.1p0f + val * 0.0001f); /* calls internal fast */
+#else
+    val = log10(0x1.1p0f - val * 0.0001f); /* calls internal slow */
+#endif
+  }
+  dst[gid] = val;
+}
+
+/* Benchmark sqrt: two sqrt calls per pass, one on the running value and one on that value offset by `pwr`. */
+kernel void bench_math_sqrt(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* offset added before the second sqrt */
+  uint loop)         /* number of passes */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  while (loop-- > 0)
+    acc = sqrt(acc) + sqrt(acc + pwr);
+
+  dst[gid] = acc;
+}
+
+/* Benchmark sin; the internal path rescales each result by 2^-16 before feeding it back. */
+kernel void bench_math_sin(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  for (uint i = 0; i < loop; ++i)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_sin(acc); /* calls native */
+#else
+    acc = sin(acc); /* calls internal, random complexity */
+    //acc = sin(0.1f + acc); /* calls internal, (1) no reduction */
+    //acc = sin(2.f + acc); /* calls internal, (2) fast reduction */
+    //acc = sin(4001.f + acc); /* calls internal, (3) slow reduction */
+    acc *= 0x1p-16f; /* f suffix: an unsuffixed constant is double and would pull fp64 into a float benchmark */
+#endif
+  }
+
+  dst[gid] = acc;
+}
+
+/* Benchmark cos; the internal path rescales each result by 2^-16 before feeding it back. */
+kernel void bench_math_cos(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  for (uint i = 0; i < loop; ++i)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_cos(acc); /* calls native */
+#else
+    acc = cos(acc); /* calls internal, random complexity */
+    //acc = cos(0.1f + acc); /* calls internal, (1) no reduction */
+    //acc = cos(2.f + acc); /* calls internal, (2) fast reduction */
+    //acc = cos(4001.f + acc); /* calls internal, (3) slow reduction */
+    acc *= 0x1p-16f; /* f suffix: an unsuffixed constant is double and would pull fp64 into a float benchmark */
+#endif
+  }
+  dst[gid] = acc;
+}
+
+/* Benchmark tan (native_tan when BENCHMARK_NATIVE is defined): feeds each result back as the next argument. */
+kernel void bench_math_tan(
+  global float *src, /* in: starting value, indexed by global id */
+  global float *dst, /* out: value left after the last iteration */
+  float pwr,         /* not referenced by this kernel */
+  uint loop)         /* number of chained calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  while (loop-- > 0)
+  {
+#if defined(BENCHMARK_NATIVE)
+    acc = native_tan(acc); /* native variant */
+#else
+    acc = tan(acc); /* internal slow variant */
+#endif
+  }
+
+  dst[gid] = acc;
+}
+
+/* Benchmark asin on the fixed argument (pwr - 1); dst keeps the src value when loop is 0. */
+kernel void bench_math_asin(
+  global float *src, /* in: value passed through unchanged when loop == 0 */
+  global float *dst, /* out: asin(pwr - 1) once at least one pass has run */
+  float pwr,         /* argument to asin, offset by one */
+  uint loop)         /* number of repeated calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  /* NOTE(review): the argument does not depend on acc, so this call looks loop-invariant - confirm it is not hoisted. */
+  while (loop-- > 0)
+    acc = asin(pwr - 1.0f);
+  dst[gid] = acc;
+}
+
+/* Benchmark acos on the fixed argument (pwr - 1); dst keeps the src value when loop is 0. */
+kernel void bench_math_acos(
+  global float *src, /* in: value passed through unchanged when loop == 0 */
+  global float *dst, /* out: acos(pwr - 1) once at least one pass has run */
+  float pwr,         /* argument to acos, offset by one */
+  uint loop)         /* number of repeated calls */
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+  /* NOTE(review): the argument does not depend on acc, so this call looks loop-invariant - confirm it is not hoisted. */
+  while (loop-- > 0)
+    acc = acos(pwr - 1.0f);
+  dst[gid] = acc;
+}
author	Grigore Lupescu <grigore.lupescu at intel.com>	2016-07-26 14:24:01 +0100
committer	Yang Rong <rong.r.yang@intel.com>	2016-08-03 18:24:15 +0800
commit	178a4f7510d1c6ad7d5f8df41a6b040157f3d37f (patch)
tree	ba31be9652f520ed2a0193cf2005d63083e8a1e9 /kernels
parent	44f130d75c030788ea6f60948f5dfc6e1632672d (diff)
download	beignet-178a4f7510d1c6ad7d5f8df41a6b040157f3d37f.tar.gz