diff options
author | Grigore Lupescu <grigore.lupescu at intel.com> | 2016-07-26 14:24:01 +0100 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-08-03 18:24:15 +0800 |
commit | 178a4f7510d1c6ad7d5f8df41a6b040157f3d37f (patch) | |
tree | ba31be9652f520ed2a0193cf2005d63083e8a1e9 /kernels | |
parent | 44f130d75c030788ea6f60948f5dfc6e1632672d (diff) | |
download | beignet-178a4f7510d1c6ad7d5f8df41a6b040157f3d37f.tar.gz |
Benchmark: Evaluate math performance on intervals
Functions to benchmark math functions on intervals.
Tests: sin, cos, exp2, exp, exp10, log2, log, log10
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
Reviewed-by: Ruiling Song <ruiling.song@intel.com>
Diffstat (limited to 'kernels')
-rw-r--r-- | kernels/bench_math.cl | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl new file mode 100644 index 00000000..8d85d519 --- /dev/null +++ b/kernels/bench_math.cl @@ -0,0 +1,272 @@ +//#define BENCHMARK_NATIVE 1 +//#define BENCHMARK_INTERNAL_FAST 2 + +/* benchmark pow performance */ +kernel void bench_math_pow( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_powr(result, pwr); /* calls native */ +#else + result = pow(result, pwr); /* calls internal slow */ +#endif + } + dst[get_global_id(0)] = result; +} + +/* benchmark exp2 performance, exp2 is native */ +kernel void bench_math_exp2( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + result = exp2(result) * 0.1f; + + dst[get_global_id(0)] = result; +} + +/* benchmark exp performance */ +/* calls internal fast (native) if (x > -0x1.6p1 && x < 0x1.6p1) */ +kernel void bench_math_exp( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_exp((float)-0x1.6p1 - result * 0.1f); /* calls native */ +#elif defined(BENCHMARK_INTERNAL_FAST) + result = exp((float)-0x1.6p1 + result * 0.1f); /* calls internal fast */ +#else + result = exp((float)-0x1.6p1 - result * 0.1f); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark exp10 performance */ +/* calls internal fast (native) if (x < -0x1.4p+5) || (x > +0x1.4p+5) */ +kernel void bench_math_exp10( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_exp10((float)0x1.4p+5 + result * 0.1f); /* calls native */ +#elif defined(BENCHMARK_INTERNAL_FAST) + result = exp10((float)-0x1.4p+5 - result * 0.1f); /* calls internal fast */ +#else + result = exp10((float)-0x1.2p+5 - result * 0.1f); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark log2 performance */ +/* calls internal fast (native) if (x > 0x1.1p0) */ +kernel void bench_math_log2( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_log2((float)0x1.1p0 + result * 0.0001f); /* calls native */ +#elif defined(BENCHMARK_INTERNAL_FAST) + result = log2((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ +#else + result = log2((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark log performance */ +/* calls internal fast (native) if (x > 0x1.1p0) */ +kernel void bench_math_log( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_log((float)0x1.1p0 + result * 0.0001f); /* calls native */ +#elif defined(BENCHMARK_INTERNAL_FAST) + result = log((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ +#else + result = log((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark log10 performance */ +/* calls internal fast (native) if (x > 0x1.1p0) */ +kernel void bench_math_log10( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_log10((float)0x1.1p0 + result * 0.0001f); /* calls native */ +#elif defined(BENCHMARK_INTERNAL_FAST) + result = log10((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ +#else + result = log10((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark sqrt performance */ +kernel void bench_math_sqrt( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + result = sqrt(result) + sqrt(pwr + result); + + dst[get_global_id(0)] = result; +} + +/* benchmark sin performance */ +kernel void bench_math_sin( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_sin(result); /* calls native */ +#else + result = sin(result); /* calls internal, random complexity */ + //result = sin(0.1f + result); /* calls internal, (1) no reduction */ + //result = sin(2.f + result); /* calls internal, (2) fast reduction */ + //result = sin(4001 + result); /* calls internal, (3) slow reduction */ + result *= 0x1p-16; +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark cos performance */ +kernel void bench_math_cos( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_cos(result); /* calls native */ +#else + result = cos(result); /* calls internal, random complexity */ + //result = cos(0.1f + result); /* calls internal, (1) no reduction */ + //result = cos(2.f + result); /* calls internal, (2) fast reduction */ + //result = cos(4001.f + result); /* calls internal, (3) slow reduction */ + result *= 0x1p-16; +#endif + } + dst[get_global_id(0)] = result; +} + +/* benchmark native tan performance */ +kernel void bench_math_tan( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + { +#if defined(BENCHMARK_NATIVE) + result = native_tan(result); /* calls native */ +#else + result = tan(result); /* calls internal slow */ +#endif + } + + dst[get_global_id(0)] = result; +} + +/* benchmark asin performance */ +kernel void bench_math_asin( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + result = asin(pwr - 1); + + dst[get_global_id(0)] = result; +} + +/* benchmark acos performance */ +kernel void bench_math_acos( + global float *src, + global float *dst, + float pwr, + uint loop) +{ + float result = src[get_global_id(0)]; + + for(; loop > 0; loop--) + result = acos(pwr - 1); + + dst[get_global_id(0)] = result; +} |