summaryrefslogtreecommitdiff
path: root/kernels
diff options
context:
space:
mode:
authorGrigore Lupescu <grigore.lupescu at intel.com>2016-04-11 17:41:30 +0300
committerYang Rong <rong.r.yang@intel.com>2016-05-18 15:10:36 +0800
commit52628691322b9fd5bdba2ecfb9e6bfe17ae940f8 (patch)
tree16299017fb63c4e00fc9aede947d8d0d3162a9ff /kernels
parent22d7f7c22654b58d1daf1e10da26235fe4ece3b8 (diff)
downloadbeignet-52628691322b9fd5bdba2ecfb9e6bfe17ae940f8.tar.gz
Benchmark: Add performance tests for workgroup broadcast
Added the following performance tests: benchmark_workgroup_broadcast_1D_int, benchmark_workgroup_broadcast_1D_long, benchmark_workgroup_broadcast_2D_int, benchmark_workgroup_broadcast_2D_long. Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>. Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>.
Diffstat (limited to 'kernels')
-rw-r--r--kernels/bench_workgroup.cl99
1 files changed, 99 insertions, 0 deletions
diff --git a/kernels/bench_workgroup.cl b/kernels/bench_workgroup.cl
index 8a525de0..87986fca 100644
--- a/kernels/bench_workgroup.cl
+++ b/kernels/bench_workgroup.cl
@@ -1,4 +1,103 @@
/*
+ * Benchmark broadcast 1D
+ */
+/*
+ * Repeatedly broadcasts one int across the work-group (1D variant).
+ *
+ * src          input buffer; each work-item reads src[get_global_id(0)]
+ * dst          output buffer; each work-item writes the last broadcast
+ *              result to dst[get_global_id(0)]
+ * reduce_loop  number of times work_group_broadcast() is invoked
+ * wg_local_x   local id (dimension 0) of the work-item whose value is
+ *              broadcast
+ * wg_local_y   not used by this kernel; the parameter list matches the
+ *              2D kernels
+ */
+kernel void bench_workgroup_broadcast_1D_int(global int *src,
+                                             global int *dst,
+                                             int reduce_loop,
+                                             uint wg_local_x,
+                                             uint wg_local_y)
+{
+  uint offset = 0;
+  uint index = offset + get_global_id(0);
+
+  int val = src[index];
+  /*
+   * depending on generated ASM, volatile may be removed.
+   * Initialized so that dst receives a defined value (0) when
+   * reduce_loop <= 0 and the loop body never runs.
+   */
+  volatile int result = 0;
+
+  for (; reduce_loop > 0; reduce_loop--) {
+    result = work_group_broadcast(val, wg_local_x);
+  }
+
+  dst[index] = result;
+}
+
+/*
+ * Repeatedly broadcasts one long across the work-group (1D variant).
+ *
+ * src          input buffer; each work-item reads src[get_global_id(0)]
+ * dst          output buffer; each work-item writes the last broadcast
+ *              result to dst[get_global_id(0)]
+ * reduce_loop  number of times work_group_broadcast() is invoked
+ * wg_local_x   local id (dimension 0) of the work-item whose value is
+ *              broadcast
+ * wg_local_y   not used by this kernel; the parameter list matches the
+ *              2D kernels
+ */
+kernel void bench_workgroup_broadcast_1D_long(global long *src,
+                                              global long *dst,
+                                              int reduce_loop,
+                                              uint wg_local_x,
+                                              uint wg_local_y)
+{
+  uint offset = 0;
+  uint index = offset + get_global_id(0);
+
+  long val = src[index];
+  /*
+   * depending on generated ASM, volatile may be removed.
+   * Initialized so that dst receives a defined value (0) when
+   * reduce_loop <= 0 and the loop body never runs.
+   */
+  volatile long result = 0;
+
+  for (; reduce_loop > 0; reduce_loop--) {
+    result = work_group_broadcast(val, wg_local_x);
+  }
+
+  dst[index] = result;
+}
+
+
+/*
+ * Benchmark broadcast 2D
+ */
+/*
+ * Repeatedly broadcasts one int across the work-group (2D variant).
+ *
+ * Buffer layout used here: every work-group owns a contiguous run of
+ * get_local_size(0) * get_local_size(1) elements, groups are ordered
+ * row-major by (group_id(1), group_id(0)), and inside a group the
+ * work-items are ordered row-major by (local_id(1), local_id(0)).
+ *
+ * src          input buffer, read at the index described above
+ * dst          output buffer, written at the same index with the last
+ *              broadcast result
+ * reduce_loop  number of times work_group_broadcast() is invoked
+ * wg_local_x   local id in dimension 0 of the broadcasting work-item
+ * wg_local_y   local id in dimension 1 of the broadcasting work-item
+ */
+kernel void bench_workgroup_broadcast_2D_int(global int *src,
+                                             global int *dst,
+                                             int reduce_loop,
+                                             uint wg_local_x,
+                                             uint wg_local_y)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+                get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+               get_local_id(1) * get_local_size(0);
+
+  int val = src[index];
+  /*
+   * Not volatile in this variant. Initialized so that dst receives a
+   * defined value (0) when reduce_loop <= 0 and the loop never runs.
+   */
+  int result = 0;
+
+  for (; reduce_loop > 0; reduce_loop--) {
+    result = work_group_broadcast(val, wg_local_x, wg_local_y);
+  }
+
+  dst[index] = result;
+}
+
+/*
+ * Repeatedly broadcasts one long across the work-group (2D variant).
+ *
+ * Buffer layout used here: every work-group owns a contiguous run of
+ * get_local_size(0) * get_local_size(1) elements, groups are ordered
+ * row-major by (group_id(1), group_id(0)), and inside a group the
+ * work-items are ordered row-major by (local_id(1), local_id(0)).
+ *
+ * src          input buffer, read at the index described above
+ * dst          output buffer, written at the same index with the last
+ *              broadcast result
+ * reduce_loop  number of times work_group_broadcast() is invoked
+ * wg_local_x   local id in dimension 0 of the broadcasting work-item
+ * wg_local_y   local id in dimension 1 of the broadcasting work-item
+ */
+kernel void bench_workgroup_broadcast_2D_long(global long *src,
+                                              global long *dst,
+                                              int reduce_loop,
+                                              uint wg_local_x,
+                                              uint wg_local_y)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+                get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+               get_local_id(1) * get_local_size(0);
+
+  long val = src[index];
+  /*
+   * Not volatile in this variant. Initialized so that dst receives a
+   * defined value (0) when reduce_loop <= 0 and the loop never runs.
+   */
+  long result = 0;
+
+  for (; reduce_loop > 0; reduce_loop--) {
+    result = work_group_broadcast(val, wg_local_x, wg_local_y);
+  }
+
+  dst[index] = result;
+}
+
+/*
* Benchmark workgroup reduce add
*/
kernel void bench_workgroup_reduce_add_int(