diff options
author | Grigore Lupescu <grigore.lupescu at intel.com> | 2016-04-11 17:41:30 +0300 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-05-18 15:10:36 +0800 |
commit | 52628691322b9fd5bdba2ecfb9e6bfe17ae940f8 (patch) | |
tree | 16299017fb63c4e00fc9aede947d8d0d3162a9ff /kernels | |
parent | 22d7f7c22654b58d1daf1e10da26235fe4ece3b8 (diff) | |
download | beignet-52628691322b9fd5bdba2ecfb9e6bfe17ae940f8.tar.gz |
Benchmark: Add performance tests for workgroup broadcast
Added the following performance tests:
benchmark_workgroup_broadcast_1D_int
benchmark_workgroup_broadcast_1D_long
benchmark_workgroup_broadcast_2D_int
benchmark_workgroup_broadcast_2D_long
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
Diffstat (limited to 'kernels')
-rw-r--r-- | kernels/bench_workgroup.cl | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/kernels/bench_workgroup.cl b/kernels/bench_workgroup.cl
index 8a525de0..87986fca 100644
--- a/kernels/bench_workgroup.cl
+++ b/kernels/bench_workgroup.cl
@@ -1,4 +1,103 @@
 /*
+ * Benchmark broadcast 1D
+ *
+ * src, dst:    one element per work-item, indexed by get_global_id(0).
+ * reduce_loop: number of work_group_broadcast() calls issued per work-item.
+ * wg_local_x:  local id argument handed to work_group_broadcast().
+ * wg_local_y:  not referenced by the 1D kernels.
+ */
+kernel void bench_workgroup_broadcast_1D_int(global int *src,
+                                             global int *dst,
+                                             int reduce_loop,
+                                             uint wg_local_x,
+                                             uint wg_local_y)
+{
+  uint index = get_global_id(0);
+  int val = src[index];
+  /* depending on generated ASM, volatile may be removed; 0 keeps dst defined if reduce_loop <= 0 */
+  volatile int result = 0;
+
+  for(; reduce_loop > 0; reduce_loop--){
+    result = work_group_broadcast(val, wg_local_x);
+  }
+
+  dst[index] = result;
+}
+
+kernel void bench_workgroup_broadcast_1D_long(global long *src,
+                                              global long *dst,
+                                              int reduce_loop,
+                                              uint wg_local_x,
+                                              uint wg_local_y)
+{
+  uint index = get_global_id(0);
+  long val = src[index];
+  /* depending on generated ASM, volatile may be removed; 0 keeps dst defined if reduce_loop <= 0 */
+  volatile long result = 0;
+
+  for(; reduce_loop > 0; reduce_loop--){
+    result = work_group_broadcast(val, wg_local_x);
+  }
+
+  dst[index] = result;
+}
+
+
+/*
+ * Benchmark broadcast 2D
+ *
+ * src, dst:    each work-group owns lsize consecutive elements (group id 0 varies fastest); local id 0 varies fastest inside.
+ * reduce_loop: number of work_group_broadcast() calls issued per work-item.
+ * wg_local_x:  dimension-0 local id argument handed to work_group_broadcast().
+ * wg_local_y:  dimension-1 local id argument handed to work_group_broadcast().
+ */
+kernel void bench_workgroup_broadcast_2D_int(global int *src,
+                                             global int *dst,
+                                             int reduce_loop,
+                                             uint wg_local_x,
+                                             uint wg_local_y)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+    get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+    get_local_id(1) * get_local_size(0);
+
+  int val = src[index];
+  /* unlike the 1D kernels, result is not volatile here; 0 keeps dst defined if reduce_loop <= 0 */
+  int result = 0;
+
+  for(; reduce_loop > 0; reduce_loop--){
+    result = work_group_broadcast(val, wg_local_x, wg_local_y);
+  }
+
+  dst[index] = result;
+}
+
+kernel void bench_workgroup_broadcast_2D_long(global long *src,
+                                              global long *dst,
+                                              int reduce_loop,
+                                              uint wg_local_x,
+                                              uint wg_local_y)
+{
+  uint lsize = get_local_size(0) * get_local_size(1);
+  uint offset = get_group_id(0) * lsize +
+    get_group_id(1) * get_num_groups(0) * lsize;
+  uint index = offset + get_local_id(0) +
+    get_local_id(1) * get_local_size(0);
+
+  long val = src[index];
+  /* unlike the 1D kernels, result is not volatile here; 0 keeps dst defined if reduce_loop <= 0 */
+  long result = 0;
+
+  for(; reduce_loop > 0; reduce_loop--){
+    result = work_group_broadcast(val, wg_local_x, wg_local_y);
+  }
+
+  dst[index] = result;
+}
+
+/*
  * Benchmark workgroup reduce add
  */
 kernel void bench_workgroup_reduce_add_int( |