summary refs log tree commit diff
path: root/kernels/compiler_skip_check.cl
diff options
context:
space:
mode:
Diffstat (limited to 'kernels/compiler_skip_check.cl')
-rw-r--r-- kernels/compiler_skip_check.cl 53
1 files changed, 53 insertions, 0 deletions
diff --git a/kernels/compiler_skip_check.cl b/kernels/compiler_skip_check.cl
new file mode 100644
index 00000000..68369dd6
--- /dev/null
+++ b/kernels/compiler_skip_check.cl
@@ -0,0 +1,53 @@
+/* Skip-check (SIC) test kernel: each work-group handles the 16x16 block at
+ * (group_id(0)*16, group_id(1)*16), with motion_vector_buffer[mb_idx] as the
+ * input MV. Local id 0 stores the distortion in residual_buffer[mb_idx]; every
+ * work-item stores result.s0..s3 in dwo_buffer. pld_buffer is never accessed. */
+__kernel __attribute__((intel_reqd_sub_group_size(16)))
+void compiler_skip_check(__read_only image2d_t src_img,
+                         __read_only image2d_t ref_img,
+                         __global short2 *motion_vector_buffer,
+                         __global ushort *residual_buffer,
+                         __global uint* dwo_buffer,
+                         __global uint* pld_buffer){
+  sampler_t vs = 0;
+  int gr_id0 = get_group_id(0);
+  int gr_id1 = get_group_id(1);
+  int lid_x = get_local_id(0);
+  int mb_idx = gr_id0 + gr_id1 * get_num_groups(0);
+
+  ushort2 src_coord;
+  src_coord.x = gr_id0 * 16;
+  src_coord.y = gr_id1 * 16;
+
+  intel_sub_group_avc_sic_payload_t payload =
+    intel_sub_group_avc_sic_initialize(src_coord);
+
+  // Ignored in unidirectional mode, so just set to 0
+  uchar bidir_weight = 0;
+  uint skip_block_partition_type = CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL;
+  uint skip_motion_vector_mask = CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL;
+  uchar skip_sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;
+
+  // Zero s1 explicitly so as_ulong() never reads an uninitialized component.
+  short2 input_mv = motion_vector_buffer[mb_idx];
+  uint2 bi_mv = (uint2)(as_uint(input_mv), 0u);
+  ulong mv = as_ulong(bi_mv);
+
+  payload = intel_sub_group_avc_sic_configure_skc(
+    skip_block_partition_type, skip_motion_vector_mask, mv, bidir_weight,
+    skip_sad_adjustment, payload);
+
+  intel_sub_group_avc_sic_result_t result =
+    intel_sub_group_avc_sic_evaluate_with_single_reference(
+      src_img, ref_img, vs, payload);
+  ushort distortion = intel_sub_group_avc_sic_get_inter_distortions(result);
+
+  // Only local id 0 stores the distortion; the dwo stores run on every work-item.
+  if (lid_x == 0) {
+    residual_buffer[mb_idx] = distortion;
+  }
+  dwo_buffer[mb_idx*16*4 + lid_x + 16*0] = result.s0;
+  dwo_buffer[mb_idx*16*4 + lid_x + 16*1] = result.s1;
+  dwo_buffer[mb_idx*16*4 + lid_x + 16*2] = result.s2;
+  dwo_buffer[mb_idx*16*4 + lid_x + 16*3] = result.s3;
+}