summaryrefslogtreecommitdiff
path: root/kernels/compiler_intra_prediction.cl
diff options
context:
space:
mode:
Diffstat (limited to 'kernels/compiler_intra_prediction.cl')
-rw-r--r--kernels/compiler_intra_prediction.cl91
1 files changed, 91 insertions, 0 deletions
diff --git a/kernels/compiler_intra_prediction.cl b/kernels/compiler_intra_prediction.cl
new file mode 100644
index 00000000..28e81e52
--- /dev/null
+++ b/kernels/compiler_intra_prediction.cl
@@ -0,0 +1,91 @@
+
+__kernel __attribute__((intel_reqd_sub_group_size(16)))
+void compiler_intra_prediction(
+ __read_only image2d_t srcImg,
+ __global uchar *luma_mode,
+ __global ushort *luma_distortion,
+ __global uchar *luma_shape,
+ __global uint* dwo_buffer,
+ __global uint* pld_buffer){
+
+ int gr_id0 = get_group_id(0);
+ int gr_id1 = get_group_id(1);
+
+ ushort2 src_coord;
+ /*src_coord.x = gr_id0 * 16;
+ src_coord.y = gr_id1 * 16;*/
+ src_coord.x = 2 * 16;
+ src_coord.y = 1 * 16;
+
+ intel_sub_group_avc_sic_payload_t payload = intel_sub_group_avc_sic_initialize(src_coord);
+
+ uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;
+ uchar intra_partition_mask = CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL;
+//XXX: Different from official value?
+#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL
+#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL
+#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x4
+#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x8
+ uint nb_avail = CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL |
+ CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL |
+ CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL |
+ CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL;
+
+ uint sgl_id = get_sub_group_local_id();
+ int2 nb_coord;
+ float4 color;
+
+ nb_coord.x = src_coord.x - 1;
+ nb_coord.y = src_coord.y + sgl_id;
+ color = read_imagef(srcImg, nb_coord);
+ uchar left_edge = color.s0 * 255;
+
+ nb_coord.x = src_coord.x - 1;
+ nb_coord.y = src_coord.y - 1;
+ color = read_imagef(srcImg, nb_coord);
+ uchar upper_left_corner = color.s0 * 255;
+
+ nb_coord.x = src_coord.x + sgl_id;
+ nb_coord.y = src_coord.y - 1;
+ color = read_imagef(srcImg, nb_coord);
+ uchar upper_edge = color.s0 * 255;
+
+ uchar upper_right_edge = 0;
+ if(sgl_id < 8){
+ nb_coord.x = src_coord.x + 16 + sgl_id;
+ nb_coord.y = src_coord.y - 1;
+ color = read_imagef(srcImg, nb_coord);
+ upper_right_edge = color.s0 * 255;
+ }
+ payload = intel_sub_group_avc_sic_configure_ipe(
+ intra_partition_mask, nb_avail, left_edge, upper_left_corner, upper_edge,
+ upper_right_edge, sad_adjustment, payload);
+
+ uchar shape_cost_16_16 = (1 << 4) | 5;
+ uchar shape_cost_8_8 = (1 << 4) | 4;
+ uchar shape_cost_4_4 = (1 << 4) | 3;
+ uint intra_shape_cost = (shape_cost_4_4 << 24) | (shape_cost_8_8 << 16) | (shape_cost_16_16 << 8) | (0x0);
+ payload = intel_sub_group_avc_sic_set_intra_luma_shape_penalty(intra_shape_cost, payload);
+
+ sampler_t vs = 0;
+ intel_sub_group_avc_sic_result_t result =
+ intel_sub_group_avc_sic_evaluate_ipe(srcImg, vs, payload);
+
+ uchar shape = intel_sub_group_avc_sic_get_ipe_luma_shape(result);
+ ushort dist = intel_sub_group_avc_sic_get_best_ipe_luma_distortion(result);
+ ulong modes = intel_sub_group_avc_sic_get_packed_ipe_luma_modes(result);
+
+ int lid_x = get_local_id(0);
+ int mb_idx = gr_id0 + gr_id1 * get_num_groups(0);
+ if (lid_x == 0) {
+ luma_shape[mb_idx] = shape;
+ luma_distortion[mb_idx] = dist;
+ uchar mode = modes & 0xF;
+ luma_mode[mb_idx] = mode;
+ }
+
+ dwo_buffer[mb_idx*16*4 + lid_x + 16*0] = result.s0;
+ dwo_buffer[mb_idx*16*4 + lid_x + 16*1] = result.s1;
+ dwo_buffer[mb_idx*16*4 + lid_x + 16*2] = result.s2;
+ dwo_buffer[mb_idx*16*4 + lid_x + 16*3] = result.s3;
+}