author     Sreerenj Balachandran <sreerenj.balachandran@intel.com>   2017-12-01 14:47:11 -0800
committer  Xiang, Haihao <haihao.xiang@intel.com>                    2018-01-10 14:34:35 +0800
commit     ffa22b4b4ac286043486ea21ff7c78c40aabc282 (patch)
tree       961d1cf2e7be2374752b5c19fcca72869c514a4b
parent     b8828f09836d5b156e6b17a096e687c03631aba6 (diff)
download   libva-intel-driver-ffa22b4b4ac286043486ea21ff7c78c40aabc282.tar.gz
Stats/PreEnc: Add GEN9 AVC PreEnc support
AVC PreEnc is a three-stage process:

1. Downscale the source and reference pictures; optionally generate the
   variance and pixel average of the current frame and of the past and
   future references, based on the user request.
2. Hierarchical Motion Estimation (only 4x HME is supported in PreEnc).
3. PreProc, which generates the motion vectors and distortion values.

libva PR: https://github.com/01org/libva/pull/110

Signed-off-by: Sreerenj Balachandran <sreerenj.balachandran@intel.com>
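For orientation, a minimal sketch of the order in which the three stages are dispatched; it mirrors gen9_avc_preenc_kernel_run logic introduced further down in this patch (gen9_avc_preenc_gpe_kernel_run), with error handling omitted:

    /* Sketch: dispatch order of the three PreEnc stages. */
    static VAStatus preenc_run_stages_sketch(VADriverContextP ctx,
                                             struct encode_state *encode_state,
                                             struct intel_encoder_context *encoder_context,
                                             VAStatsStatisticsParameterH264 *stat_param_h264)
    {
        VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;

        /* Stage 1: 4x downscaling of the current picture and each supplied ref */
        gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
                                       INTEL_ENC_HME_4x, SCALE_CUR_PIC);
        if (stat_param->num_past_references > 0)
            gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
                                           INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
        if (stat_param->num_future_references > 0)
            gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
                                           INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);

        /* Stage 2: 4x HME (skipped for I frames, which have no references) */
        if (stat_param->num_past_references || stat_param->num_future_references)
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);

        /* Stage 3: PreProc, producing the MV and distortion/statistics outputs */
        if (!stat_param_h264->disable_mv_output ||
            !stat_param_h264->disable_statistics_output)
            gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);

        return VA_STATUS_SUCCESS;
    }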
-rw-r--r--  src/i965_avc_encoder.c         | 1355
-rw-r--r--  src/i965_avc_encoder.h         |  660
-rw-r--r--  src/i965_avc_encoder_common.h  |   18
3 files changed, 1997 insertions, 36 deletions
diff --git a/src/i965_avc_encoder.c b/src/i965_avc_encoder.c
index e4ac008d..a334e0af 100644
--- a/src/i965_avc_encoder.c
+++ b/src/i965_avc_encoder.c
@@ -82,6 +82,11 @@
#define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
#define FEI_AVC_DISTORTION_BUFFER_SIZE 48
#define FEI_AVC_QP_BUFFER_SIZE 1
+#define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
+
+#define SCALE_CUR_PIC 1
+#define SCALE_PAST_REF_PIC 2
+#define SCALE_FUTURE_REF_PIC 3
static const uint32_t qm_flat[16] = {
0x10101010, 0x10101010, 0x10101010, 0x10101010,
@@ -692,15 +697,17 @@ intel_avc_fei_get_kernel_header_and_size(
bin_start = (char *)pvbinary;
pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
- pinvalid_entry = &(pkh_table->ply_2xdscale_2f_ply_2f) + 1;
+ pinvalid_entry = &(pkh_table->wp) + 1;
next_krnoffset = binary_size;
if (operation == INTEL_GENERIC_ENC_SCALING4X) {
- pcurr_header = &pkh_table->ply_2xdscale_ply;
+ pcurr_header = &pkh_table->ply_dscale_ply;
} else if (operation == INTEL_GENERIC_ENC_ME) {
pcurr_header = &pkh_table->me_p;
} else if (operation == INTEL_GENERIC_ENC_MBENC) {
pcurr_header = &pkh_table->mbenc_i;
+ } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
+ pcurr_header = &pkh_table->preproc;
} else {
return false;
}
@@ -1359,6 +1366,26 @@ gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
}
}
+ /* free preenc resources */
+ i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
+ i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
+ i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
+ i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
+
+ i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
+ i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
+
+ i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
+ avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
+ avc_ctx->preenc_scaled_4x_surface_obj = NULL;
+
+ i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
+ avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
+ avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
+
+ i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
+ avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
+ avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
}
static void
@@ -5967,6 +5994,784 @@ gen9_avc_kernel_sfd(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+/**************** PreEnc Scaling *************************************/
+/* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
+ * function to set the preenc scaling curbe is the same one used for avc encode:
+ == gen95_avc_set_curbe_scaling4x()
+ * function to send buffer/surface resources is the same one used for avc encode:
+ == gen9_avc_send_surface_scaling()
+ */
+static VAStatus
+gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int hme_type,
+ int scale_surface_type)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_gpe_table *gpe = &i965->gpe_table;
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+ VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
+ VAStatsStatisticsParameter *stat_param = NULL;
+ struct i965_gpe_context *gpe_context;
+ struct scaling_param surface_param;
+ struct object_surface *obj_surface = NULL;
+ struct gpe_media_object_walker_parameter media_object_walker_param;
+ struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
+ unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+ int media_function = 0;
+ int kernel_idx = 0;
+ int enable_statistics_output;
+
+ stat_param_h264 = avc_state->stat_param;
+ assert(stat_param_h264);
+ stat_param = &stat_param_h264->stats_params;
+ enable_statistics_output = !stat_param_h264->disable_statistics_output;
+
+ memset(&surface_param, 0, sizeof(struct scaling_param));
+ media_function = INTEL_MEDIA_STATE_4X_SCALING;
+ kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
+ downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
+ downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
+
+ surface_param.input_frame_width = generic_state->frame_width_in_pixel;
+ surface_param.input_frame_height = generic_state->frame_height_in_pixel;
+ surface_param.output_frame_width = generic_state->frame_width_4x;
+ surface_param.output_frame_height = generic_state->frame_height_4x;
+ surface_param.use_4x_scaling = 1;
+ surface_param.use_16x_scaling = 0;
+ surface_param.use_32x_scaling = 0;
+ surface_param.enable_mb_flatness_check = enable_statistics_output;
+ surface_param.enable_mb_variance_output = enable_statistics_output;
+ surface_param.enable_mb_pixel_average_output = enable_statistics_output;
+ surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
+
+ switch (scale_surface_type) {
+
+ case SCALE_CUR_PIC:
+ surface_param.input_surface = encode_state->input_yuv_object;
+ surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj;
+
+ if (enable_statistics_output) {
+ surface_param.pres_mbv_proc_stat_buffer =
+ &avc_ctx->preproc_stat_data_out_buffer;
+ surface_param.mbv_proc_stat_enabled = 1;
+ } else {
+ surface_param.mbv_proc_stat_enabled = 0;
+ surface_param.pres_mbv_proc_stat_buffer = NULL;
+ }
+ break;
+
+ case SCALE_PAST_REF_PIC:
+ obj_surface = SURFACE(stat_param->past_references[0].picture_id);
+ assert(obj_surface);
+ surface_param.input_surface = obj_surface;
+ surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
+
+ if (stat_param->past_ref_stat_buf) {
+ surface_param.pres_mbv_proc_stat_buffer =
+ &avc_ctx->preenc_past_ref_stat_data_out_buffer;
+ surface_param.mbv_proc_stat_enabled = 1;
+ } else {
+ surface_param.mbv_proc_stat_enabled = 0;
+ surface_param.pres_mbv_proc_stat_buffer = NULL;
+ }
+ break;
+
+ case SCALE_FUTURE_REF_PIC:
+
+ obj_surface = SURFACE(stat_param->future_references[0].picture_id);
+ assert(obj_surface);
+ surface_param.input_surface = obj_surface;
+ surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
+
+ if (stat_param->future_ref_stat_buf) {
+ surface_param.pres_mbv_proc_stat_buffer =
+ &avc_ctx->preenc_future_ref_stat_data_out_buffer;
+ surface_param.mbv_proc_stat_enabled = 1;
+ } else {
+ surface_param.mbv_proc_stat_enabled = 0;
+ surface_param.pres_mbv_proc_stat_buffer = NULL;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
+
+ gpe->context_init(ctx, gpe_context);
+ gpe->reset_binding_table(ctx, gpe_context);
+
+ generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
+
+ surface_param.scaling_out_use_16unorm_surf_fmt = 0;
+ surface_param.scaling_out_use_32unorm_surf_fmt = 1;
+
+ /* No need for an explicit flatness_check surface allocation. The field
+ * mb_is_flat in VAStatsStatisticsH264 will be used to store the output. */
+ surface_param.enable_mb_flatness_check = 0;
+ generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
+
+ /* setup the interface data */
+ gpe->setup_interface_data(ctx, gpe_context);
+
+ memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+ /* the scaling walker is dispatched at the 8x8 block level */
+ kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
+ kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
+ kernel_walker_param.no_dependency = 1;
+
+ i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+ gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+ gpe_context,
+ media_function,
+ &media_object_walker_param);
+
+ return VA_STATUS_SUCCESS;
+}
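As a worked example of the walker sizing above (assuming a 1920x1080 input and the driver's ALIGN macro), one thread is dispatched per 8x8 block of the 4x-downscaled picture, i.e. 2x2 threads per downscaled macroblock:

    unsigned int frame_width = 1920, frame_height = 1080;
    unsigned int width_4x  = ALIGN(frame_width / 4, 16);   /* 480 */
    unsigned int height_4x = ALIGN(frame_height / 4, 16);  /* 272 */
    unsigned int ds_width_in_mb  = width_4x / 16;          /* 30 MBs */
    unsigned int ds_height_in_mb = height_4x / 16;         /* 17 MBs */
    unsigned int resolution_x = ds_width_in_mb * 2;        /* 60 */
    unsigned int resolution_y = ds_height_in_mb * 2;       /* 34 */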
+
+/**************** PreEnc HME *************************************/
+/* function to run preenc hme is the same one we use in avc encode:
+ == gen9_avc_kernel_me()
+ * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
+ * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
+ */
+static void
+gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct i965_gpe_context *gpe_context,
+ struct intel_encoder_context *encoder_context,
+ void * param)
+{
+ gen9_avc_fei_me_curbe_data *curbe_cmd;
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+ VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+
+ struct me_param * curbe_param = (struct me_param *)param ;
+ unsigned char use_mv_from_prev_step = 0;
+ unsigned char write_distortions = 0;
+ unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
+ unsigned char search_table_idx = 0;
+ unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
+ unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+ unsigned int scale_factor = 0;
+
+ switch (curbe_param->hme_type) {
+ case INTEL_ENC_HME_4x:
+ use_mv_from_prev_step = 0;
+ write_distortions = 0;
+ mv_shift_factor = 2;
+ scale_factor = 4;
+ prev_mv_read_pos_factor = 0;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
+ if (!curbe_cmd)
+ return;
+
+ downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
+ downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
+
+ memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
+
+ curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
+ if (avc_state->field_scaling_output_interleaved) {
+ /* frame set to zero, field specified */
+ curbe_cmd->dw3.src_access = 0;
+ curbe_cmd->dw3.ref_access = 0;
+ curbe_cmd->dw7.src_field_polarity = 0;
+ }
+ curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
+ curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
+ curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
+
+ curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
+ curbe_cmd->dw6.write_distortions = write_distortions;
+ curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
+ curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
+
+ if (generic_state->frame_type == SLICE_TYPE_B) {
+ curbe_cmd->dw1.bi_weight = 32;
+ curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
+ me_method = gen9_avc_b_me_method[generic_state->preset];
+ search_table_idx = 1;
+ }
+
+ if (generic_state->frame_type == SLICE_TYPE_P ||
+ generic_state->frame_type == SLICE_TYPE_B)
+ curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
+
+ curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
+ curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
+
+ memcpy(&curbe_cmd->dw16, table_enc_search_path[search_table_idx][me_method], 14 * sizeof(int));
+
+ curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
+ curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
+ curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
+ curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
+ curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
+ curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
+ curbe_cmd->dw38.reserved = 0;
+
+ i965_gpe_context_unmap_curbe(gpe_context);
+ return;
+}
+
+static void
+gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct i965_gpe_context *gpe_context,
+ struct intel_encoder_context *encoder_context,
+ void * param)
+{
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+ VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+ struct object_surface *input_surface;
+ struct i965_gpe_resource *gpe_resource;
+ struct me_param * curbe_param = (struct me_param *)param ;
+ int i = 0;
+
+ /* PreEnc only supports 4x ME */
+ assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
+
+ switch (curbe_param->hme_type) {
+ case INTEL_ENC_HME_4x: {
+ /*memv output 4x*/
+ gpe_resource = &avc_ctx->s4x_memv_data_buffer;
+ i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
+
+ /* memv distortion output*/
+ gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
+ i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
+
+ /* brc distortion output*/
+ gpe_resource = &avc_ctx->res_brc_dist_data_surface;
+ i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_ME_BRC_DISTORTION_INDEX);
+
+ /* input past ref scaled YUV surface*/
+ for (i = 0; i < stat_param->num_past_references; i++) {
+ /* input current downscaled YUV surface for forward ref */
+ input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ input_surface,
+ GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
+
+ input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ input_surface,
+ GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
+ }
+
+ /* input future ref scaled YUV surface*/
+ for (i = 0; i < stat_param->num_future_references; i++) {
+ /* input current downscaled YUV surface for backward ref */
+ input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ input_surface,
+ GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
+
+ input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ input_surface,
+ GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
+ }
+ break;
+
+ }
+ default:
+ break;
+
+ }
+}
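The forward/backward loops above interleave the current downscaled picture with each reference in the VME binding table; a sketch of the resulting index arithmetic, where add_surface() is a hypothetical stand-in for i965_add_adv_gpe_surface():

    /* forward direction: even slots repeat the current 4x picture,
     * odd slots hold the past references */
    for (i = 0; i < num_past_references; i++) {
        add_surface(curr_4x_pic, GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);                /* base + 0 */
        add_surface(past_ref_4x[i], GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1); /* base + 1, 3, ... */
    }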
+
+/**************** PreEnc PreProc *************************************/
+/* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
+ * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
+ * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc()
+ */
+static void
+gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct i965_gpe_context *gpe_context,
+ struct intel_encoder_context *encoder_context,
+ void * param)
+{
+ gen9_avc_preproc_curbe_data *cmd;
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
+ VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+ unsigned char me_method = 0;
+ unsigned int table_idx = 0;
+ int ref_width, ref_height, len_sp;
+ int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
+ int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
+ unsigned int preset = generic_state->preset;
+
+ cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+ if (!cmd)
+ return;
+ memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
+
+ switch (generic_state->frame_type) {
+ case SLICE_TYPE_I:
+ memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
+ sizeof(gen9_avc_preproc_curbe_data));
+ break;
+ case SLICE_TYPE_P:
+ memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
+ sizeof(gen9_avc_preproc_curbe_data));
+ break;
+ case SLICE_TYPE_B:
+ memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
+ sizeof(gen9_avc_preproc_curbe_data));
+ break;
+ default:
+ assert(0);
+ }
+ /* 4 means full search, 6 means diamond search */
+ me_method = (stat_param_h264->search_window == 5) ||
+ (stat_param_h264->search_window == 8) ? 4 : 6;
+
+ ref_width = stat_param_h264->ref_width;
+ ref_height = stat_param_h264->ref_height;
+ len_sp = stat_param_h264->len_sp;
+ /* If a search_window preset is specified, discard the user-provided
+ * ref_width, ref_height and search-path length */
+ switch (stat_param_h264->search_window) {
+ case 0:
+ /* no predefined search window; a valid search_path input is required */
+ if ((stat_param_h264->search_path != 0) &&
+ (stat_param_h264->search_path != 1) &&
+ (stat_param_h264->search_path != 2)) {
+ WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
+ assert(0);
+ }
+ /* 4 means full search, 6 means diamond search */
+ me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
+ if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
+ WARN_ONCE("Invalid input ref_width/ref_height in"
+ "SearchWindow=0 case! \n");
+ assert(0);
+ }
+ break;
+
+ case 1:
+ /* Tiny - 4 SUs 24x24 window */
+ ref_width = 24;
+ ref_height = 24;
+ len_sp = 4;
+ break;
+
+ case 2:
+ /* Small - 9 SUs 28x28 window */
+ ref_width = 28;
+ ref_height = 28;
+ len_sp = 9;
+ break;
+ case 3:
+ /* Diamond - 16 SUs 48x40 window */
+ ref_width = 48;
+ ref_height = 40;
+ len_sp = 16;
+ break;
+ case 4:
+ /* Large Diamond - 32 SUs 48x40 window */
+ ref_width = 48;
+ ref_height = 40;
+ len_sp = 32;
+ break;
+ case 5:
+ /* Exhaustive - 48 SUs 48x40 window */
+ ref_width = 48;
+ ref_height = 40;
+ len_sp = 48;
+ break;
+ case 6:
+ /* Diamond - 16 SUs 64x32 window */
+ ref_width = 64;
+ ref_height = 32;
+ len_sp = 16;
+ break;
+ case 7:
+ /* Large Diamond - 32 SUs 64x32 window */
+ ref_width = 64;
+ ref_height = 32;
+ len_sp = 32;
+ break;
+ case 8:
+ /* Exhaustive - 48 SUs 64x32 window */
+ ref_width = 64;
+ ref_height = 32;
+ len_sp = 48;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
+ if (is_bframe) {
+ CLIP(ref_width, 4, 32);
+ CLIP(ref_height, 4, 32);
+ } else if (is_pframe) {
+ CLIP(ref_width, 4, 64);
+ CLIP(ref_height, 4, 32);
+ }
+
+ cmd->dw0.adaptive_enable =
+ cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
+ cmd->dw2.max_len_sp = len_sp;
+ cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
+ cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
+ cmd->dw3.src_access =
+ cmd->dw3.ref_access = 0; // change it to (is_frame ? 0 : 1) when interlace is supported
+
+ if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
+ cmd->dw3.ft_enable = stat_param_h264->ft_enable;
+ else
+ cmd->dw3.ft_enable = 0;
+
+ cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
+ cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
+ cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
+ cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
+ cmd->dw3.inter_sad = stat_param_h264->inter_sad;
+ cmd->dw3.intra_sad = stat_param_h264->intra_sad;
+ cmd->dw4.hme_enable = generic_state->hme_enabled;
+ cmd->dw4.frame_qp = stat_param_h264->frame_qp;
+ cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
+
+ cmd->dw4.multiple_mv_predictor_per_mb_enable =
+ (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
+
+ cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
+ cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
+
+ cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
+ cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
+
+ cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
+
+ /* mv mode cost */
+ memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
+
+ /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
+ memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
+
+ /* search path tables */
+ table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
+ memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
+
+ if (stat_param_h264->intra_part_mask == 0x07)
+ cmd->dw31.intra_compute_type = 3;
+
+ cmd->dw38.ref_threshold = 400;
+ cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
+
+ if (generic_state->frame_type == SLICE_TYPE_I) {
+ cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
+ cmd->dw36.hme_combine_overlap = 0;
+ } else if (generic_state->frame_type == SLICE_TYPE_P) {
+ cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
+ cmd->dw3.bme_disable_fbr = 1;
+ cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
+ cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
+ cmd->dw7.non_skip_zmv_added = 1;
+ cmd->dw7.non_skip_mode_added = 1;
+ cmd->dw7.skip_center_mask = 1;
+ cmd->dw32.max_vmv_r =
+ i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
+ cmd->dw36.hme_combine_overlap = 1;
+
+ } else if (generic_state->frame_type == SLICE_TYPE_B) { /* B slice */
+
+ cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
+ cmd->dw3.search_ctrl = 0;
+ cmd->dw3.skip_type = 1;
+ cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
+ cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
+ cmd->dw7.skip_center_mask = 0xff;
+ cmd->dw32.max_vmv_r =
+ i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
+ cmd->dw36.hme_combine_overlap = 1;
+ }
+
+ cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
+ cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
+ cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
+ cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
+ cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
+ cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
+ cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
+ cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
+ cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
+
+ i965_gpe_context_unmap_curbe(gpe_context);
+}
+
+static void
+gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct i965_gpe_context *gpe_context,
+ struct intel_encoder_context *encoder_context,
+ void * param)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ struct object_surface *obj_surface;
+ struct i965_gpe_resource *gpe_resource;
+ VASurfaceID surface_id;
+ VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
+ VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+ unsigned int size = 0, frame_mb_nums = 0;
+
+ frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
+
+ /* input yuv surface, Y index */
+ obj_surface = encode_state->input_yuv_object;
+ i965_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 0,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_PREPROC_CURR_Y_INDEX);
+
+ /* input yuv surface, UV index */
+ i965_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 1,
+ 1,
+ I965_SURFACEFORMAT_R16_UINT,
+ GEN9_AVC_MBENC_CURR_UV_INDEX);
+
+
+ if (generic_state->hme_enabled) {
+ /* HME mv data buffer */
+ gpe_resource = &avc_ctx->s4x_memv_data_buffer;
+ i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
+ }
+
+ /* mv predictor buffer */
+ if (stat_param_h264->mv_predictor_ctrl) {
+ size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
+ gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
+ i965_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
+ }
+
+ /* MB qp buffer */
+ if (stat_param_h264->mb_qp) {
+ size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
+ gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
+ i965_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_PREPROC_MBQP_INDEX);
+
+ gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
+ size = 16 * AVC_QP_MAX * 4;
+ i965_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
+
+ }
+
+ /* mv data output buffer */
+ if (!stat_param_h264->disable_mv_output) {
+ gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
+ size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
+ i965_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_PREPROC_MV_DATA_INDEX);
+ }
+
+ /* statistics output buffer */
+ if (!stat_param_h264->disable_statistics_output) {
+ gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
+ size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+ i965_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_PREPROC_MB_STATS_INDEX);
+ }
+
+ /* vme cur pic y */
+ obj_surface = encode_state->input_yuv_object;
+ i965_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 0,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
+
+ /* vme cur pic y (repeated, based on the BTI order required by the media kernel) */
+ obj_surface = encode_state->input_yuv_object;
+ i965_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 0,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
+
+ /* vme forward ref */
+ /* Only supports one past ref */
+ if (stat_param->num_past_references > 0) {
+ surface_id = stat_param->past_references[0].picture_id;
+ assert(surface_id != VA_INVALID_ID);
+ obj_surface = SURFACE(surface_id);
+ if (!obj_surface)
+ return;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
+
+ }
+
+ /* vme future ref */
+ /* Only supports one future ref */
+ if (stat_param->num_future_references > 0) {
+ surface_id = stat_param->future_references[0].picture_id;
+ assert(surface_id != VA_INVALID_ID);
+ obj_surface = SURFACE(surface_id);
+ if (!obj_surface)
+ return;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
+
+ surface_id = stat_param->future_references[0].picture_id;
+ assert(surface_id != VA_INVALID_ID);
+ obj_surface = SURFACE(surface_id);
+ if (!obj_surface)
+ return;
+ i965_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
+ }
+
+ return;
+
+}
+
+static VAStatus
+gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_gpe_table *gpe = &i965->gpe_table;
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+ struct i965_gpe_context *gpe_context;
+ struct gpe_media_object_walker_parameter media_object_walker_param;
+ struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
+ int media_function = INTEL_MEDIA_STATE_PREPROC;
+ struct i965_gpe_resource *gpe_resource = NULL;
+ unsigned int * data = NULL;
+ unsigned int ftq_lut_table_size = 16 * 52; /* 16 DWs per QP, 52 QP values */
+
+ gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
+ gpe->context_init(ctx, gpe_context);
+ gpe->reset_binding_table(ctx, gpe_context);
+
+ /*set curbe*/
+ generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+ /*send surface*/
+ generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+ gpe->setup_interface_data(ctx, gpe_context);
+
+ /* Set up FtqLut Buffer if there is QP change within a frame */
+ if (stat_param_h264->mb_qp) {
+ gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
+ assert(gpe_resource);
+ data = i965_map_gpe_resource(gpe_resource);
+ assert(data);
+ memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
+ i965_unmap_gpe_resource(gpe_resource);
+ }
+
+ memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+ kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
+ kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
+ kernel_walker_param.no_dependency = 1;
+
+ i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+ gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+ gpe_context,
+ media_function,
+ &media_object_walker_param);
+
+ return VA_STATUS_SUCCESS;
+}
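The per-MB output buffers bound above are sized from the constants defined at the top of this patch; a sizing sketch for a hypothetical 1920x1080 frame:

    unsigned int frame_mb_nums = (1920 / 16) * (ALIGN(1080, 16) / 16);              /* 120 * 68 = 8160 MBs */
    unsigned int mv_out_size   = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;       /* per-MB motion vectors */
    unsigned int stat_out_size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE; /* 8160 * 64 = 522240 bytes */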
+
+
static void
gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -6599,7 +7404,8 @@ kernel related function:init/destroy etc
static void
gen9_avc_kernel_init_scaling(VADriverContextP ctx,
struct generic_encoder_context *generic_context,
- struct gen_avc_scaling_context *kernel_context)
+ struct gen_avc_scaling_context *kernel_context,
+ int preenc_enabled)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_gpe_table *gpe = &i965->gpe_table;
@@ -6611,8 +7417,14 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
memset(&kernel_param, 0, sizeof(kernel_param));
if (IS_SKL(i965->intel.device_info) ||
IS_BXT(i965->intel.device_info)) {
- kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
- kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
+ if (!preenc_enabled) {
+ kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
+ kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
+ } else {
+ /* Skylake PreEnc uses the GEN95/GEN10 DS kernel */
+ kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
+ kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
+ }
} else if (IS_KBL(i965->intel.device_info) ||
IS_GLK(i965->intel.device_info)) {
kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
@@ -6638,17 +7450,21 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
memset(&common_kernel, 0, sizeof(common_kernel));
- intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
- generic_context->enc_kernel_size,
- INTEL_GENERIC_ENC_SCALING4X,
- 0,
- &common_kernel);
+ generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+ generic_context->enc_kernel_size,
+ INTEL_GENERIC_ENC_SCALING4X,
+ 0,
+ &common_kernel);
gpe->load_kernels(ctx,
gpe_context,
&common_kernel,
1);
+ /* PreEnc uses only 4x scaling */
+ if (preenc_enabled)
+ return;
+
/*2x scaling kernel*/
kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
kernel_param.inline_data_size = 0;
@@ -6676,7 +7492,8 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
static void
gen9_avc_kernel_init_me(VADriverContextP ctx,
struct generic_encoder_context *generic_context,
- struct gen_avc_me_context *kernel_context)
+ struct gen_avc_me_context *kernel_context,
+ int preenc_enabled)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_gpe_table *gpe = &i965->gpe_table;
@@ -6685,12 +7502,18 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
struct encoder_scoreboard_parameter scoreboard_param;
struct i965_kernel common_kernel;
int i = 0;
+ unsigned int curbe_size = 0;
if (IS_GEN8(i965->intel.device_info)) {
- kernel_param.curbe_size = sizeof(gen8_avc_me_curbe_data);
+ curbe_size = sizeof(gen8_avc_me_curbe_data);
} else {
- kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
+ if (!preenc_enabled)
+ curbe_size = sizeof(gen9_avc_me_curbe_data);
+ else
+ curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
}
+
+ kernel_param.curbe_size = curbe_size;
kernel_param.inline_data_size = 0;
kernel_param.sampler_size = 0;
@@ -6700,6 +7523,7 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
scoreboard_param.walkpat_flag = 0;
+ /* There are two HME kernels, one for P and one for B frames */
for (i = 0; i < 2; i++) {
gpe_context = &kernel_context->gpe_contexts[i];
gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
@@ -6707,11 +7531,11 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
memset(&common_kernel, 0, sizeof(common_kernel));
- intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
- generic_context->enc_kernel_size,
- INTEL_GENERIC_ENC_ME,
- i,
- &common_kernel);
+ generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+ generic_context->enc_kernel_size,
+ INTEL_GENERIC_ENC_ME,
+ i,
+ &common_kernel);
gpe->load_kernels(ctx,
gpe_context,
@@ -6722,6 +7546,47 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
}
static void
+gen9_avc_kernel_init_preproc(VADriverContextP ctx,
+ struct generic_encoder_context *generic_context,
+ struct gen_avc_preproc_context *kernel_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_gpe_table *gpe = &i965->gpe_table;
+ struct i965_gpe_context *gpe_context = NULL;
+ struct encoder_kernel_parameter kernel_param;
+ struct encoder_scoreboard_parameter scoreboard_param;
+ struct i965_kernel common_kernel;
+
+ kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
+ kernel_param.inline_data_size = 0;
+ kernel_param.sampler_size = 0;
+
+ memset(&scoreboard_param, 0, sizeof(scoreboard_param));
+ scoreboard_param.mask = 0xFF;
+ scoreboard_param.enable = generic_context->use_hw_scoreboard;
+ scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
+ scoreboard_param.walkpat_flag = 0;
+
+ gpe_context = &kernel_context->gpe_contexts;
+ gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
+ gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
+
+ memset(&common_kernel, 0, sizeof(common_kernel));
+
+ intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+ generic_context->enc_kernel_size,
+ INTEL_GENERIC_ENC_PREPROC,
+ 0,
+ &common_kernel);
+
+ gpe->load_kernels(ctx,
+ gpe_context,
+ &common_kernel,
+ 1);
+
+}
+
+static void
gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
struct generic_encoder_context *generic_context,
struct gen_avc_mbenc_context *kernel_context,
@@ -6968,6 +7833,8 @@ gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
+ gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
+
}
/*
@@ -7717,6 +8584,391 @@ gen9_avc_vme_pipeline(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+/* Update PreEnc specific parameters */
+static VAStatus
+gen9_avc_preenc_update_parameters(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
+ VAStatsStatisticsParameter *stat_param = NULL;
+ struct object_buffer *obj_buffer = NULL;
+ struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
+ struct buffer_store *buffer_store = NULL;
+ unsigned int size = 0, i = 0;
+ unsigned int frame_mb_nums = 0;
+
+ if (!encoder_context->preenc_enabled ||
+ !encode_state->stat_param_ext ||
+ !encode_state->stat_param_ext->buffer)
+ return VA_STATUS_ERROR_OPERATION_FAILED;
+
+ stat_param_h264 = avc_state->stat_param =
+ (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
+ stat_param = &stat_param_h264->stats_params;
+
+ /* Infer the frame type from the number of past/future ref frames */
+ if (!stat_param->num_past_references && !stat_param->num_future_references)
+ generic_state->frame_type = SLICE_TYPE_I;
+ else if (stat_param->num_future_references > 0)
+ generic_state->frame_type = SLICE_TYPE_B;
+ else
+ generic_state->frame_type = SLICE_TYPE_P;
+
+ generic_state->preset = INTEL_PRESET_RT_SPEED;
+ generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
+
+ /* frame width and height */
+ generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
+ generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
+ generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
+ generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
+
+ /* 4x downscaled width and height */
+ generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
+ generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
+ generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16;
+ generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
+
+ /* enable hme for non-I frames in preenc */
+ if (generic_state->frame_type != SLICE_TYPE_I)
+ generic_state->hme_enabled = 1;
+
+ /* ensure frame width is not too small */
+ if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
+ generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
+ generic_state->downscaled_width_4x_in_mb =
+ WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
+ }
+
+ /* ensure frame height is not too small*/
+ if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
+ generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
+ generic_state->downscaled_height_4x_in_mb =
+ WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
+ }
+
+ /********** Ensure buffer object parameters ********/
+ frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
+
+ /* mv predictor buffer */
+ if (stat_param_h264->mv_predictor_ctrl) {
+ if (stat_param->mv_predictor == VA_INVALID_ID)
+ goto error;
+ size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
+ obj_buffer = BUFFER(stat_param->mv_predictor);
+ buffer_store = obj_buffer->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preproc_mv_predictor_buffer,
+ buffer_store->bo);
+ }
+
+ /* MB qp buffer */
+ if (stat_param_h264->mb_qp) {
+ if (stat_param->qp == VA_INVALID_ID)
+ goto error;
+ size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
+ obj_buffer = BUFFER(stat_param->qp);
+ buffer_store = obj_buffer->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preproc_mb_qp_buffer,
+ buffer_store->bo);
+ }
+
+ /* locate mv and stat buffer */
+ if (!stat_param_h264->disable_mv_output ||
+ !stat_param_h264->disable_statistics_output) {
+
+ if (!stat_param->outputs)
+ goto error;
+
+ for (i = 0; i < 2 ; i++) {
+ if (stat_param->outputs[i] != VA_INVALID_ID) {
+ obj_buffer = BUFFER(stat_param->outputs[i]);
+ switch (obj_buffer->type) {
+ case VAStatsMVBufferType:
+ obj_buffer_mv = obj_buffer;
+ break;
+ case VAStatsStatisticsBufferType:
+ obj_buffer_stat = obj_buffer;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ /* if only one of the MV/statistics outputs is enabled, a single
+ * entry is expected in outputs[]; stop after the first one */
+ if (!(!stat_param_h264->disable_mv_output &&
+ !stat_param_h264->disable_statistics_output))
+ break;
+ }
+ }
+ /* mv data output buffer */
+ if (!stat_param_h264->disable_mv_output) {
+ size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
+ buffer_store = obj_buffer_mv->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preproc_mv_data_out_buffer,
+ buffer_store->bo);
+ }
+ /* statistics output buffer */
+ if (!stat_param_h264->disable_statistics_output) {
+ size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+ buffer_store = obj_buffer_stat->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preproc_stat_data_out_buffer,
+ buffer_store->bo);
+ }
+
+ /* past ref stat out buffer */
+ if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
+ stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
+ size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+ obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
+ buffer_store = obj_buffer->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preenc_past_ref_stat_data_out_buffer,
+ buffer_store->bo);
+ }
+ /* future ref stat out buffer */
+ if (stat_param->num_future_references && stat_param->future_ref_stat_buf &&
+ stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
+ size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+ obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
+ buffer_store = obj_buffer->buffer_store;
+ if (buffer_store->bo->size < size)
+ goto error;
+ if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
+ i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
+ i965_dri_object_to_buffer_gpe_resource(
+ &avc_ctx->preenc_future_ref_stat_data_out_buffer,
+ buffer_store->bo);
+ }
+ return VA_STATUS_SUCCESS;
+
+error:
+ return VA_STATUS_ERROR_INVALID_BUFFER;
+}
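The slice-type inference at the top of this function condenses to a small helper; a sketch using only the reference counts supplied by the application:

    static int preenc_infer_frame_type(const VAStatsStatisticsParameter *sp)
    {
        if (!sp->num_past_references && !sp->num_future_references)
            return SLICE_TYPE_I;        /* no references at all */
        if (sp->num_future_references > 0)
            return SLICE_TYPE_B;        /* future refs imply bi-direction */
        return SLICE_TYPE_P;            /* past refs only */
    }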
+
+/* allocate internal resources required for PreEnc */
+static VAStatus
+gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ unsigned int width = 0;
+ unsigned int height = 0;
+ unsigned int size = 0;
+ int allocate_flag = 1;
+
+ /* 4x MEMV data buffer */
+ width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
+ height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
+ i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
+ allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+ &avc_ctx->s4x_memv_data_buffer,
+ width, height,
+ width,
+ "4x MEMV data buffer");
+ if (!allocate_flag)
+ goto failed_allocation;
+ i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
+
+ /* Output DISTORTION surface from 4x ME */
+ width = generic_state->downscaled_width_4x_in_mb * 8;
+ height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
+ i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
+ allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+ &avc_ctx->s4x_memv_distortion_buffer,
+ width, height,
+ ALIGN(width, 64),
+ "4x MEMV distortion buffer");
+ if (!allocate_flag)
+ goto failed_allocation;
+ i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
+
+ /* output BRC DISTORTION surface from 4x ME */
+ width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
+ height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
+ i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
+ allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+ &avc_ctx->res_brc_dist_data_surface,
+ width, height,
+ width,
+ "brc dist data buffer");
+ if (!allocate_flag)
+ goto failed_allocation;
+ i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
+
+
+ /* FTQ LUT buffer, which is the mbbrc_const_data_buffer */
+ i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
+ size = 16 * AVC_QP_MAX * 4;
+ allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
+ &avc_ctx->res_mbbrc_const_data_buffer,
+ ALIGN(size, 0x1000),
+ "mbbrc const data buffer");
+ if (!allocate_flag)
+ goto failed_allocation;
+ i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
+
+ /* 4x downscaled surface */
+ if (!avc_ctx->preenc_scaled_4x_surface_obj) {
+ i965_CreateSurfaces(ctx,
+ generic_state->frame_width_4x,
+ generic_state->frame_height_4x,
+ VA_RT_FORMAT_YUV420,
+ 1,
+ &avc_ctx->preenc_scaled_4x_surface_id);
+ avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
+ if (!avc_ctx->preenc_scaled_4x_surface_obj)
+ goto failed_allocation;
+ i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
+ VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ }
+
+ /* 4x downscaled past ref surface */
+ if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
+ i965_CreateSurfaces(ctx,
+ generic_state->frame_width_4x,
+ generic_state->frame_height_4x,
+ VA_RT_FORMAT_YUV420,
+ 1,
+ &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
+ avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
+ SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
+ if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
+ goto failed_allocation;
+ i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
+ VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ }
+
+ /* 4x downscaled future ref surface */
+ if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
+ i965_CreateSurfaces(ctx,
+ generic_state->frame_width_4x,
+ generic_state->frame_height_4x,
+ VA_RT_FORMAT_YUV420,
+ 1,
+ &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
+ avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
+ SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
+ if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
+ goto failed_allocation;
+ i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
+ VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+ }
+
+ /* FeiPreEncFixme: dummy coded buffer. This is a tweak that lets us reuse
+ * the generic AVC encode codepath, which allocates the status buffer as an
+ * extension to the coded buffer */
+ if (!avc_ctx->status_buffer.bo) {
+ size =
+ generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
+ size += I965_CODEDBUFFER_HEADER_SIZE;
+ size += 0x1000;
+ avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Dummy Coded Buffer",
+ size, 64);
+ }
+
+ return VA_STATUS_SUCCESS;
+
+failed_allocation:
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+}
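For the 4x ME buffers allocated above, a sizing sketch at 1080p (downscaled 30x17 MBs, per the scaling example earlier; the interpretation of the individual factors is an assumption read off the surrounding code):

    unsigned int memv_w = ALIGN(30 * 32, 64);   /* 960-byte pitch */
    unsigned int memv_h = 17 * 4 * 2 * 10;      /* 1360 rows */
    unsigned int dist_w = 30 * 8;               /* 240, pitch aligned up to 256 */
    unsigned int dist_h = 17 * 4 * 10;          /* 680 rows */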
+
+
+static VAStatus
+gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+ VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
+ VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+
+ /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled
+ * surfaces to avoid repeatedly scaling the same surfaces */
+
+ /* down scaling */
+ gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+ INTEL_ENC_HME_4x, SCALE_CUR_PIC);
+ if (stat_param->num_past_references > 0) {
+ gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+ INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
+ }
+ if (stat_param->num_future_references > 0) {
+ gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+ INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
+ }
+
+ /* me kernel */
+ if (generic_state->hme_enabled) {
+ gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
+ }
+
+ /* preproc kernel */
+ if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
+ gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
+ }
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+gen9_avc_preenc_pipeline(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ VAStatus va_status;
+
+ va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+
+ return VA_STATUS_SUCCESS;
+}
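From the application side, this pipeline is reached through the libva Stats entrypoint (see the libva PR referenced in the commit message). A hedged sketch, with context/surface/buffer creation and error checks omitted; the field names follow how this patch consumes VAStatsStatisticsParameter, and the dpy/stats_ctx/surface/buffer variables are assumed to exist:

    VAStatsStatisticsParameterH264 stats = { 0 };
    VAPictureStats past_ref = { .picture_id = past_ref_surface, .flags = 0 };
    VABufferID outputs[2] = { mv_buf, stats_buf };  /* VAStatsMVBufferType / VAStatsStatisticsBufferType */

    stats.stats_params.input.picture_id = src_surface;
    stats.stats_params.past_references = &past_ref;
    stats.stats_params.num_past_references = 1;     /* P-frame statistics */
    stats.stats_params.outputs = outputs;
    stats.stats_params.num_outputs = 2;
    stats.frame_qp = 26;
    stats.search_window = 5;                        /* exhaustive, 48 SUs, 48x40 */

    vaCreateBuffer(dpy, stats_ctx, VAStatsStatisticsParameterBufferType,
                   sizeof(stats), 1, &stats, &param_buf);
    vaBeginPicture(dpy, stats_ctx, src_surface);
    vaRenderPicture(dpy, stats_ctx, &param_buf, 1);
    vaEndPicture(dpy, stats_ctx);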
+
static void
gen9_avc_vme_context_destroy(void * context)
{
@@ -7757,9 +9009,9 @@ gen8_avc_kernel_init(VADriverContextP ctx,
generic_ctx->get_kernel_header_and_size = fei_enabled ?
intel_avc_fei_get_kernel_header_and_size :
intel_avc_get_kernel_header_and_size ;
- gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
+ gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
- gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
+ gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
@@ -7788,18 +9040,19 @@ gen9_avc_kernel_init(VADriverContextP ctx,
struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
int fei_enabled = encoder_context->fei_enabled;
+ int preenc_enabled = encoder_context->preenc_enabled;
- generic_ctx->get_kernel_header_and_size = fei_enabled ?
+ generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
intel_avc_fei_get_kernel_header_and_size :
intel_avc_get_kernel_header_and_size ;
- gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
- encoder_context->fei_enabled);
-
- if (!fei_enabled) {
- gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
+ if (!fei_enabled && !preenc_enabled) {
+ /* generic AVC Encoder */
+ gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
- gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
+ gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
+ gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
+ encoder_context->fei_enabled);
gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
@@ -7829,9 +9082,30 @@ gen9_avc_kernel_init(VADriverContextP ctx,
else if (IS_KBL(i965->intel.device_info) ||
IS_GLK(i965->intel.device_info))
generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
- } else {
+
+ } else if (fei_enabled) {
+ /* FEI AVC Encoding */
+ gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
+ encoder_context->fei_enabled);
generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
+
+ } else {
+ /* PreEnc for AVC */
+ gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
+ encoder_context->preenc_enabled);
+ gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
+ encoder_context->preenc_enabled);
+ gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
+
+ /* preenc 4x scaling uses the gen95 kernel */
+ generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
+ generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
+ generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
+
+ generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
+ generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
+ generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
}
}
@@ -9700,10 +10974,12 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
if (IS_SKL(i965->intel.device_info) ||
IS_BXT(i965->intel.device_info)) {
- if (!encoder_context->fei_enabled) {
+ if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
} else {
+ /* FEI and PreEnc operation kernels are included in
+ * the monolithic kernel binary */
generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
}
@@ -9747,19 +11023,25 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
generic_state->downscaled_width_32x_in_mb = 0;
generic_state->downscaled_height_32x_in_mb = 0;
- if (!encoder_context->fei_enabled) {
- generic_state->hme_supported = 1;
- generic_state->b16xme_supported = 1;
- }
+ generic_state->hme_supported = 1;
+ generic_state->b16xme_supported = 1;
generic_state->b32xme_supported = 0;
generic_state->hme_enabled = 0;
generic_state->b16xme_enabled = 0;
generic_state->b32xme_enabled = 0;
+
+ if (encoder_context->fei_enabled) {
+ /* Disabling HME in FEI encode */
+ generic_state->hme_supported = 0;
+ generic_state->b16xme_supported = 0;
+ } else if (encoder_context->preenc_enabled) {
+ /* Disabling 16x ME in PreEnc */
+ generic_state->b16xme_supported = 0;
+ }
+
generic_state->brc_distortion_buffer_supported = 1;
generic_state->brc_constant_buffer_supported = 0;
-
generic_state->frame_rate = 30;
generic_state->brc_allocated = 0;
generic_state->brc_inited = 0;
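
The net effect of the ME flag setup above, per operating mode (a summary of the visible logic, not code from the patch):

/* ME support after gen9_avc_vme_context_init():
 *                   hme_supported  b16xme_supported  b32xme_supported
 * regular encode          1                1                 0
 * FEI encode              0                0                 0
 * PreEnc                  1                0                 0
 */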
@@ -9961,7 +11243,10 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
gen9_avc_kernel_init(ctx, encoder_context);
}
encoder_context->vme_context = vme_context;
- encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
+ /* Handle the PreEnc operations in a separate pipeline: it gives better
+ * code readability and avoids mixing PreEnc into the VME operations */
+ encoder_context->vme_pipeline =
+ !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
return true;
diff --git a/src/i965_avc_encoder.h b/src/i965_avc_encoder.h
index d08db913..f30aee38 100644
--- a/src/i965_avc_encoder.h
+++ b/src/i965_avc_encoder.h
@@ -446,10 +446,20 @@ typedef struct _gen9_avc_fei_encoder_kernel_header {
kernel_header me_p;
kernel_header me_b;
- /* 2x DownScaling */
+ /* DownScaling */
+ kernel_header ply_dscale_ply;
+ kernel_header ply_dscale_2f_ply_2f;
+
+ /* BRC_I Frame Distortion */
+ kernel_header frame_brc_i_dist;
+
+ /* 2x DownScaling */
kernel_header ply_2xdscale_ply;
kernel_header ply_2xdscale_2f_ply_2f;
+ /* Weighted Prediction kernel */
+ kernel_header wp;
+
} gen9_avc_fei_encoder_kernel_header;
/*
@@ -852,6 +862,281 @@ typedef struct _gen9_avc_me_curbe_data {
} dw38;
} gen9_avc_me_curbe_data;
+/* FeiPreEncFixme: rename fei to preenc */
+typedef struct _gen9_avc_fei_me_curbe_data {
+ struct {
+ uint32_t skip_mode_enable: 1;
+ uint32_t adaptive_enable: 1;
+ uint32_t bi_mix_dis: 1;
+ uint32_t reserved0: 2;
+ uint32_t early_ime_success_enable: 1;
+ uint32_t reserved1: 1;
+ uint32_t t8x8_flag_for_inter_enable: 1;
+ uint32_t reserved2: 16;
+ uint32_t early_ime_stop: 8;
+ } dw0;
+
+ struct {
+ uint32_t max_num_mvs: 6;
+ uint32_t reserved0: 10;
+ uint32_t bi_weight: 6;
+ uint32_t reserved1: 6;
+ uint32_t uni_mix_disable: 1;
+ uint32_t reserved2: 3;
+ } dw1;
+
+ struct {
+ uint32_t max_len_sp: 8;
+ uint32_t max_num_su: 8;
+ uint32_t reserved0: 16;
+ } dw2;
+
+ struct {
+ uint32_t src_size: 2;
+ uint32_t reserved0: 2;
+ uint32_t mb_type_remap: 2;
+ uint32_t src_access: 1;
+ uint32_t ref_access: 1;
+ uint32_t search_ctrl: 3;
+ uint32_t dual_search_path_option: 1;
+ uint32_t sub_pel_mode: 2;
+ uint32_t skip_type: 1;
+ uint32_t disable_field_cache_allocation: 1;
+ uint32_t inter_chroma_mode: 1;
+ uint32_t ft_enable: 1;
+ uint32_t bme_disable_fbr: 1;
+ uint32_t block_based_skip_enable: 1;
+ uint32_t inter_sad: 2;
+ uint32_t intra_sad: 2;
+ uint32_t sub_mb_part_mask: 7;
+ uint32_t reserved1: 1;
+ } dw3;
+
+ struct {
+ uint32_t reserved0: 8;
+ uint32_t picture_height_minus1: 8;
+ uint32_t picture_width: 8;
+ uint32_t reserved1: 8;
+ } dw4;
+
+ struct {
+ uint32_t reserved0: 8;
+ uint32_t qp_prime_y: 8;
+ uint32_t ref_width: 8;
+ uint32_t ref_height: 8;
+ } dw5;
+
+ struct {
+ uint32_t reserved0: 3;
+ uint32_t write_distortions: 1;
+ uint32_t use_mv_from_prev_step: 1;
+ uint32_t reserved1: 3;
+ uint32_t super_combine_dist: 8;
+ uint32_t max_vmvr: 16;
+ } dw6;
+
+ struct {
+ uint32_t reserved0: 16;
+ uint32_t mv_cost_scale_factor: 2;
+ uint32_t bilinear_enable: 1;
+ uint32_t src_field_polarity: 1;
+ uint32_t weightedsad_harr: 1;
+ uint32_t ac_only_haar: 1;
+ uint32_t ref_id_cost_mode: 1;
+ uint32_t reserved1: 1;
+ uint32_t skip_center_mask: 8;
+ } dw7;
+
+ struct {
+ uint32_t mode_0_cost: 8;
+ uint32_t mode_1_cost: 8;
+ uint32_t mode_2_cost: 8;
+ uint32_t mode_3_cost: 8;
+ } dw8;
+
+ struct {
+ uint32_t mode_4_cost: 8;
+ uint32_t mode_5_cost: 8;
+ uint32_t mode_6_cost: 8;
+ uint32_t mode_7_cost: 8;
+ } dw9;
+
+ struct {
+ uint32_t mode_8_cost: 8;
+ uint32_t mode_9_cost: 8;
+ uint32_t ref_id_cost: 8;
+ uint32_t chroma_intra_mode_cost: 8;
+ } dw10;
+
+ struct {
+ uint32_t mv_0_cost: 8;
+ uint32_t mv_1_cost: 8;
+ uint32_t mv_2_cost: 8;
+ uint32_t mv_3_cost: 8;
+ } dw11;
+
+ struct {
+ uint32_t mv_4_cost: 8;
+ uint32_t mv_5_cost: 8;
+ uint32_t mv_6_cost: 8;
+ uint32_t mv_7_cost: 8;
+ } dw12;
+
+ struct {
+ uint32_t num_ref_idx_l0_minus1: 8;
+ uint32_t num_ref_idx_l1_minus1: 8;
+ uint32_t actual_mb_width: 8;
+ uint32_t actual_mb_height: 8;
+ } dw13;
+
+ struct {
+ uint32_t l0_ref_pic_polarity_bits: 8;
+ uint32_t l1_ref_pic_polarity_bits: 2;
+ uint32_t reserved: 22;
+ } dw14;
+
+ struct {
+ uint32_t prev_mv_read_pos_factor : 8;
+ uint32_t mv_shift_factor : 8;
+ uint32_t reserved: 16;
+ } dw15;
+
+ struct {
+ struct generic_search_path_delta sp_delta_0;
+ struct generic_search_path_delta sp_delta_1;
+ struct generic_search_path_delta sp_delta_2;
+ struct generic_search_path_delta sp_delta_3;
+ } dw16;
+
+ struct {
+ struct generic_search_path_delta sp_delta_4;
+ struct generic_search_path_delta sp_delta_5;
+ struct generic_search_path_delta sp_delta_6;
+ struct generic_search_path_delta sp_delta_7;
+ } dw17;
+
+ struct {
+ struct generic_search_path_delta sp_delta_8;
+ struct generic_search_path_delta sp_delta_9;
+ struct generic_search_path_delta sp_delta_10;
+ struct generic_search_path_delta sp_delta_11;
+ } dw18;
+
+ struct {
+ struct generic_search_path_delta sp_delta_12;
+ struct generic_search_path_delta sp_delta_13;
+ struct generic_search_path_delta sp_delta_14;
+ struct generic_search_path_delta sp_delta_15;
+ } dw19;
+
+ struct {
+ struct generic_search_path_delta sp_delta_16;
+ struct generic_search_path_delta sp_delta_17;
+ struct generic_search_path_delta sp_delta_18;
+ struct generic_search_path_delta sp_delta_19;
+ } dw20;
+
+ struct {
+ struct generic_search_path_delta sp_delta_20;
+ struct generic_search_path_delta sp_delta_21;
+ struct generic_search_path_delta sp_delta_22;
+ struct generic_search_path_delta sp_delta_23;
+ } dw21;
+
+ struct {
+ struct generic_search_path_delta sp_delta_24;
+ struct generic_search_path_delta sp_delta_25;
+ struct generic_search_path_delta sp_delta_26;
+ struct generic_search_path_delta sp_delta_27;
+ } dw22;
+
+ struct {
+ struct generic_search_path_delta sp_delta_28;
+ struct generic_search_path_delta sp_delta_29;
+ struct generic_search_path_delta sp_delta_30;
+ struct generic_search_path_delta sp_delta_31;
+ } dw23;
+
+ struct {
+ struct generic_search_path_delta sp_delta_32;
+ struct generic_search_path_delta sp_delta_33;
+ struct generic_search_path_delta sp_delta_34;
+ struct generic_search_path_delta sp_delta_35;
+ } dw24;
+
+ struct {
+ struct generic_search_path_delta sp_delta_36;
+ struct generic_search_path_delta sp_delta_37;
+ struct generic_search_path_delta sp_delta_38;
+ struct generic_search_path_delta sp_delta_39;
+ } dw25;
+
+ struct {
+ struct generic_search_path_delta sp_delta_40;
+ struct generic_search_path_delta sp_delta_41;
+ struct generic_search_path_delta sp_delta_42;
+ struct generic_search_path_delta sp_delta_43;
+ } dw26;
+
+ struct {
+ struct generic_search_path_delta sp_delta_44;
+ struct generic_search_path_delta sp_delta_45;
+ struct generic_search_path_delta sp_delta_46;
+ struct generic_search_path_delta sp_delta_47;
+ } dw27;
+
+ struct {
+ struct generic_search_path_delta sp_delta_48;
+ struct generic_search_path_delta sp_delta_49;
+ struct generic_search_path_delta sp_delta_50;
+ struct generic_search_path_delta sp_delta_51;
+ } dw28;
+
+ struct {
+ struct generic_search_path_delta sp_delta_52;
+ struct generic_search_path_delta sp_delta_53;
+ struct generic_search_path_delta sp_delta_54;
+ struct generic_search_path_delta sp_delta_55;
+ } dw29;
+
+ struct {
+ uint32_t reserved;
+ } dw30;
+
+ struct {
+ uint32_t reserved;
+ } dw31;
+
+ struct {
+ uint32_t _4x_memv_output_data_surf_index;
+ } dw32;
+
+ struct {
+ uint32_t _16x_32x_memv_input_data_surf_index;
+ } dw33;
+
+ struct {
+ uint32_t _4x_me_output_dist_surf_index;
+ } dw34;
+
+ struct {
+ uint32_t _4x_me_output_brc_dist_surf_index;
+ } dw35;
+
+ struct {
+ uint32_t vme_fwd_inter_pred_surf_index;
+ } dw36;
+
+ struct {
+ uint32_t vme_bdw_inter_pred_surf_index;
+ } dw37;
+
+ /* reserved */
+ struct {
+ uint32_t reserved;
+ } dw38;
+} gen9_avc_fei_me_curbe_data;
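
Every dwN group above is meant to pack into exactly one 32-bit dword; dw16..dw29 each hold four generic_search_path_delta entries, which must pack to one byte apiece (two 4-bit deltas) for that to hold. A minimal compile-time sanity check, assuming C11 _Static_assert; this assert is illustrative and not part of the patch:

/* Sketch only: dw0..dw38 make the FEI/PreEnc ME curbe 39 dwords.
 * Assumes a one-byte generic_search_path_delta. */
_Static_assert(sizeof(gen9_avc_fei_me_curbe_data) == 39 * sizeof(uint32_t),
               "gen9_avc_fei_me_curbe_data must pack to 39 dwords");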
+
#define GEN9_AVC_KERNEL_ME_P_IDX 0
#define GEN9_AVC_KERNEL_ME_B_IDX 1
#define NUM_GEN9_AVC_KERNEL_ME 2
@@ -2923,6 +3208,379 @@ struct gen_avc_sfd_context {
struct i965_gpe_context gpe_contexts;
};
+struct gen_avc_preproc_context {
+ struct i965_gpe_context gpe_contexts;
+};
+
+/* preproc binding table */
+typedef enum _gen9_avc_binding_table_offset_preproc {
+ GEN9_AVC_PREPROC_CURR_Y_INDEX = 0,
+ GEN9_AVC_PREPROC_CURR_UV_INDEX = 1,
+ GEN9_AVC_PREPROC_HME_MV_DATA_INDEX = 2,
+ GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX = 3,
+ GEN9_AVC_PREPROC_MBQP_INDEX = 4,
+ GEN9_AVC_PREPROC_MV_DATA_INDEX = 5,
+ GEN9_AVC_PREPROC_MB_STATS_INDEX = 6,
+ GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX = 7,
+ GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX = 8,
+ GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX = 9,
+ GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX = 10,
+ GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX = 11,
+ GEN9_AVC_PREPROC_RESERVED1_INDEX = 12,
+ GEN9_AVC_PREPROC_FTQ_LUT_INDEX = 13,
+ GEN9_AVC_PREPROC_NUM_SURFACES_INDEX = 14
+} gen9_avc_binding_table_offset_preproc;
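
These enum values are the binding-table slots at which the preproc kernel expects its surfaces. A sketch of how a send-surface hook would typically bind one of the output buffers; i965_add_buffer_gpe_surface() is the driver's usual helper for buffer surfaces, but this exact call and its argument order are assumptions, not code from the patch:

/* Sketch only: bind the per-MB MV output buffer at its preproc slot.
 * The helper's signature is assumed; `size` would be derived from
 * the frame's macroblock count. */
i965_add_buffer_gpe_surface(ctx, gpe_context,
                            &avc_ctx->preproc_mv_data_out_buffer,
                            0,        /* not a raw buffer */
                            size, 0,  /* size, offset */
                            GEN9_AVC_PREPROC_MV_DATA_INDEX);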
+
+/* preenc preproc curbe data */
+typedef struct _gen9_avc_preproc_curbe_data {
+ struct {
+ uint32_t skip_mode_enable: 1;
+ uint32_t adaptive_enable: 1;
+ uint32_t bi_mix_dis: 1;
+ uint32_t reserved0: 2;
+ uint32_t early_ime_success_enable: 1;
+ uint32_t reserved1: 1;
+ uint32_t t8x8_flag_for_inter_enable: 1;
+ uint32_t reserved2: 16;
+ uint32_t early_ime_stop: 8;
+ } dw0;
+
+ struct {
+ uint32_t max_num_mvs: 6;
+ uint32_t reserved0: 10;
+ uint32_t bi_weight: 6;
+ uint32_t reserved1: 6;
+ uint32_t uni_mix_disable: 1;
+ uint32_t reserved2: 3;
+ } dw1;
+
+ struct {
+ uint32_t max_len_sp: 8;
+ uint32_t max_num_su: 8;
+ uint32_t pic_width: 16;
+ } dw2;
+
+ struct {
+ uint32_t src_size: 2;
+ uint32_t reserved0: 2;
+ uint32_t mb_type_remap: 2;
+ uint32_t src_access: 1;
+ uint32_t ref_access: 1;
+ uint32_t search_ctrl: 3;
+ uint32_t dual_search_path_option: 1;
+ uint32_t sub_pel_mode: 2;
+ uint32_t skip_type: 1;
+ uint32_t disable_field_cache_allocation: 1;
+ uint32_t inter_chroma_mode: 1;
+ uint32_t ft_enable: 1;
+ uint32_t bme_disable_fbr: 1;
+ uint32_t block_based_skip_enable: 1;
+ uint32_t inter_sad: 2;
+ uint32_t intra_sad: 2;
+ uint32_t sub_mb_part_mask: 7;
+ uint32_t reserved1: 1;
+ } dw3;
+
+ struct {
+ uint32_t frame_qp: 8;
+ uint32_t per_mb_qp_enable: 1;
+ uint32_t field_parity_flag: 1;
+ uint32_t hme_enable : 1;
+ uint32_t multiple_mv_predictor_per_mb_enable: 2;
+ uint32_t disable_mv_output: 1;
+ uint32_t disable_mb_stats: 1;
+ uint32_t bwd_ref_pic_frame_field_flag: 1;
+ uint32_t fwd_ref_pic_frame_field_flag: 1;
+ uint32_t bwd_ref_pic_field_parity_flag: 1;
+ uint32_t fwd_ref_pic_field_parity_flag: 1;
+ uint32_t curr_pic_field_parity_flag: 1;
+ uint32_t bwd_ref_pic_enable: 1;
+ uint32_t fwd_ref_pic_enable: 1;
+ uint32_t reserved: 10;
+ } dw4;
+
+ struct {
+ uint32_t slice_mb_height: 16;
+ uint32_t ref_width: 8;
+ uint32_t ref_height: 8;
+ } dw5;
+
+ struct {
+ uint32_t pic_height: 16;
+ uint32_t reserved: 16;
+ } dw6;
+
+ struct {
+ uint32_t intra_part_mask: 5;
+ uint32_t non_skip_zmv_added: 1;
+ uint32_t non_skip_mode_added: 1;
+ uint32_t luma_intra_src_corner_swap: 1;
+ uint32_t reserved0: 8;
+ uint32_t mv_cost_scale_factor: 2;
+ uint32_t bilinear_enable: 1;
+ uint32_t src_field_polarity: 1;
+ uint32_t weightedsad_harr: 1;
+ uint32_t ac_only_haar: 1;
+ uint32_t ref_id_cost_mode: 1;
+ uint32_t reserved1: 1;
+ uint32_t skip_center_mask: 8;
+ } dw7;
+
+ struct {
+ uint32_t mode_0_cost: 8;
+ uint32_t mode_1_cost: 8;
+ uint32_t mode_2_cost: 8;
+ uint32_t mode_3_cost: 8;
+ } dw8;
+
+ struct {
+ uint32_t mode_4_cost: 8;
+ uint32_t mode_5_cost: 8;
+ uint32_t mode_6_cost: 8;
+ uint32_t mode_7_cost: 8;
+ } dw9;
+
+ struct {
+ uint32_t mode_8_cost: 8;
+ uint32_t mode_9_cost: 8;
+ uint32_t ref_id_cost: 8;
+ uint32_t chroma_intra_mode_cost: 8;
+ } dw10;
+
+ struct {
+ uint32_t mv_0_cost: 8;
+ uint32_t mv_1_cost: 8;
+ uint32_t mv_2_cost: 8;
+ uint32_t mv_3_cost: 8;
+ } dw11;
+
+ struct {
+ uint32_t mv_4_cost: 8;
+ uint32_t mv_5_cost: 8;
+ uint32_t mv_6_cost: 8;
+ uint32_t mv_7_cost: 8;
+ } dw12;
+
+ struct {
+ uint32_t reserved;
+ } dw13;
+
+ struct {
+ uint32_t sic_fwd_trans_coeff_threshold_0: 16;
+ uint32_t sic_fwd_trans_coeff_threshold_1: 8;
+ uint32_t sic_fwd_trans_coeff_threshold_2: 8;
+ } dw14;
+
+ struct {
+ uint32_t sic_fwd_trans_coeff_threshold_3: 8;
+ uint32_t sic_fwd_trans_coeff_threshold_4: 8;
+ uint32_t sic_fwd_trans_coeff_threshold_5: 8;
+ uint32_t sic_fwd_trans_coeff_threshold_6: 8;
+ } dw15;
+
+ struct {
+ struct generic_search_path_delta sp_delta_0;
+ struct generic_search_path_delta sp_delta_1;
+ struct generic_search_path_delta sp_delta_2;
+ struct generic_search_path_delta sp_delta_3;
+ } dw16;
+
+ struct {
+ struct generic_search_path_delta sp_delta_4;
+ struct generic_search_path_delta sp_delta_5;
+ struct generic_search_path_delta sp_delta_6;
+ struct generic_search_path_delta sp_delta_7;
+ } dw17;
+
+ struct {
+ struct generic_search_path_delta sp_delta_8;
+ struct generic_search_path_delta sp_delta_9;
+ struct generic_search_path_delta sp_delta_10;
+ struct generic_search_path_delta sp_delta_11;
+ } dw18;
+
+ struct {
+ struct generic_search_path_delta sp_delta_12;
+ struct generic_search_path_delta sp_delta_13;
+ struct generic_search_path_delta sp_delta_14;
+ struct generic_search_path_delta sp_delta_15;
+ } dw19;
+
+ struct {
+ struct generic_search_path_delta sp_delta_16;
+ struct generic_search_path_delta sp_delta_17;
+ struct generic_search_path_delta sp_delta_18;
+ struct generic_search_path_delta sp_delta_19;
+ } dw20;
+
+ struct {
+ struct generic_search_path_delta sp_delta_20;
+ struct generic_search_path_delta sp_delta_21;
+ struct generic_search_path_delta sp_delta_22;
+ struct generic_search_path_delta sp_delta_23;
+ } dw21;
+
+ struct {
+ struct generic_search_path_delta sp_delta_24;
+ struct generic_search_path_delta sp_delta_25;
+ struct generic_search_path_delta sp_delta_26;
+ struct generic_search_path_delta sp_delta_27;
+ } dw22;
+
+ struct {
+ struct generic_search_path_delta sp_delta_28;
+ struct generic_search_path_delta sp_delta_29;
+ struct generic_search_path_delta sp_delta_30;
+ struct generic_search_path_delta sp_delta_31;
+ } dw23;
+
+ struct {
+ struct generic_search_path_delta sp_delta_32;
+ struct generic_search_path_delta sp_delta_33;
+ struct generic_search_path_delta sp_delta_34;
+ struct generic_search_path_delta sp_delta_35;
+ } dw24;
+
+ struct {
+ struct generic_search_path_delta sp_delta_36;
+ struct generic_search_path_delta sp_delta_37;
+ struct generic_search_path_delta sp_delta_38;
+ struct generic_search_path_delta sp_delta_39;
+ } dw25;
+
+ struct {
+ struct generic_search_path_delta sp_delta_40;
+ struct generic_search_path_delta sp_delta_41;
+ struct generic_search_path_delta sp_delta_42;
+ struct generic_search_path_delta sp_delta_43;
+ } dw26;
+
+ struct {
+ struct generic_search_path_delta sp_delta_44;
+ struct generic_search_path_delta sp_delta_45;
+ struct generic_search_path_delta sp_delta_46;
+ struct generic_search_path_delta sp_delta_47;
+ } dw27;
+
+ struct {
+ struct generic_search_path_delta sp_delta_48;
+ struct generic_search_path_delta sp_delta_49;
+ struct generic_search_path_delta sp_delta_50;
+ struct generic_search_path_delta sp_delta_51;
+ } dw28;
+
+ struct {
+ struct generic_search_path_delta sp_delta_52;
+ struct generic_search_path_delta sp_delta_53;
+ struct generic_search_path_delta sp_delta_54;
+ struct generic_search_path_delta sp_delta_55;
+ } dw29;
+
+ struct {
+ uint32_t intra_4x4_mode_mask: 8;
+ uint32_t reserved1: 8;
+ uint32_t intra_8x8_mode_mask: 8;
+ uint32_t reserved2: 8;
+ } dw30;
+
+ struct {
+ uint32_t intra_16x16_mode_mask: 4;
+ uint32_t intra_chroma_mode_mask: 4;
+ uint32_t intra_compute_type: 2;
+ uint32_t reserved: 22;
+ } dw31;
+
+ struct {
+ uint32_t max_vmv_r: 16;
+ uint32_t reserved: 16;
+ } dw32;
+
+ struct {
+ uint32_t intra_16x16_non_dc_predPenalty: 8;
+ uint32_t intra_8x8_non_dc_pred_penalty: 8;
+ uint32_t intra_4x4_non_dc_pred_penalty: 8;
+ uint32_t reserved : 8;
+ } dw33;
+
+ struct {
+ uint32_t reserved;
+ } dw34;
+
+ struct {
+ uint32_t reserved;
+ } dw35;
+
+ struct {
+ uint32_t reserved1: 8;
+ uint32_t hme_combined_extra_sus: 8;
+ uint32_t reserved2: 14;
+ uint32_t hme_combine_overlap: 2;
+ } dw36;
+
+ struct {
+ uint32_t skip_mode_enable: 1;
+ uint32_t adaptive_enable: 1;
+ uint32_t bi_mix_disable: 1;
+ uint32_t reserved1: 2;
+ uint32_t early_ime_success_enable: 1;
+ uint32_t reserved2: 1;
+ uint32_t t8x8_flag_for_inter_enable: 1;
+ uint32_t reserved3: 16;
+ uint32_t early_ime_stop: 8;
+ } dw37;
+
+ struct {
+ uint32_t max_len_sp: 8;
+ uint32_t max_num_su: 8;
+ uint32_t ref_threshold: 16;
+ } dw38;
+
+ struct {
+ uint32_t reserved: 8;
+ uint32_t hme_ref_windows_comb_threshold: 8;
+ uint32_t ref_width: 8;
+ uint32_t ref_height: 8;
+ } dw39;
+
+ struct {
+ uint32_t curr_pic_surf_index;
+ } dw40;
+
+ struct {
+ uint32_t hme_mv_dat_surf_index;
+ } dw41;
+
+ struct {
+ uint32_t mv_predictor_surf_index;
+ } dw42;
+
+ struct {
+ uint32_t mb_qp_surf_index;
+ } dw43;
+
+ struct {
+ uint32_t mv_data_out_surf_index;
+ } dw44;
+
+ struct {
+ uint32_t mb_stats_out_surf_index;
+ } dw45;
+
+ struct {
+ uint32_t vme_inter_prediction_surf_index;
+ } dw46;
+
+ struct {
+ uint32_t vme_Inter_prediction_mr_surf_index;
+ } dw47;
+
+ struct {
+ uint32_t ftq_lut_surf_index;
+ } dw48;
+
+} gen9_avc_preproc_curbe_data;
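
The same packing rule applies here: dw0..dw48 span 49 dwords, with each dwN bit-field group summing to 32 bits. An illustrative C11 check, not part of the patch:

/* Sketch only: the PreProc curbe must pack to 49 dwords. */
_Static_assert(sizeof(gen9_avc_preproc_curbe_data) == 49 * sizeof(uint32_t),
               "gen9_avc_preproc_curbe_data must pack to 49 dwords");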
+
/* Gen95 */
typedef struct _gen95_avc_scaling4x_curbe_data {
diff --git a/src/i965_avc_encoder_common.h b/src/i965_avc_encoder_common.h
index d69686fd..e16cbed8 100644
--- a/src/i965_avc_encoder_common.h
+++ b/src/i965_avc_encoder_common.h
@@ -179,6 +179,22 @@ struct i965_avc_encoder_context {
//ref list
struct i965_gpe_resource list_reference_res[MAX_MFC_AVC_REFERENCE_SURFACES];
+ // preenc downscaled surfaces
+ VASurfaceID preenc_scaled_4x_surface_id;
+ struct object_surface *preenc_scaled_4x_surface_obj;
+ VASurfaceID preenc_past_ref_scaled_4x_surface_id;
+ struct object_surface *preenc_past_ref_scaled_4x_surface_obj;
+ VASurfaceID preenc_future_ref_scaled_4x_surface_id;
+ struct object_surface *preenc_future_ref_scaled_4x_surface_obj;
+ struct i965_gpe_resource preenc_past_ref_stat_data_out_buffer;
+ struct i965_gpe_resource preenc_future_ref_stat_data_out_buffer;
+
+ // preproc resources
+ struct i965_gpe_resource preproc_mv_predictor_buffer;
+ struct i965_gpe_resource preproc_mb_qp_buffer;
+ struct i965_gpe_resource preproc_mv_data_out_buffer;
+ struct i965_gpe_resource preproc_stat_data_out_buffer;
+
// kernel context
struct gen_avc_scaling_context context_scaling;
struct gen_avc_me_context context_me;
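
A sketch of how one of the new preproc buffers would be created with the driver's GPE resource helpers; the allocation helper's exact signature and the size shown are assumptions (the real buffers scale with the macroblock count):

/* Sketch only: allocate one preproc output buffer at setup time.
 * i965_allocate_gpe_resource()'s exact signature is assumed here. */
if (!i965_allocate_gpe_resource(i965->intel.bufmgr,
                                &avc_ctx->preproc_stat_data_out_buffer,
                                stats_size /* placeholder: per-MB statistics */,
                                "PreProc statistics output buffer"))
    return VA_STATUS_ERROR_ALLOCATION_FAILED;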
@@ -186,6 +202,7 @@ struct i965_avc_encoder_context {
struct gen_avc_mbenc_context context_mbenc;
struct gen_avc_wp_context context_wp;
struct gen_avc_sfd_context context_sfd;
+ struct gen_avc_preproc_context context_preproc;
struct encoder_status_buffer_internal status_buffer;
@@ -199,6 +216,7 @@ struct avc_enc_state {
VAEncSliceParameterBufferH264 *slice_param[MAX_AVC_SLICE_NUM];
VAEncMacroblockParameterBufferH264 *mb_param;
VAEncMiscParameterFEIFrameControlH264 *fei_framectl_param;
+ VAStatsStatisticsParameterH264 *stat_param;
uint32_t mad_enable: 1;
//mb skip
uint32_t mb_disable_skip_map_enable: 1;
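
stat_param caches the per-frame VAStatsStatisticsParameterH264 the application submits for PreEnc. An application-side sketch, assuming libva's Stats API from the linked PR; vaCreateBuffer() is standard libva, while the zero-initialized struct stands in for real field setup, which follows the libva header:

/* Sketch only: hand the PreEnc statistics parameters to the driver. */
VAStatsStatisticsParameterH264 stats = {0};
VABufferID stats_buf = VA_INVALID_ID;
vaCreateBuffer(va_dpy, context, VAStatsStatisticsParameterBufferType,
               sizeof(stats), 1, &stats, &stats_buf);
/* ... then the usual vaBeginPicture/vaRenderPicture/vaEndPicture ... */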