diff options
Diffstat (limited to 'src/gen75_mfc.c')
-rw-r--r-- | src/gen75_mfc.c | 861 |
1 files changed, 428 insertions, 433 deletions
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index cfc3c229..a6a3c1d3 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -43,33 +43,31 @@ #include "gen6_vme.h" #include "intel_media.h" -#define MFC_SOFTWARE_HASWELL 1 +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF + +#define MFC_SOFTWARE_HASWELL 0 + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) -static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" -}; - -static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +static const uint32_t gen75_mfc_batchbuffer_avc[][4] = { +#include "shaders/utils/mfc_batchbuffer_hsw.g75b" }; static struct i965_kernel gen75_mfc_kernels[] = { { "MFC AVC INTRA BATCHBUFFER ", MFC_BATCHBUFFER_AVC_INTRA, - gen75_mfc_batchbuffer_avc_intra, - sizeof(gen75_mfc_batchbuffer_avc_intra), - NULL - }, - - { - "MFC AVC INTER BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTER, - gen75_mfc_batchbuffer_avc_inter, - sizeof(gen75_mfc_batchbuffer_avc_inter), + gen75_mfc_batchbuffer_avc, + sizeof(gen75_mfc_batchbuffer_avc), NULL }, }; @@ -86,8 +84,8 @@ static struct i965_kernel gen75_mfc_kernels[] = { static void gen75_mfc_pipe_mode_select(VADriverContextP ctx, - int standard_select, - struct intel_encoder_context *encoder_context) + int standard_select, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -152,7 +150,7 @@ gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -161,11 +159,11 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 26); OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); - /* the DW1-3 is for the MFX indirect bistream offset */ + /* the DW1-3 is for the MFX indirect bistream offset */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-5 is the MFX upper bound */ + /* the DW4-5 is the MFX upper bound */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -176,14 +174,14 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ OUT_BCS_BATCH(batch, 0); - /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ + /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ + /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -247,7 +245,7 @@ gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -261,7 +259,7 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); /*DW1. MB setting of frame */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -291,22 +289,22 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /* DW5 Trellis quantization */ + /* DW5 Trellis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ OUT_BCS_BATCH(batch, 0); /* Reserved */ - /* DW8. QP delta */ + /* DW8. QP delta */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10. Bit setting for MB */ + /* DW10. Bit setting for MB */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); - /* DW12. */ + /* DW12. */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0x02010100); - /* DW14. For short format */ + /* DW14. For short format */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -315,10 +313,10 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, static void gen75_mfc_qm_state(VADriverContextP ctx, - int qm_type, - unsigned int *qm, - int qm_length, - struct intel_encoder_context *encoder_context) + int qm_type, + unsigned int *qm, + int qm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int qm_buffer[16]; @@ -352,10 +350,10 @@ gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encod static void gen75_mfc_fqm_state(VADriverContextP ctx, - int fqm_type, - unsigned int *fqm, - int fqm_length, - struct intel_encoder_context *encoder_context) + int fqm_type, + unsigned int *fqm, + int fqm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int fqm_buffer[32]; @@ -393,9 +391,9 @@ gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, - unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, - int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, - struct intel_batchbuffer *batch) + unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, + int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, + struct intel_batchbuffer *batch) { if (batch == NULL) batch = encoder_context->base.batch; @@ -418,16 +416,33 @@ gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context * static void gen75_mfc_init(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; dri_bo *bo; int i; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); @@ -495,7 +510,8 @@ static void gen75_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, + slice_batchbuffer_size); mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; @@ -507,7 +523,7 @@ static void gen75_mfc_init(VADriverContextP ctx, static void gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -525,9 +541,9 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, else OUT_BCS_BATCH(batch, 0); /* pre output addr */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for the post_deblocking */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-6 is for the post_deblocking */ if (mfc_context->post_deblocking_output.bo) OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, @@ -535,37 +551,37 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, 0); /* post output addr */ else OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for the uncompressed_picture */ + /* the DW7-9 is for the uncompressed_picture */ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* uncompressed data */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW10-12 is for the mb status */ + /* the DW10-12 is for the mb status */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* StreamOut data*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW13-15 is for the intra_row_store_scratch */ + /* the DW13-15 is for the intra_row_store_scratch */ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW16-18 is for the deblocking filter */ + /* the DW16-18 is for the deblocking filter */ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* the DW 19-50 is for Reference pictures*/ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { @@ -578,25 +594,25 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, } OUT_BCS_BATCH(batch, 0); } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* The DW 52-54 is for the MB status buffer */ + /* The DW 52-54 is for the MB status buffer */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 55-57 is the ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 55-57 is the ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 58-60 is the second ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 58-60 is the second ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } @@ -657,14 +673,14 @@ gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } static void gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -688,15 +704,15 @@ gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); } } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW34-36 is the MV for the current reference */ - OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* POL list */ for(i = 0; i < 32; i++) { @@ -749,7 +765,7 @@ gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_contex static void gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -763,12 +779,12 @@ gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for Bitplane Read Buffer Base Address */ + /* the DW7-9 is for Bitplane Read Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -802,8 +818,8 @@ gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -821,8 +837,8 @@ static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, static VAStatus gen75_mfc_run(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -834,9 +850,9 @@ static VAStatus gen75_mfc_run(VADriverContextP ctx, static VAStatus gen75_mfc_stop(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int *encoded_bits_size) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int *encoded_bits_size) { VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; @@ -853,13 +869,13 @@ gen75_mfc_stop(VADriverContextP ctx, static void gen75_mfc_avc_slice_state(VADriverContextP ctx, - VAEncPictureParameterBufferH264 *pic_param, - VAEncSliceParameterBufferH264 *slice_param, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int rate_control_enable, - int qp, - struct intel_batchbuffer *batch) + VAEncPictureParameterBufferH264 *pic_param, + VAEncSliceParameterBufferH264 *slice_param, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int rate_control_enable, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; @@ -875,19 +891,32 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, int maxQpN, maxQpP; unsigned char correct[6], grow, shrink; int i; - int bslice = 0; int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -912,14 +941,11 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | @@ -969,14 +995,14 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL static int gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, - int qp,unsigned int *msg, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size, unsigned char max_mb_size, - struct intel_batchbuffer *batch) + int qp,unsigned int *msg, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size, unsigned char max_mb_size, + struct intel_batchbuffer *batch) { int len_in_dwords = 12; unsigned int intra_msg; @@ -1023,13 +1049,14 @@ gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, - unsigned int *msg, unsigned int offset, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, - struct intel_batchbuffer *batch) + unsigned int *msg, unsigned int offset, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, + struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 12; - unsigned int inter_msg = 0; + unsigned int inter_msg = 0; if (batch == NULL) batch = encoder_context->base.batch; { @@ -1041,30 +1068,30 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i * command. */ if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { - /* MV[0] and MV[2] are replicated */ - mv_ptr[4] = mv_ptr[0]; - mv_ptr[5] = mv_ptr[1]; - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[6] = mv_ptr[8]; - mv_ptr[7] = mv_ptr[9]; + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { - /* MV[0] and MV[1] are replicated */ - mv_ptr[2] = mv_ptr[0]; - mv_ptr[3] = mv_ptr[1]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - !(msg[1] & SUBMB_SHAPE_MASK)) { - /* Don't touch MV[0] or MV[1] */ - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } } @@ -1072,21 +1099,21 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - inter_msg = 32; - /* MV quantity */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { - if (msg[1] & SUBMB_SHAPE_MASK) - inter_msg = 128; - } + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ OUT_BCS_BATCH(batch, offset); - inter_msg = msg[0] & (0x1F00FFFF); - inter_msg |= INTER_MV8; - inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); - if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - (msg[1] & SUBMB_SHAPE_MASK)) { - inter_msg |= INTER_MV32; - } + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } OUT_BCS_BATCH(batch, inter_msg); @@ -1102,11 +1129,11 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ #endif - inter_msg = msg[1] >> 8; + inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | @@ -1119,18 +1146,12 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i return len_in_dwords; } -#define AVC_INTRA_RDO_OFFSET 4 -#define AVC_INTER_RDO_OFFSET 10 -#define AVC_INTER_MSG_OFFSET 8 -#define AVC_INTER_MV_OFFSET 48 -#define AVC_RDO_MASK 0xFFFF - static void gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1145,15 +1166,18 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1162,22 +1186,16 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + gen75_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, encoder_context, + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); - if ( slice_index == 0) + if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1223,27 +1241,21 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, tail_data, 1, 8, 1, 1, 1, 0, slice_batch); } - - free(slice_header); - } static dri_bo * gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; dri_bo *batch_bo; int i; int buffer_size; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); @@ -1257,7 +1269,9 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -1266,8 +1280,8 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, static void gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1279,43 +1293,27 @@ gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, &vme_context->vme_output, BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); - assert(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &mfc_context->aux_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); } static void gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; - mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ - mfc_context->mfc_batchbuffer_surface.pitch = 16; - mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, - "MFC batchbuffer", - mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, - 0x1000); + assert(mfc_context->aux_batchbuffer_surface.bo); mfc_context->buffer_suface_setup(ctx, &mfc_context->gpe_context, - &mfc_context->mfc_batchbuffer_surface, + &mfc_context->aux_batchbuffer_surface, BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); } static void gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context); @@ -1323,8 +1321,8 @@ gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_interface_descriptor_data *desc; @@ -1366,155 +1364,140 @@ gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; (void)mfc_context; } +#define AVC_PAK_LEN_IN_BYTE 48 +#define AVC_PAK_LEN_IN_OWORD 3 + static void gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, - int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, - int number_mb_cmds, - int first_object, - int last_object, - int last_slice, - int mb_x, - int mb_y, - int width_in_mbs, - int qp) + uint32_t intra_flag, + int head_offset, + int number_mb_cmds, + int slice_end_x, + int slice_end_y, + int mb_x, + int mb_y, + int width_in_mbs, + int qp, + uint32_t fwd_ref, + uint32_t bwd_ref) { - BEGIN_BATCH(batch, 12); + uint32_t temp_value; + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); - OUT_BATCH(batch, index); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); + OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); /*inline data */ - OUT_BATCH(batch, head_offset); - OUT_BATCH(batch, batchbuffer_offset); - OUT_BATCH(batch, - head_size << 16 | - tail_size); - OUT_BATCH(batch, - number_mb_cmds << 16 | - first_object << 2 | - last_object << 1 | - last_slice); - OUT_BATCH(batch, - mb_y << 8 | - mb_x); + OUT_BATCH(batch, head_offset / 16); + OUT_BATCH(batch, (intra_flag) | (qp << 16)); + temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16)); + OUT_BATCH(batch, temp_value); + + OUT_BATCH(batch, number_mb_cmds); + OUT_BATCH(batch, - qp << 16 | - width_in_mbs); + ((slice_end_y << 8) | (slice_end_x))); + OUT_BATCH(batch, fwd_ref); + OUT_BATCH(batch, bwd_ref); + + OUT_BATCH(batch, MI_NOOP); ADVANCE_BATCH(batch); } static void gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - VAEncSliceParameterBufferH264 *slice_param, - int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, - int qp, - int last_slice) + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + int head_offset, + int qp, + int last_slice) { struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); int number_mb_cmds = 128; - int starting_mb = 0; - int last_object = 0; - int first_object = 1; - int i; + int starting_offset = 0; int mb_x, mb_y; - int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; - - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - last_object = (total_mbs - starting_mb) == number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); + int last_mb, slice_end_x, slice_end_y; + int remaining_mb = total_mbs; + uint32_t fwd_ref , bwd_ref, mb_flag; - starting_mb += number_mb_cmds; + last_mb = slice_param->macroblock_address + total_mbs - 1; + slice_end_x = last_mb % width_in_mbs; + slice_end_y = last_mb / width_in_mbs; - gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - - if (first_object) { - head_offset += head_size; - batchbuffer_offset += head_size; - } - - if (last_object) { - head_offset += tail_size; - batchbuffer_offset += tail_size; - } - - batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD; + if (slice_type == SLICE_TYPE_I) { + fwd_ref = 0; + bwd_ref = 0; + mb_flag = 1; + } else { + fwd_ref = vme_context->ref_index_in_mb[0]; + bwd_ref = vme_context->ref_index_in_mb[1]; + mb_flag = 0; + } - first_object = 0; + if (width_in_mbs >= 100) { + number_mb_cmds = width_in_mbs / 5; + } else if (width_in_mbs >= 80) { + number_mb_cmds = width_in_mbs / 4; + } else if (width_in_mbs >= 60) { + number_mb_cmds = width_in_mbs / 3; + } else if (width_in_mbs >= 40) { + number_mb_cmds = width_in_mbs / 2; + } else { + number_mb_cmds = width_in_mbs; } - if (!last_object) { - last_object = 1; - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - starting_mb += number_mb_cmds; + do { + if (number_mb_cmds >= remaining_mb) { + number_mb_cmds = remaining_mb; + } + mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - } + mb_flag, + head_offset, + number_mb_cmds, + slice_end_x, + slice_end_y, + mb_x, + mb_y, + width_in_mbs, + qp, + fwd_ref, + bwd_ref); + + head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE); + remaining_mb -= number_mb_cmds; + starting_offset += number_mb_cmds; + } while (remaining_mb > 0); } /* * return size in Owords (16bytes) */ -static int +static void gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; @@ -1526,17 +1509,18 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; - int old_used = intel_batchbuffer_used_size(slice_batch), used; - unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1545,40 +1529,35 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - head_offset = old_used / 16; gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, - encoder_context, - (rate_control_mode == VA_RC_CBR), - qp, - slice_batch); + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp_slice, + slice_batch); if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - free(slice_header); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - head_size = (used - old_used) / 16; - old_used = used; + head_offset = intel_batchbuffer_used_size(slice_batch); + + slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE; + + gen75_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + qp, + last_slice); - /* tail */ + + /* Aligned for tail */ + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ if (last_slice) { mfc_context->insert_object(ctx, encoder_context, @@ -1603,48 +1582,41 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, slice_batch); } - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - tail_size = (used - old_used) / 16; - - - gen75_mfc_avc_batchbuffer_slice_command(ctx, - encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); - - return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; + return; } static void gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; - int i, size, offset = 0; + int i; intel_batchbuffer_start_atomic(batch, 0x4000); gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); for ( i = 0; i < encode_state->num_slice_params_ext; i++) { - size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); - offset += size; + gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i); + } + { + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + intel_batchbuffer_align(slice_batch, 8); + BEGIN_BCS_BATCH(slice_batch, 2); + OUT_BCS_BATCH(slice_batch, 0); + OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(slice_batch); + mfc_context->aux_batchbuffer = NULL; + intel_batchbuffer_free(slice_batch); } - intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } static void gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context); @@ -1654,23 +1626,23 @@ gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, static dri_bo * gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); - dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); - return mfc_context->mfc_batchbuffer_surface.bo; + return mfc_context->aux_batchbuffer_surface.bo; } #endif static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -1681,7 +1653,7 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); #else slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); @@ -1711,8 +1683,8 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, static VAStatus gen75_mfc_avc_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; unsigned int rate_control_mode = encoder_context->rate_control_mode; @@ -1768,9 +1740,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -1795,7 +1769,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ @@ -1814,7 +1792,7 @@ static void gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -1843,14 +1821,14 @@ static void gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, - 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -2228,20 +2206,39 @@ gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0xff, slice_batch); } else { - gen75_mfc_mpeg2_pak_object_inter(ctx, - encode_state, - encoder_context, - msg, - width_in_mbs, height_in_mbs, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - slice_param->quantiser_scale_code, - 0, - 0xff, - slice_batch); + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + + if (intra_rdo < inter_rdo) + gen75_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + else + gen75_mfc_mpeg2_pak_object_inter(ctx, + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); } } @@ -2286,18 +2283,13 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -2318,6 +2310,7 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -2385,7 +2378,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; @@ -2439,7 +2432,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_MPEG2; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; @@ -2516,16 +2509,18 @@ gen75_mfc_context_destroy(void *context) } static VAStatus gen75_mfc_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context); break; @@ -2563,7 +2558,7 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context * i965_gpe_load_kernels(ctx, &mfc_context->gpe_context, gen75_mfc_kernels, - NUM_MFC_KERNEL); + 1); mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select; mfc_context->set_surface_state = gen75_mfc_surface_state; |