From a70ea804d4ceb93b10d1bb3cc50c92fe559d96e4 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 1 Jul 2013 10:40:19 +0800 Subject: Check whether VEBOX is supported by the underlying OS Signed-off-by: Xiang, Haihao (cherry picked from commit c586c80d29d8860011d95e78d1609ff3683f3cc4) --- src/intel_driver.c | 4 +++- src/intel_driver.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/intel_driver.c b/src/intel_driver.c index 83542d9b..74d3f18e 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -72,7 +72,7 @@ intel_driver_init(VADriverContextP ctx) { struct intel_driver_data *intel = intel_driver_data(ctx); struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state; - int has_exec2, has_bsd, has_blt; + int has_exec2, has_bsd, has_blt, has_vebox; assert(drm_state); assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) || @@ -97,6 +97,8 @@ intel_driver_init(VADriverContextP ctx) intel->has_bsd = has_bsd; if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt)) intel->has_blt = has_blt; + if (intel_driver_get_param(intel, I915_PARAM_HAS_VEBOX, &has_vebox)) + intel->has_vebox = !!has_vebox; intel_driver_get_revid(intel, &intel->revision); intel_memman_init(intel); diff --git a/src/intel_driver.h b/src/intel_driver.h index 9631b969..fcb09082 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -121,6 +121,7 @@ struct intel_driver_data unsigned int has_exec2 : 1; /* Flag: has execbuffer2? */ unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */ unsigned int has_blt : 1; /* Flag: has BLT unit? 
*/ + unsigned int has_vebox : 1; /* Flag: has VEBOX unit */ }; bool intel_driver_init(VADriverContextP ctx); -- cgit v1.2.1 From a0b06fa98887b341c8208ce40a471f1721944df2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 1 Jul 2013 12:47:28 +0800 Subject: Add the dependency to the ring supported by the underlying OS for VPP filters Signed-off-by: Xiang, Haihao (cherry picked from commit a532539cbc7048f5c01b64dfe239f1570123c959) --- src/i965_drv_video.c | 62 +++++++++++++++++++++++++++++++++++++++------------- src/i965_drv_video.h | 13 ++++++++++- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 3ac5418e..606e2795 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -232,8 +232,8 @@ static struct hw_codec_info gen6_hw_codec_info = { .num_filters = 2, .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, }, }; @@ -259,8 +259,8 @@ static struct hw_codec_info gen7_hw_codec_info = { .num_filters = 2, .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, }, }; @@ -285,10 +285,10 @@ static struct hw_codec_info gen75_hw_codec_info = { .has_di_motion_adptive = 1, .num_filters = 4, .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, - VAProcFilterSharpening, - VAProcFilterColorBalance, + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, + { VAProcFilterColorBalance, I965_RING_VEBOX}, }, }; @@ -4511,6 +4511,35 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, return vaStatus; } + +static int +i965_os_has_ring_support(VADriverContextP ctx, + int ring) +{ + struct i965_driver_data *const i965 = i965_driver_data(ctx); + + switch (ring) { + case 
I965_RING_BSD: + return i965->intel.has_bsd; + + case I965_RING_BLT: + return i965->intel.has_blt; + + case I965_RING_VEBOX: + return i965->intel.has_vebox; + + case I965_RING_NULL: + return 1; /* Always support */ + + default: + /* should never get here */ + assert(0); + break; + } + + return 0; +} + /* * Query video processing pipeline */ @@ -4522,18 +4551,21 @@ VAStatus i965_QueryVideoProcFilters( ) { struct i965_driver_data *const i965 = i965_driver_data(ctx); - unsigned int i = 0; + unsigned int i = 0, num = 0; if (!num_filters || !filters) return VA_STATUS_ERROR_INVALID_PARAMETER; - for (i = 0; i < *num_filters && i < i965->codec_info->num_filters; i++) - filters[i] = i965->codec_info->filters[i]; - - *num_filters = i; + for (i = 0; i < i965->codec_info->num_filters; i++) { + if (i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) { + if (num == *num_filters) + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + + filters[num++] = i965->codec_info->filters[i].type; + } + } - if (i < i965->codec_info->num_filters) - return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + *num_filters = num; return VA_STATUS_SUCCESS; } diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index e694d672..48519cdf 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -258,6 +258,17 @@ struct object_subpic unsigned int flags; }; +#define I965_RING_NULL 0 +#define I965_RING_BSD 1 +#define I965_RING_BLT 2 +#define I965_RING_VEBOX 3 + +struct i965_filter +{ + VAProcFilterType type; + int ring; +}; + struct hw_codec_info { struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *); @@ -282,7 +293,7 @@ struct hw_codec_info unsigned int has_di_motion_compensated:1; unsigned int num_filters; - VAProcFilterType filters[VAProcFilterCount]; + struct i965_filter filters[VAProcFilterCount]; }; -- cgit v1.2.1 From 8bf807539c1790d6eee531373131672d38c82b31 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 10 Jul 2013 14:16:02 +0800 Subject: Check the 
returned pointer from malloc() before using it Signed-off-by: Xiang, Haihao (cherry picked from commit 1caf179b1425b13cacaa421c688c6df8369668c6) --- src/i965_drv_video.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 606e2795..40977502 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4291,6 +4291,9 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs = malloc(I965_MAX_SURFACE_ATTRIBUTES *sizeof(*attribs)); + if (attribs == NULL) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + if (IS_G4X(i965->intel.device_id)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { -- cgit v1.2.1 From 5f037849ff4ece789ec4a7af20f8fa3905076fbd Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 3 Jul 2013 10:31:17 +0800 Subject: Insert a phantom slice for H.264 decoding on SNB If the first slice doesn't start at 0, a phantom slice is added before the first slice. This fixes the GPU hang issue mentioned in https://bugs.freedesktop.org/show_bug.cgi?id=63946 (not the original issue). 
Signed-off-by: Xiang, Haihao Tested-by: Krzysztof Kotlenga (cherry picked from commit 11115e3f0427d056367c1c5946585e3f7cead662) --- src/gen6_mfd.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 3f696dd0..afbfc4c2 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -606,11 +606,32 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx, static void gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, struct gen6_mfd_context *gen6_mfd_context) { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + int slice_hor_pos, slice_ver_pos, slice_start_mb_num, next_slice_hor_pos, next_slice_ver_pos; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); + + if (next_slice_param) { + int first_mb_in_next_slice; + + slice_hor_pos = 0; + slice_ver_pos = 0; + slice_start_mb_num = 0; + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + slice_hor_pos = 0; + slice_ver_pos = height_in_mbs; + slice_start_mb_num = width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); + next_slice_hor_pos = 0; + next_slice_ver_pos = 0; + } BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? 
*/ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); @@ -618,9 +639,12 @@ gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, - height_in_mbs << 24 | - width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); - OUT_BCS_BATCH(batch, 0); + slice_ver_pos << 24 | + slice_hor_pos << 16 | + slice_start_mb_num << 0); + OUT_BCS_BATCH(batch, + next_slice_ver_pos << 16 | + next_slice_hor_pos << 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -749,12 +773,30 @@ gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, static void gen6_mfd_avc_phantom_slice(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, struct gen6_mfd_context *gen6_mfd_context) { - gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context); + gen6_mfd_avc_phantom_slice_state(ctx, pic_param, next_slice_param, gen6_mfd_context); gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context); } +static void +gen6_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen6_mfd_context *gen6_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context); +} + +static void +gen6_mfd_avc_phantom_slice_last(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct gen6_mfd_context *gen6_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context); +} + static void gen6_mfd_avc_decode_init(VADriverContextP ctx, struct decode_state *decode_state, @@ -896,6 +938,10 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && + slice_param->first_mb_in_slice) + gen6_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, 
gen6_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || @@ -918,7 +964,7 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx, } } - gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context); + gen6_mfd_avc_phantom_slice_last(ctx, pic_param, gen6_mfd_context); intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } -- cgit v1.2.1 From 188004b83afd2dd7f8bde16e7c724aa50d5a90c1 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 23 Jul 2013 13:08:05 +0800 Subject: VPP: check the filter when query the video filter capabilities Return VA_STATUS_ERROR_UNSUPPORTED_FILTER if an unsupported filter was supplied Signed-off-by: Xiang, Haihao (cherry picked from commit 428723853f88b4d5cac436f5fd62e6cc64a9e8e9) --- src/i965_drv_video.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 40977502..bd6aa9a3 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4587,6 +4587,17 @@ VAStatus i965_QueryVideoProcFilterCaps( if (!filter_caps || !num_filter_caps) return VA_STATUS_ERROR_INVALID_PARAMETER; + for (i = 0; i < i965->codec_info->num_filters; i++) { + if (type == i965->codec_info->filters[i].type && + i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) + break; + } + + if (i == i965->codec_info->num_filters) + return VA_STATUS_ERROR_UNSUPPORTED_FILTER; + + i = 0; + switch (type) { case VAProcFilterNoiseReduction: case VAProcFilterSharpening: -- cgit v1.2.1 From 5ecce9007616d8fec41fc05092a7874a40b13be3 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 23 Jul 2013 13:14:48 +0800 Subject: Release the private driver data when call vaTerminate() Signed-off-by: Xiang, Haihao (cherry picked from commit c735d9e58dd49c9a92ad0042b5649a9d3fe7c2c4) --- src/i965_drv_video.c | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/i965_drv_video.c b/src/i965_drv_video.c index bd6aa9a3..4dfc33e5 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4941,6 +4941,9 @@ i965_Terminate(VADriverContextP ctx) i965_sub_ops[i - 1].display_type == (ctx->display_type & VA_DISPLAY_MAJOR_MASK)) { i965_sub_ops[i - 1].terminate(ctx); } + + free(i965); + ctx->pDriverData = NULL; } return VA_STATUS_SUCCESS; -- cgit v1.2.1 From f4cf05a9d4821d13bc60681fde3fd9700bb640a5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 23 Jul 2013 13:22:16 +0800 Subject: Fixes valgrind warning "Conditional jump or move depends on uninitialised value(s)" Signed-off-by: Xiang, Haihao (cherry picked from commit cbd00deb6c5cad58ebd5e6ce5b89aaaded0f78a5) --- src/intel_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel_driver.c b/src/intel_driver.c index 74d3f18e..8650dba6 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -72,7 +72,7 @@ intel_driver_init(VADriverContextP ctx) { struct intel_driver_data *intel = intel_driver_data(ctx); struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state; - int has_exec2, has_bsd, has_blt, has_vebox; + int has_exec2 = 0, has_bsd = 0, has_blt = 0, has_vebox = 0; assert(drm_state); assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) || -- cgit v1.2.1 From 57d51835107784de2c2847c53393697d359c5e63 Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Thu, 18 Jul 2013 19:18:54 +0800 Subject: VPP: remove needless functions and parameters in gpe pipeline (cherry picked from commit ab0546e76967e5e7c465569f90e192b560678d8c) --- src/gen75_vpp_gpe.c | 1 - src/gen75_vpp_gpe.h | 9 --------- 2 files changed, 10 deletions(-) diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 70f229b8..236ccaf7 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -595,7 +595,6 @@ gen75_gpe_context_init(VADriverContextP ctx) gpe_ctx->vfe_state.urb_entry_size = 59 - 1; gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 
1; - vpp_gpe_ctx->vpp_surface2_setup = gen7_gpe_surface2_setup; vpp_gpe_ctx->vpp_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; vpp_gpe_ctx->vpp_buffer_surface_setup = gen7_gpe_buffer_suface_setup; vpp_gpe_ctx->vpp_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup; diff --git a/src/gen75_vpp_gpe.h b/src/gen75_vpp_gpe.h index 52322140..97ee72dd 100644 --- a/src/gen75_vpp_gpe.h +++ b/src/gen75_vpp_gpe.h @@ -91,19 +91,10 @@ struct vpp_gpe_context{ unsigned int forward_surf_sum; unsigned int backward_surf_sum; - unsigned int x_step; - unsigned int y_step; - unsigned int in_frame_w; unsigned int in_frame_h; unsigned int is_first_frame; - void (*vpp_surface2_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - void (*vpp_media_rw_surface_setup)(VADriverContextP ctx, struct i965_gpe_context *gpe_context, struct object_surface *obj_surface, -- cgit v1.2.1 From 8dd5513e222524baa4be24acf774edc332a50a74 Mon Sep 17 00:00:00 2001 From: Zhao Halley Date: Thu, 25 Jul 2013 09:09:41 +0800 Subject: Enable the Bay Trail platform. This patch adds PCI IDs for Bay Trail (sometimes called Valley View). As far as the video driver is concerned, it's very similar to Ivybridge GT1 except VP8 decoding support. 
(cherry picked from commit b3afeef8092dc4eb7cb73fce672ddf7a55205f34) --- src/i965_render.c | 2 +- src/intel_driver.h | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 21ec8442..26a7baf1 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3127,7 +3127,7 @@ i965_render_init(VADriverContextP ctx) render_state->max_wm_threads = 204; } else if (IS_HSW_GT3(i965->intel.device_id)) { render_state->max_wm_threads = 408; - } else if (IS_IVB_GT1(i965->intel.device_id)) { + } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) { render_state->max_wm_threads = 48; } else if (IS_IVB_GT2(i965->intel.device_id)) { render_state->max_wm_threads = 172; diff --git a/src/intel_driver.h b/src/intel_driver.h index fcb09082..c36dbbe2 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -238,6 +238,12 @@ struct intel_region #define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E #define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E +#define PCI_CHIP_BAYTRAIL_M_1 0x0F31 +#define PCI_CHIP_BAYTRAIL_M_2 0x0F32 +#define PCI_CHIP_BAYTRAIL_M_3 0x0F33 +#define PCI_CHIP_BAYTRAIL_M_4 0x0157 +#define PCI_CHIP_BAYTRAIL_D 0x0155 + #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ @@ -264,6 +270,15 @@ struct intel_region #define IS_GEN6(devid) (IS_SNB_GT1(devid) || \ IS_SNB_GT2(devid)) +#define IS_BAYTRAIL_M1(devid) (devid == PCI_CHIP_BAYTRAIL_M_1) +#define IS_BAYTRAIL_M2(devid) (devid == PCI_CHIP_BAYTRAIL_M_2) +#define IS_BAYTRAIL_M3(devid) (devid == PCI_CHIP_BAYTRAIL_M_3) +#define IS_BAYTRAIL_D(devid) (devid == PCI_CHIP_BAYTRAIL_D) +#define IS_BAYTRAIL(devid) (IS_BAYTRAIL_M1(devid) || \ + IS_BAYTRAIL_M2(devid) || \ + IS_BAYTRAIL_M3(devid) || \ + IS_BAYTRAIL_D(devid) ) + #define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_S_GT1) @@ -273,7 +288,8 @@ struct 
intel_region devid == PCI_CHIP_IVYBRIDGE_S_GT2) #define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || \ - IS_IVB_GT2(devid)) + IS_IVB_GT2(devid) || \ + IS_BAYTRAIL(devid) ) #define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ devid == PCI_CHIP_HASWELL_M_GT1 || \ -- cgit v1.2.1 From f08afbabeade7403557413f4858548d9a91b0d3e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 8 Aug 2013 09:35:30 +0800 Subject: Rename the macros Signed-off-by: Xiang, Haihao (cherry picked from commit 3ab97be8db1b8e55d0d5b95f577863416a87c6ff) --- src/gen6_mfc_common.c | 2 +- src/gen75_mfc.c | 2 +- src/gen7_mfc.c | 2 +- src/i965_drv_video.c | 4 ++-- src/i965_drv_video.h | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index ab91c866..ba56c1af 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -642,7 +642,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_H264; + coded_buffer_segment->codec = CODEC_H264; dri_bo_unmap(bo); return vaStatus; diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index cfc3c229..3fdae21b 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -2439,7 +2439,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_MPEG2; + coded_buffer_segment->codec = CODEC_MPEG2; dri_bo_unmap(bo); return vaStatus; diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 8572b89f..d1eb9a69 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -1069,7 +1069,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_MPEG2; + 
coded_buffer_segment->codec = CODEC_MPEG2; dri_bo_unmap(bo); return vaStatus; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 4dfc33e5..6942eedf 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1761,13 +1761,13 @@ i965_MapBuffer(VADriverContextP ctx, coded_buffer_segment->base.buf = buffer = (unsigned char *)(obj_buffer->buffer_store->bo->virtual) + I965_CODEDBUFFER_HEADER_SIZE; - if (coded_buffer_segment->codec == CODED_H264) { + if (coded_buffer_segment->codec == CODEC_H264) { delimiter0 = H264_DELIMITER0; delimiter1 = H264_DELIMITER1; delimiter2 = H264_DELIMITER2; delimiter3 = H264_DELIMITER3; delimiter4 = H264_DELIMITER4; - } else if (coded_buffer_segment->codec == CODED_MPEG2) { + } else if (coded_buffer_segment->codec == CODEC_MPEG2) { delimiter0 = MPEG2_DELIMITER0; delimiter1 = MPEG2_DELIMITER1; delimiter2 = MPEG2_DELIMITER2; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 48519cdf..301ebadd 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -366,8 +366,8 @@ int va_enc_packed_type_to_idx(int packed_type); /* reserve 2 byte for internal using */ -#define CODED_H264 0 -#define CODED_MPEG2 1 +#define CODEC_H264 0 +#define CODEC_MPEG2 1 #define H264_DELIMITER0 0x00 #define H264_DELIMITER1 0x00 -- cgit v1.2.1 From 496bd7f47f93ad4ac4f3510aa1c088c027bfe762 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 8 Aug 2013 09:52:33 +0800 Subject: Cleanup profile tracking in encoder Signed-off-by: Xiang, Haihao (cherry picked from commit edd25a94e92b9cec23594dc978691506a1c8cfab) --- src/gen6_mfc_common.c | 2 +- src/gen6_vme.c | 4 +--- src/gen75_mfc.c | 2 +- src/gen75_vme.c | 22 +++++++--------------- src/gen7_mfc.c | 2 +- src/gen7_vme.c | 9 +++------ src/i965_encoder.c | 19 ++++++++++++++++++- src/i965_encoder.h | 2 +- 8 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index ba56c1af..c2ec080d 100644 --- a/src/gen6_mfc_common.c +++ 
b/src/gen6_mfc_common.c @@ -642,7 +642,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODEC_H264; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 69c667d2..443dda89 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -649,9 +649,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_vme_context *vme_context = NULL; - if (encoder_context->profile != VAProfileH264Baseline && - encoder_context->profile != VAProfileH264Main && - encoder_context->profile != VAProfileH264High) { + if (encoder_context->codec != CODEC_H264) { /* Never get here */ assert(0); return False; diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 3fdae21b..d2201e34 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -2439,7 +2439,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODEC_MPEG2; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; diff --git a/src/gen75_vme.c b/src/gen75_vme.c index b7965052..231ffc61 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -374,17 +374,14 @@ static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, vme_state_message = (unsigned int *)vme_context->vme_state_message; - if (encoder_context->profile == VAProfileH264Baseline || - encoder_context->profile == VAProfileH264Main || - encoder_context->profile == VAProfileH264High) { + if (encoder_context->codec == CODEC_H264) { if (vme_context->h264_level >= 30) { mv_num = 16; if (vme_context->h264_level >= 31) mv_num = 8; } - } else if (encoder_context->profile == VAProfileMPEG2Simple || - 
encoder_context->profile == VAProfileMPEG2Main) { + } else if (encoder_context->codec == CODEC_MPEG2) { mv_num = 2; } @@ -501,10 +498,8 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx, vme_state_message[i] = 0; } - switch (encoder_context->profile) { - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: + switch (encoder_context->codec) { + case CODEC_H264: gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); break; @@ -1009,17 +1004,14 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * struct i965_kernel *vme_kernel_list = NULL; int i965_kernel_num; - switch (encoder_context->profile) { - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: + switch (encoder_context->codec) { + case CODEC_H264: vme_kernel_list = gen75_vme_kernels; encoder_context->vme_pipeline = gen75_vme_pipeline; i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); break; - case VAProfileMPEG2Simple: - case VAProfileMPEG2Main: + case CODEC_MPEG2: vme_kernel_list = gen75_vme_mpeg2_kernels; encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline; i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index d1eb9a69..dda8f91c 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -1069,7 +1069,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODEC_MPEG2; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 88eb4844..11af9e58 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -1092,21 +1092,18 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e gen7_vme_scoreboard_init(ctx, vme_context); - 
if(encoder_context->profile == VAProfileH264Baseline || - encoder_context->profile == VAProfileH264Main || - encoder_context->profile == VAProfileH264High ){ + if (encoder_context->codec == CODEC_H264) { vme_kernel_list = gen7_vme_kernels; vme_context->video_coding_type = VIDEO_CODING_AVC; vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; encoder_context->vme_pipeline = gen7_vme_pipeline; - } else if (encoder_context->profile == VAProfileMPEG2Simple || - encoder_context->profile == VAProfileMPEG2Main ){ + } else if (encoder_context->codec == CODEC_MPEG2) { vme_kernel_list = gen7_vme_mpeg2_kernels; vme_context->video_coding_type = VIDEO_CODING_MPEG2; vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM; encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline; } else { - /* Unsupported encoding profile */ + /* Unsupported codec */ assert(0); } diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 43846193..73cd3e30 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -345,7 +345,24 @@ intel_enc_hw_context_init(VADriverContextP ctx, encoder_context->input_yuv_surface = VA_INVALID_SURFACE; encoder_context->is_tmp_id = 0; encoder_context->rate_control_mode = VA_RC_NONE; - encoder_context->profile = obj_config->profile; + + switch (obj_config->profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + encoder_context->codec = CODEC_MPEG2; + break; + + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + encoder_context->codec = CODEC_H264; + break; + + default: + /* Never get here */ + assert(0); + break; + } for (i = 0; i < obj_config->num_attribs; i++) { if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) { diff --git a/src/i965_encoder.h b/src/i965_encoder.h index d9d6511f..29bd7028 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -39,7 +39,7 @@ struct intel_encoder_context { struct hw_context base; - VAProfile profile; + int codec; VASurfaceID input_yuv_surface; int is_tmp_id; unsigned int 
rate_control_mode; -- cgit v1.2.1 From 7c0849287c2fa0186239973a0844592328bca9ba Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 8 Aug 2013 10:01:50 +0800 Subject: Use the right width/height to initialize the internal buffers for MPEG-2 encoding Signed-off-by: Xiang, Haihao (cherry picked from commit 3ecbff585af918d96959ce791eec29be25360d91) --- src/gen6_mfc.c | 18 +++++++++++++++--- src/gen75_mfc.c | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 883a42b0..1103e612 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -516,9 +516,21 @@ gen6_mfc_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; dri_bo *bo; int i; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int width_in_mbs = 0; + int height_in_mbs = 0; + + if (encoder_context->codec == CODEC_H264) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index d2201e34..255b64c8 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -425,9 +425,21 @@ static void gen75_mfc_init(VADriverContextP ctx, struct 
gen6_mfc_context *mfc_context = encoder_context->mfc_context; dri_bo *bo; int i; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int width_in_mbs = 0; + int height_in_mbs = 0; + + if (encoder_context->codec == CODEC_H264) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); -- cgit v1.2.1 From dd9c77d4088cd6cd257227e66890929e65d6cd9c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 Aug 2013 13:40:10 +0800 Subject: A separate batch buffer for video processing It is easy to result in multithread issue if the rendering code and video processing code share the same batch buffer Signed-off-by: Xiang, Haihao (cherry picked from commit ce0984814269e0923f44196e47f1c7cc2dddc55c) --- src/i965_drv_video.c | 4 ++++ src/i965_drv_video.h | 1 + src/i965_post_processing.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6942eedf..ea1f1d00 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4795,6 +4795,7 @@ i965_driver_data_init(VADriverContextP ctx) goto err_subpic_heap; i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); + i965->pp_batch = 
intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); _i965InitMutex(&i965->render_mutex); _i965InitMutex(&i965->pp_mutex); @@ -4826,6 +4827,9 @@ i965_driver_data_terminate(VADriverContextP ctx) if (i965->batch) intel_batchbuffer_free(i965->batch); + if (i965->pp_batch) + intel_batchbuffer_free(i965->pp_batch); + i965_destroy_heap(&i965->subpic_heap, i965_destroy_subpic); i965_destroy_heap(&i965->image_heap, i965_destroy_image); i965_destroy_heap(&i965->buffer_heap, i965_destroy_buffer); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 301ebadd..a0e7790d 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -313,6 +313,7 @@ struct i965_driver_data _I965Mutex render_mutex; _I965Mutex pp_mutex; struct intel_batchbuffer *batch; + struct intel_batchbuffer *pp_batch; struct i965_render_state render_state; void *pp_context; char va_vendor[256]; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e91dc03d..22071225 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5147,7 +5147,7 @@ i965_post_processing_init(VADriverContextP ctx) if (HAS_PP(i965)) { if (pp_context == NULL) { pp_context = calloc(1, sizeof(*pp_context)); - i965_post_processing_context_init(ctx, pp_context, i965->batch); + i965_post_processing_context_init(ctx, pp_context, i965->pp_batch); i965->pp_context = pp_context; } } -- cgit v1.2.1 From 679529ebebe889afa4ffb3b7459218b41804bfac Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 Aug 2013 13:52:16 +0800 Subject: Convert 422H/422V/411P/444P into other formats for internal using Signed-off-by: Xiang, Haihao (cherry picked from commit 232ef48766c1f91a87a3d41f951fd2ac26dcf2ae) --- src/i965_post_processing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 22071225..bf1f7e18 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4973,6 +4973,10 @@ i965_image_processing(VADriverContextP 
ctx, case VA_FOURCC('I', '4', '2', '0'): case VA_FOURCC('I', 'M', 'C', '1'): case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC('4', '2', '2', 'H'): + case VA_FOURCC('4', '2', '2', 'V'): + case VA_FOURCC('4', '1', '1', 'P'): + case VA_FOURCC('4', '4', '4', 'P'): status = i965_image_pl3_processing(ctx, src_surface, src_rect, -- cgit v1.2.1 From a27920e84023e9be75997db6b61bbe94f36f2963 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Rewrite inter-frame shader for MPEG2 encoding on HSW to follow MPEG2 spec Now the MPEG2/H264 uses the same mode/motion vector prediction shader. But the MV search region of mpeg2 is different with that on H264, which causes that the wrong mode/motion vector prediction is used for MPEG2. Signed-off-by: Zhao Yakui (cherry picked from commit 6842f08aa375b5942cee4b9d06421609c212895a) --- src/gen6_mfc_common.c | 15 + src/gen6_vme.h | 8 + src/gen75_vme.c | 3 +- src/shaders/vme/Makefile.am | 6 +- src/shaders/vme/mpeg2_inter_haswell.asm | 696 +++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_haswell.g75a | 3 + src/shaders/vme/mpeg2_inter_haswell.g75b | 278 ++++++++++++ src/shaders/vme/vme75_mpeg2.inc | 3 + 8 files changed, 1008 insertions(+), 4 deletions(-) create mode 100644 src/shaders/vme/mpeg2_inter_haswell.asm create mode 100644 src/shaders/vme/mpeg2_inter_haswell.g75a create mode 100644 src/shaders/vme/mpeg2_inter_haswell.g75b diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index c2ec080d..b77916de 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1111,3 +1111,18 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, } ADVANCE_BCS_BATCH(batch); } + + +void intel_vme_mpeg2_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message); + 
VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | + width_in_mbs; +} diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 17f199e2..9d7acdf2 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -87,6 +87,9 @@ struct gen6_vme_context unsigned int vme_kernel_sum; }; +#define MPEG2_PIC_WIDTH_HEIGHT 30 + + Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern void intel_vme_update_mbmv_cost(VADriverContextP ctx, @@ -137,4 +140,9 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, extern void gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context); +extern void +intel_vme_mpeg2_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 231ffc61..41b85642 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -113,7 +113,7 @@ static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = { }; static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_frame_haswell.g75b" +#include "shaders/vme/mpeg2_inter_haswell.g75b" }; static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = { @@ -952,6 +952,7 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx, gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); gen75_vme_constant_setup(ctx, encode_state, encoder_context); /*Programing media pipeline*/ diff 
--git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index e3c401d3..27a1828a 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,6 +1,6 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb..asm -VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm +VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a @@ -12,8 +12,8 @@ INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7 INTEL_GEN7_INC = batchbuffer.inc vme.inc vme7_mpeg2.inc vme7.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) -INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b mpeg2_inter_frame_haswell.g75b inter_bframe_haswell.g75b -INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a mpeg2_inter_frame_haswell.g75a inter_bframe_haswell.g75a +INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b mpeg2_inter_frame_haswell.g75b inter_bframe_haswell.g75b mpeg2_inter_haswell.g75b +INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a mpeg2_inter_frame_haswell.g75a inter_bframe_haswell.g75a mpeg2_inter_haswell.g75a INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm new file mode 100644 index 00000000..17e48431 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -0,0 +1,696 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * Author : Zhao Yakui + */ +// Module name: mpeg2_inter_haswell.asm +// +// Make MPEG2 inter prediction estimation for Inter-frame on Haswell +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) +*/ +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV.
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV.
mbb_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV.
mbc_flag is zero */ +/* Based on the H.264 spec, MB D is used in place of MB C if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB
{align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are unavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov
(1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* m2, get the MV/Mb cost passed from 
constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* the neighbour pixel is zero for MPEG2 Intra-prediction */ + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0:UD {align1}; +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +/* Use the Luma mode */ +mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_4.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m5 */ +mov (8) vme_msg_5<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1}; + + +/* m6 */ +mov (8) vme_msg_6.0<1>:UD 0:Ud {align1}; + +/* + * SIC VME message + */ +/* m0 */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* Disable Intra8x8/Intra4x4 Intra-prediction */ +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Enable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +/* m0 */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen 
vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud 0:ud {align1}; +mov (1) vme_m1.20<1>:ud 0:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* M3/M4 search path */ + +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD 
{align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) 
msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + 
+__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + nop ; + nop ; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +nop; +nop; + +ref_boundary_check: + +/* The left/up coordinate of reference window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; +x_left_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; + jmpi 
(1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; + RETURN {align1}; +nop; +nop; diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75a b/src/shaders/vme/mpeg2_inter_haswell.g75a new file mode 100644 index 00000000..355812c1 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_haswell.g75a @@ -0,0 +1,3 @@ +#include "vme75.inc" +#include "vme75_mpeg2.inc" +#include "mpeg2_inter_haswell.asm" diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b new file mode 100644 index 00000000..b3e67e77 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -0,0 +1,278 @@ + { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000041, 0x24880c21, 0x00000488, 0x00000018 }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 }, + { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 }, + { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000740 }, + { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 
0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000f0 }, + { 0x00000001, 0x2ae000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000350 }, + { 0x00000001, 0x2b0000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 
0x2b1001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000110 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000040, 0x24003dad, 0x00000400, 0x00010001 }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000170 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000f0 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24003dad, 0x00450400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 }, + { 0x00200001, 
0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, + { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, + { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b340129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00021, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000190 }, + { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200af4, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b14, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b34, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200400, 0x00010001 }, + { 0x00010001, 0x2ac00021, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 }, + { 0x00000001, 0x2fa001ad, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000720 }, + { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000006c0 }, + { 0x00000001, 
0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00010001 }, + { 0x00000001, 0x28850231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb }, + { 0x00000001, 0x28840231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 }, + { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, + { 0x00000001, 
0x28280021, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340021, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24700061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24740061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 }, + { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 }, + { 0x00000001, 0x25740231, 0x00000400, 0x00000000 }, + { 0x00600001, 
0x28600021, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, + { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0021, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 }, + { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x0219e003 }, + { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 
0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000a0 }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000060 }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000030 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 }, + { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 }, + { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000030 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 }, + { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff }, + { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/vme75_mpeg2.inc 
b/src/shaders/vme/vme75_mpeg2.inc index 9b877acf..c8e42cc0 100644 --- a/src/shaders/vme/vme75_mpeg2.inc +++ b/src/shaders/vme/vme75_mpeg2.inc @@ -16,3 +16,6 @@ */ define(`INTER_PART_MASK', `0x7e000000') +define(`mpeg2_ref', `r83') +define(`pic_ref', `r82') +define(`INTRA16_DC_PRED', `0xBB') -- cgit v1.2.1 From 272c5a63d1854c391fc302ebdd3a48005a356444 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Restrict the MV search range based on MPEG2 encoding LEVEL Signed-off-by: Zhao Yakui (cherry picked from commit c05073b1f8764271ccf4fe1aa037f881dedd3818) --- src/gen6_mfc_common.c | 18 ++++++++++++++++++ src/gen6_vme.h | 6 ++++++ src/gen75_vme.c | 8 ++++++++ src/shaders/vme/mpeg2_inter_haswell.asm | 15 +++++++++++++++ src/shaders/vme/mpeg2_inter_haswell.g75b | 11 +++++++++++ 5 files changed, 58 insertions(+) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index b77916de..10790443 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1122,6 +1122,24 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx, VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + uint32_t mv_x, mv_y; + + if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { + mv_x = 512; + mv_y = 64; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) { + mv_x = 1024; + mv_y = 128; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) { + mv_x = 2048; + mv_y = 128; + } else { + WARN_ONCE("Incorrect Mpeg2 level setting!\n"); + mv_x = 512; + mv_y = 64; + } + + vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | width_in_mbs; diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 9d7acdf2..ab0ff3b3 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -85,9 +85,15 @@ struct 
gen6_vme_context unsigned int h264_level; unsigned int video_coding_type; unsigned int vme_kernel_sum; + unsigned int mpeg2_level; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 +#define MPEG2_MV_RANGE 29 +#define MPEG2_LEVEL_MASK 0x0f +#define MPEG2_LEVEL_LOW 0x0a +#define MPEG2_LEVEL_MAIN 0x08 +#define MPEG2_LEVEL_HIGH 0x04 Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 41b85642..39355281 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -948,6 +948,14 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } + /*Setup all the memory object*/ gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm index 17e48431..bd41a908 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.asm +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -670,6 +670,21 @@ add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; /* The right/bottom coordinate of reference window */ add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w 
INPUT_ARG1.16<2,2,1>:w -1:w {align1}; +add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; + +cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + x_left_cmp: cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; (-f0.0) jmpi (1) x_right_cmp; diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b index b3e67e77..cdaeb4c0 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.g75b +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -258,6 +258,17 @@ { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 }, { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 }, { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 }, + { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff }, + { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 }, + { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 }, + { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 }, + { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 }, + { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 }, + { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 }, + { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 }, + { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 }, { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, { 0x00010001, 0x2f6001ed, 
0x00000000, 0x00000000 }, -- cgit v1.2.1 From 68d570f80382f95a87d2ddccb778445f8a996afd Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Enable the Intra-prediction for MPEG2 P-B frame Signed-off-by: Zhao Yakui (cherry picked from commit c074d4d61ad931d044fbe0836a45c49768090b4b) --- src/gen75_mfc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 255b64c8..e4cdca5f 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -2240,7 +2240,26 @@ gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0xff, slice_batch); } else { - gen75_mfc_mpeg2_pak_object_inter(ctx, + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + + if (intra_rdo < inter_rdo) + gen75_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + else + gen75_mfc_mpeg2_pak_object_inter(ctx, encode_state, encoder_context, msg, -- cgit v1.2.1 From fe782daf315bf491780dc61a6b866f13ec190762 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: configure the dynamic VME MV/mode cost for MPEG2 encoding on Haswell Currently it uses the constant VME MV/mode cost when executing the mode/motion vector prediction for MPEG2 encoding on Haswell, which causes that the unoptimized mode/MV is used for MPEG2 encoding. 
Signed-off-by: Zhao Yakui (cherry picked from commit 6fec7e353704dc23c9675966467caffa95f792fe) --- src/gen6_mfc_common.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 10790443..ac28d23f 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1123,6 +1123,9 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx, int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; uint32_t mv_x, mv_y; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { mv_x = 512; @@ -1139,6 +1142,45 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx, mv_y = 64; } + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type != VAEncPictureTypeIntra) { + int qp, m_cost, j, mv_count; + float lambda, m_costf; + slice_param = (VAEncSliceParameterBufferMPEG2 *) + encode_state->slice_params_ext[0]->buffer; + qp = slice_param->quantiser_scale_code; + lambda = intel_lambda_qp(qp); + /* No Intra prediction. So it is zero */ + vme_state_message[MODE_INTRA_8X8] = 0; + vme_state_message[MODE_INTRA_4X4] = 0; + vme_state_message[MODE_INTER_MV0] = 0; + for (j = 1; j < 3; j++) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); + } + mv_count = 3; + for (j = 4; j <= 64; j *= 2) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + mv_count] = + intel_format_lutvalue(m_cost, 0x6f); + mv_count++; + } + m_cost = lambda; + /* It can only perform the 16x16 search. 
So mode cost can be ignored for + * the other mode. for example: 16x8/8x8 + */ + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + + vme_state_message[MODE_INTER_16X8] = 0; + vme_state_message[MODE_INTER_8X8] = 0; + vme_state_message[MODE_INTER_8X4] = 0; + vme_state_message[MODE_INTER_4X4] = 0; + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); + + } vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | -- cgit v1.2.1 From 19c32cefd707c0b547be5da5123160eaf22411bb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Use the scoreboard/walker to assure MB dependency for MPEG2 encoding If MVP is added for MPEG2 encoding, it must be assured that the left macroblock should be already finished before processing the current macroblock. And this needs the scoreboard/walker mechanism to assure MB dependency. 
Signed-off-by: Zhao Yakui (cherry picked from commit 5d2163d02d395fc0a794d834979a06c287bf9ba5) --- src/gen6_mfc_common.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/gen6_vme.h | 7 +++ src/gen75_vme.c | 37 ++++++++++++++- 3 files changed, 166 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index ac28d23f..469bf640 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1186,3 +1186,127 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx, vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | width_in_mbs; } + +void +gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned int *command_ptr; + +#define MPEG2_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + { + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + int first_mb = 0; + int num_mb = mb_width * mb_height; + + x_outer = 0; + y_outer = 0; + + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + 
*command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; + } + + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = 0; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } + } + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); + return; +} diff --git a/src/gen6_vme.h b/src/gen6_vme.h index ab0ff3b3..b130b58f 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -151,4 +151,11 @@ 
intel_vme_mpeg2_state_setup(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context); +extern void +gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context); + #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 39355281..3e769ed0 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -916,14 +916,47 @@ gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + bool allow_hwscore = true; + int s; + int kernel_shader; - gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type == VAEncPictureTypeIntra) { + allow_hwscore = false; + kernel_shader = VME_INTRA_SHADER; + } else { + kernel_shader = VME_INTER_SHADER; + } + + if (allow_hwscore) + gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); + else + 
gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, - is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + kernel_shader, 0, encoder_context); -- cgit v1.2.1 From 6d6c227ca19ffaa7454cc748b5864f2024974b33 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Configure the cost-center of MPEG2 VME shader on haswell This is derived from the neighbour macroblock based on MPEG2 spec. Signed-off-by: Zhao Yakui (cherry picked from commit a51860aba2d2713a9c8c817c430ffa93abd5145a) --- src/shaders/vme/mpeg2_inter_haswell.asm | 14 +++++++++----- src/shaders/vme/mpeg2_inter_haswell.g75b | 13 +++++++------ src/shaders/vme/vme75_mpeg2.inc | 2 ++ 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm index bd41a908..cc9efa42 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.asm +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -39,15 +39,16 @@ shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; + and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; (f0.0) jmpi (1) __mb_hwdep_end; /* read back the data for MB A */ /* the layout of MB result is: rx.0(Available). 
rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), * rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) */ -mov (8) mba_result.0<1>:ud 0x0:ud {align1}; -mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; -mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; mba_start: mov (8) mb_msg0.0<1>:ud 0:ud {align1}; and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; @@ -330,6 +331,8 @@ mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; __mb_hwdep_end: +mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; + /* Calibrate the ref window for MPEG2 */ mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; @@ -460,8 +463,9 @@ mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; /* Set the MV cost center */ -mov (1) vme_m1.16<1>:ud 0:ud {align1}; -mov (1) vme_m1.20<1>:ud 0:ud {align1}; +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; + mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b index cdaeb4c0..277f1165 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.g75b +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -12,11 +12,11 @@ { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 }, { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000740 }, { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000710 }, { 0x00600001, 0x2b400061, 
0x00000000, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, @@ -122,14 +122,15 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000720 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000730 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000006c0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000006d0 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, @@ -180,8 +181,8 @@ { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, - { 0x00000001, 0x24700061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x24740061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, diff --git a/src/shaders/vme/vme75_mpeg2.inc b/src/shaders/vme/vme75_mpeg2.inc index c8e42cc0..b297a49c 100644 --- a/src/shaders/vme/vme75_mpeg2.inc +++ b/src/shaders/vme/vme75_mpeg2.inc @@ -19,3 +19,5 @@ define(`INTER_PART_MASK', `0x7e000000') define(`mpeg2_ref', `r83') define(`pic_ref', `r82') define(`INTRA16_DC_PRED', `0xBB') +/* Cost center ref */ +define(`mv_cc_ref', `r81') -- cgit v1.2.1 From 
7264aeb6c682b0bc352303befdcec536cca02fec Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:23 +0800 Subject: Optimize the VME shader for MPEG2 on Haswell Signed-off-by: Zhao Yakui (cherry picked from commit 19e93152f0e10f94ecaf3ddecf95c1dc7b97dfed) --- src/shaders/vme/mpeg2_inter_haswell.asm | 145 +++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_haswell.g75b | 82 ++++++++++++++++- src/shaders/vme/vme75_mpeg2.inc | 10 +++ 3 files changed, 234 insertions(+), 3 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm index cc9efa42..0e91a04a 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.asm +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -414,6 +414,7 @@ send (8) mlen sic_vme_msg_length rlen vme_wb_length {align1}; + /* * Oword Block Write message */ @@ -540,6 +541,11 @@ send (8) rlen vme_wb_length {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(-f0.0) jmpi (1) vme_run_again; +nop; +vme_mv_output: + add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; /* write FME info */ @@ -568,6 +574,7 @@ send (16) rlen obw_wb_length {align1}; + /* Write FME/BME MV */ add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; @@ -617,6 +624,7 @@ send (16) rlen obw_wb_length {align1}; + /* Issue message fence so that the previous write message is committed */ send (16) mb_ind @@ -713,3 +721,140 @@ add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; RETURN {align1}; nop; nop; + +vme_run_again: + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; + +cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w 
tmp_reg0.0<0,1,0>:w -1:w {align1}; +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; + +cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (4) INPUT_ARG0.0<1>:ud vme_m0.0<4,4,1>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w INPUT_ARG0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* M3/M4 search path */ + +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 
0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1}; +(f0.0) 
jmpi (1) vme_done; +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; +nop; +nop; +nop; + diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b index 277f1165..d6625e3e 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.g75b +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -122,13 +122,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000730 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000006d0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000700 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, @@ -137,7 +137,7 @@ { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000790 }, { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, @@ -215,6 +215,9 @@ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x000004a0 }, + { 0x0000007e, 0x00000000, 0x00000000, 
0x00000000 }, { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, @@ -288,3 +291,76 @@ { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, + { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, + { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, + { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 }, + { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 }, + { 0x00010041, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000030 }, + { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000380 }, + { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00400001, 0x2fa00021, 0x00690440, 0x00000000 }, + { 0x00200040, 0x2fa035ad, 0x00450fa0, 0x00450a90 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffffca0 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680061, 
0x00000000, 0x30003030 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 }, + { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 }, + { 0x00000001, 0x25740231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, + { 0x05000010, 0x20002528, 0x00000188, 0x00000c88 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00600001, 0x21800021, 
0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffff710 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/vme75_mpeg2.inc b/src/shaders/vme/vme75_mpeg2.inc index b297a49c..b6380563 100644 --- a/src/shaders/vme/vme75_mpeg2.inc +++ b/src/shaders/vme/vme75_mpeg2.inc @@ -21,3 +21,13 @@ define(`pic_ref', `r82') define(`INTRA16_DC_PRED', `0xBB') /* Cost center ref */ define(`mv_cc_ref', `r81') +define(`tmp_vme_wb0', `r100') +define(`tmp_vme_wb1', `r101') +define(`tmp_vme_wb2', `r102') +define(`tmp_vme_wb3', `r103') +define(`tmp_vme_wb4', `r104') +define(`tmp_vme_wb5', `r105') +define(`tmp_vme_wb6', `r106') +define(`tmp_vme_wb7', `r107') +define(`tmp_vme_wb8', `r108') +define(`tmp_vme_wb9', `r109') -- cgit v1.2.1 From 5ae18f29106ffde1004b721013ebb81f7a273690 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Optimize quantization rounding precision for MPEG2 encoding on haswell Signed-off-by: Zhao Yakui (cherry picked from commit 24d8bf31c8aeb326bc8b33c1ac9700ec1d169666) --- src/gen75_mfc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index e4cdca5f..b3b6f881 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1780,9 +1780,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; 
BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -1807,7 +1809,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ -- cgit v1.2.1 From 159240f5def627909b4dcaf62159e34a4e2bcbc3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Pass the constant buffer info for MPEG2 encoding correctly on Ivb Signed-off-by: Zhao Yakui (cherry picked from commit 7690091889eac91dcab53e0318f9810c25071e18) --- src/gen7_vme.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 11af9e58..cb754a46 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -359,23 +359,34 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; - // unsigned char *constant_buffer; + unsigned char *constant_buffer; unsigned int *vme_state_message; - int mv_num = 32; - if (vme_context->h264_level >= 30) { - mv_num = 16; - if (vme_context->h264_level >= 31) - mv_num = 8; - } + int mv_num; + + vme_state_message = (unsigned int *)vme_context->vme_state_message; + mv_num = 32; + + if (encoder_context->codec == CODEC_H264) { + if (vme_context->h264_level >= 30) { + mv_num = 16; + + if (vme_context->h264_level >= 31) + mv_num = 8; + } + } else if (encoder_context->codec == CODEC_MPEG2) { + mv_num = 2; + } + + + vme_state_message[31] = mv_num; dri_bo_map(vme_context->gpe_context.curbe.bo, 1); assert(vme_context->gpe_context.curbe.bo->virtual); - // constant_buffer = 
vme_context->curbe.bo->virtual; - vme_state_message = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual; - vme_state_message[31] = mv_num; - - /*TODO copy buffer into CURB*/ + constant_buffer = vme_context->gpe_context.curbe.bo->virtual; + /* Pass the required constant info into the constant buffer */ + memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128); + dri_bo_unmap( vme_context->gpe_context.curbe.bo); return VA_STATUS_SUCCESS; @@ -519,7 +530,7 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx, +static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx, struct encode_state *encode_state, int is_intra, struct intel_encoder_context *encoder_context) @@ -560,8 +571,6 @@ static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx, } //vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra - gen7_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); - dri_bo_unmap( vme_context->vme_state.bo); return VA_STATUS_SUCCESS; } @@ -999,8 +1008,8 @@ gen7_vme_mpeg2_prepare(VADriverContextP ctx, /*Setup all the memory object*/ gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context); gen7_vme_interface_setup(ctx, encode_state, encoder_context); - gen7_vme_vme_state_setup(ctx, encode_state, 0, encoder_context); gen7_vme_constant_setup(ctx, encode_state, encoder_context); + gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context); /*Programing media pipeline*/ gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context); -- cgit v1.2.1 From 47aa58780c346e617c735508b09ee7b49d08358f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Remove the dead code in file of gen7_vme.c Signed-off-by: Zhao Yakui (cherry picked from commit d15582b54486847811f3feab46ddd51181561776) --- src/gen7_vme.c | 72 
---------------------------------------------------------- 1 file changed, 72 deletions(-) diff --git a/src/gen7_vme.c b/src/gen7_vme.c index cb754a46..0a1cc0ac 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -392,78 +392,6 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -static const unsigned int intra_mb_mode_cost_table[] = { - 0x31110001, // for qp0 - 0x09110001, // for qp1 - 0x15030001, // for qp2 - 0x0b030001, // for qp3 - 0x0d030011, // for qp4 - 0x17210011, // for qp5 - 0x41210011, // for qp6 - 0x19210011, // for qp7 - 0x25050003, // for qp8 - 0x1b130003, // for qp9 - 0x1d130003, // for qp10 - 0x27070021, // for qp11 - 0x51310021, // for qp12 - 0x29090021, // for qp13 - 0x35150005, // for qp14 - 0x2b0b0013, // for qp15 - 0x2d0d0013, // for qp16 - 0x37170007, // for qp17 - 0x61410031, // for qp18 - 0x39190009, // for qp19 - 0x45250015, // for qp20 - 0x3b1b000b, // for qp21 - 0x3d1d000d, // for qp22 - 0x47270017, // for qp23 - 0x71510041, // for qp24 ! center for qp=0..30 - 0x49290019, // for qp25 - 0x55350025, // for qp26 - 0x4b2b001b, // for qp27 - 0x4d2d001d, // for qp28 - 0x57370027, // for qp29 - 0x81610051, // for qp30 - 0x57270017, // for qp31 - 0x81510041, // for qp32 ! 
center for qp=31..51 - 0x59290019, // for qp33 - 0x65350025, // for qp34 - 0x5b2b001b, // for qp35 - 0x5d2d001d, // for qp36 - 0x67370027, // for qp37 - 0x91610051, // for qp38 - 0x69390029, // for qp39 - 0x75450035, // for qp40 - 0x6b3b002b, // for qp41 - 0x6d3d002d, // for qp42 - 0x77470037, // for qp43 - 0xa1710061, // for qp44 - 0x79490039, // for qp45 - 0x85550045, // for qp46 - 0x7b4b003b, // for qp47 - 0x7d4d003d, // for qp48 - 0x87570047, // for qp49 - 0xb1810071, // for qp50 - 0x89590049 // for qp51 -}; - -static void gen7_vme_state_setup_fixup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *vme_state_message) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - - if (slice_param->slice_type != SLICE_TYPE_I && - slice_param->slice_type != SLICE_TYPE_SI) - return; - if (encoder_context->rate_control_mode == VA_RC_CQP) - vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; - else - vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY]; -} static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, struct encode_state *encode_state, -- cgit v1.2.1 From b6b6d3b86d9d587f4986c8a6c8ec70f643ff63cd Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Rewrite the VME shader for MPEG2 encoding on Ivy Signed-off-by: Zhao Yakui (cherry picked from commit 6200c9a7779c1309f5a85b7c62aec1b9796793c6) --- src/gen7_vme.c | 43 ++- src/shaders/vme/Makefile.am | 6 +- src/shaders/vme/mpeg2_inter_ivb.asm | 592 ++++++++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_ivb.g7a | 3 + 
src/shaders/vme/mpeg2_inter_ivb.g7b | 251 +++++++++++++++ src/shaders/vme/vme7_mpeg2.inc | 3 + 6 files changed, 883 insertions(+), 15 deletions(-) create mode 100644 src/shaders/vme/mpeg2_inter_ivb.asm create mode 100644 src/shaders/vme/mpeg2_inter_ivb.g7a create mode 100644 src/shaders/vme/mpeg2_inter_ivb.g7b diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 0a1cc0ac..847e801c 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -126,7 +126,7 @@ static struct i965_kernel gen7_vme_kernels[] = { }; static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_frame.g7b" +#include "shaders/vme/mpeg2_inter_ivb.g7b" }; static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = { @@ -855,33 +855,43 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { int slice_mb_begin = slice_param->macroblock_address; int slice_mb_number = slice_param->num_macroblocks; + unsigned int mb_intra_ub; for (i = 0; i < slice_mb_number;) { - int mb_count = i + slice_mb_begin; + int mb_count = i + slice_mb_begin; mb_x = mb_count % mb_width; mb_y = mb_count / mb_width; + mb_intra_ub = 0; - if( i == 0) { - number_mb_cmds = mb_width; - } else if ((i + 128) <= slice_mb_number) { - number_mb_cmds = 128; - } else { - number_mb_cmds = slice_mb_number - i; + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; } + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; *command_ptr++ = 0; *command_ptr++ = 0; - + /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (number_mb_cmds << 16) | transform_8x8_mode_flag | ((i == 0) << 1)); + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | 
(mb_intra_ub << 8)); - i += number_mb_cmds; + i += 1; } slice_param++; @@ -932,8 +942,17 @@ gen7_vme_mpeg2_prepare(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } + + /*Setup all the memory object*/ - /*Setup all the memory object*/ + intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context); gen7_vme_interface_setup(ctx, encode_state, encoder_context); gen7_vme_constant_setup(ctx, encode_state, encoder_context); diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 27a1828a..2bc883ab 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,5 +1,5 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm -VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb..asm +VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b @@ -7,8 +7,8 @@ INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a INTEL_GEN6_INC = batchbuffer.inc vme.inc INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm) -INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b mpeg2_inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b -INTEL_G7A = batchbuffer.g7a 
intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a +INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b mpeg2_inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b mpeg2_inter_ivb.g7b +INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a mpeg2_inter_ivb.g7a INTEL_GEN7_INC = batchbuffer.inc vme.inc vme7_mpeg2.inc vme7.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm new file mode 100644 index 00000000..3c7661f5 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.asm @@ -0,0 +1,592 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * Authors: Zhao Yakui + * + */ +// Modual name: mpeg2_inter_ivb.asm +// +// Make inter predition estimation for Mpeg2 Inter frame on Ivy +// + +// +// Now, begin source code.... 
+// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; + +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) +*/ +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mbb_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. 
mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mbc_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub 
INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mb_mvp_start; +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) 
mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; + +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mb_mvp_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are invailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d 
tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: + + +/* Calibrate the ref 
window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* m2 */ +mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_3.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + + +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:ud {align1}; +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +/* m1 */ +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + + +/* M0 */ +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* m1 */ + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* MV num is passed by constant buffer. 
R4.28 */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + + +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_MIXED + ) + mlen vme_msg_length + rlen vme_inter_wb_length + {align1}; + +and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; + +(-f0.0)jmpi (1) __INTRA_INFO ; + +__INTER_INFO: +/* Write MV pairs */ +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; + +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +mov (1) msg_reg1.0<1>:ud vme_wb0.0<0,1,0>:ud {align1} ; +mov (1) msg_reg1.4<1>:UD vme_wb0.28<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:ud tmp_ud1<0,1,0>:ud {align1} ; +mov (1) msg_reg1.12<1>:ud vme_wb0.0<0,1,0>:ud {align1} ; +mov (1) msg_reg1.16<1>:ud 0x25:ud {align1} ; +jmpi (1) __OUTPUT_INFO; + +__INTRA_INFO: +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; +mov (1) msg_reg1.16<1>:ud 0x35:ud {align1} ; + +__OUTPUT_INFO: + +mov (1) msg_reg1.20<1>:ud obw_m0.8<0,1,0>:ud {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 
INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1}; + + +/* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +nop; +nop; + +ref_boundary_check: + +/* The left/up coordinate of reference window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate 
of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; +add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; + +cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + + +x_left_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; + jmpi (1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; + RETURN {align1}; +nop; +nop; + + diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7a b/src/shaders/vme/mpeg2_inter_ivb.g7a new file mode 100644 
index 00000000..bf0cdb34 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.g7a @@ -0,0 +1,3 @@ +#include "vme7.inc" +#include "vme7_mpeg2.inc" +#include "mpeg2_inter_ivb.asm" diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b new file mode 100644 index 00000000..96466d11 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.g7b @@ -0,0 +1,251 @@ + { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 }, + { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 }, + { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000f2 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00000001, 0x2ae000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 
0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000076 }, + { 0x00000001, 0x2b0000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000026 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 
0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000040, 0x24003dad, 0x00000400, 0x00010001 }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000032 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000028 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24003dad, 0x00450400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 
0x00010001 }, + { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, + { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b340129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00021, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000032 }, + { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200af4, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b14, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b34, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200400, 0x00010001 }, + { 0x00010001, 0x2ac00021, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000018 }, + { 0x00000001, 0x2fa001ad, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000088 }, + { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000007c }, + { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 
0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000090 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb }, + { 0x00000001, 0x28640231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, + { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, + { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, + { 0x00110020, 0x34001c00, 0x00001400, 
0x00000012 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000544, 0x00000000 }, + { 0x00000001, 0x282c0021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28300061, 0x00000000, 0x00000025 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28300061, 0x00000000, 0x00000035 }, + { 0x00000001, 0x28340021, 0x00000488, 0x00000000 }, + { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x0219e003 }, + { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000014 }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 
0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 }, + { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 }, + { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff }, + { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 }, + { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 }, + { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 }, + { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 }, + { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 }, + { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 }, + { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 }, + { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 }, + { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000002 }, + { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 }, + { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff }, + { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git 
a/src/shaders/vme/vme7_mpeg2.inc b/src/shaders/vme/vme7_mpeg2.inc index 9b877acf..c8e42cc0 100644 --- a/src/shaders/vme/vme7_mpeg2.inc +++ b/src/shaders/vme/vme7_mpeg2.inc @@ -16,3 +16,6 @@ */ define(`INTER_PART_MASK', `0x7e000000') +define(`mpeg2_ref', `r83') +define(`pic_ref', `r82') +define(`INTRA16_DC_PRED', `0xBB') -- cgit v1.2.1 From 58e4c588326b332c2250842f6dd103a6e17adf2a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Optimize MPEG2 encoding on Ivb This is backported from Haswell. The scoreboard/walker/cost-center is applied. Signed-off-by: Zhao yakui (cherry picked from commit cc7452d14f7faa1d5b5fa8c16db3e3cb3fd4f53d) --- src/gen7_vme.c | 39 ++++++++++++++++++++++++++++++------- src/shaders/vme/mpeg2_inter_ivb.asm | 5 +++-- src/shaders/vme/mpeg2_inter_ivb.g7b | 9 +++++---- src/shaders/vme/vme7_mpeg2.inc | 3 +++ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 847e801c..097fe08b 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -466,6 +466,9 @@ static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx, struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *vme_state_message; int i; + unsigned int *mb_cost_table; + + mb_cost_table = (unsigned int *)vme_context->vme_state_message; //building VME state message dri_bo_map(vme_context->vme_state.bo, 1); @@ -487,12 +490,12 @@ static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx, vme_state_message[12] = 0x00; vme_state_message[13] = 0x00; - vme_state_message[14] = 0x4a4a; - vme_state_message[15] = 0x0; - vme_state_message[16] = 0x4a4a4a4a; - vme_state_message[17] = 0x4a4a4a4a; - vme_state_message[18] = 0x21110100; - vme_state_message[19] = 0x61514131; + vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); + vme_state_message[15] = 0; + vme_state_message[16] = mb_cost_table[0]; + vme_state_message[17] = 0; + vme_state_message[18] = mb_cost_table[3]; + vme_state_message[19] = 
mb_cost_table[4]; for(i = 20; i < 32; i++) { vme_state_message[i] = 0; @@ -916,7 +919,29 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, + bool allow_hwscore = true; + int s; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + if (allow_hwscore) + gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + MPEG2_VME_INTER_SHADER, + encoder_context); + else + gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, MPEG2_VME_INTER_SHADER, diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm index 3c7661f5..57522e1e 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.asm +++ b/src/shaders/vme/mpeg2_inter_ivb.asm @@ -344,6 +344,7 @@ mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; __mb_hwdep_end: +mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; /* Calibrate the ref window for MPEG2 */ mov (1) vme_m0.0<1>:W -16:W {align1}; @@ -405,8 +406,8 @@ mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; /* Set the MV cost center */ -mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; -mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b index 
96466d11..2973488f 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.g7b +++ b/src/shaders/vme/mpeg2_inter_ivb.g7b @@ -130,14 +130,15 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000088 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000008a }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000007c }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000007e }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, @@ -170,8 +171,8 @@ { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, - { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, - { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, diff --git a/src/shaders/vme/vme7_mpeg2.inc b/src/shaders/vme/vme7_mpeg2.inc index c8e42cc0..8ca768b8 100644 --- a/src/shaders/vme/vme7_mpeg2.inc +++ b/src/shaders/vme/vme7_mpeg2.inc @@ -19,3 +19,6 @@ define(`INTER_PART_MASK', `0x7e000000') define(`mpeg2_ref', `r83') define(`pic_ref', `r82') define(`INTRA16_DC_PRED', `0xBB') + +/* Cost center ref */ +define(`mv_cc_ref', `r81') -- cgit v1.2.1 From 
af57700ca9d44cf16b132d9bd9ea41b1891c9973 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Optimize the VME shader for MPEG2 encoding on Ivb Signed-off-by: Zhao Yakui (cherry picked from commit 15db142076321e9523db0c8be8e6bae5e1b64c8a) --- src/shaders/vme/mpeg2_inter_ivb.asm | 112 ++++++++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_ivb.g7b | 62 +++++++++++++++++++- src/shaders/vme/vme7_mpeg2.inc | 11 ++++ 3 files changed, 182 insertions(+), 3 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm index 57522e1e..261e74c2 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.asm +++ b/src/shaders/vme/mpeg2_inter_ivb.asm @@ -425,6 +425,11 @@ send (8) rlen vme_inter_wb_length {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(-f0.0) jmpi (1) vme_run_again; + +vme_mv_output: + and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; (-f0.0)jmpi (1) __INTRA_INFO ; @@ -590,4 +595,111 @@ add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; nop; nop; +vme_run_again: + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; + +cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; + +cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for 
MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* m2 */ +mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + + +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:ud {align1}; +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +/* m1 */ +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + + +/* M0 */ +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* m1 */ + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* MV num is passed by constant buffer. 
R4.28 */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + + +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_INTER + ) + mlen vme_msg_length + rlen vme_inter_wb_length + {align1}; + + +cmp.l.f0.0 (1) null:uw vme_wb0.6<0,1,0>:uw tmp_vme_wb0.6<0,1,0>:uw {align1}; +(f0.0) jmpi (1) vme_done; +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; +nop; +nop; +nop; diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b index 2973488f..2ef3b13b 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.g7b +++ b/src/shaders/vme/mpeg2_inter_ivb.g7b @@ -130,13 +130,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000008a }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000008e }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000007e }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000082 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 
0xfff0fff0 }, @@ -145,7 +145,7 @@ { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000090 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000094 }, { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, @@ -175,6 +175,8 @@ { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000096 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000012 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, @@ -250,3 +252,57 @@ { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, + { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, + { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, + { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 }, + { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 }, + { 0x00010041, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000002 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000004a }, + { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 
0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00200040, 0x2fa035ad, 0x00450440, 0x00450a90 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xffffff94 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 }, + { 0x05000010, 0x20002528, 0x00000186, 0x00000c86 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00600001, 0x21800021, 0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xffffff04 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff 
--git a/src/shaders/vme/vme7_mpeg2.inc b/src/shaders/vme/vme7_mpeg2.inc index 8ca768b8..2d7852a4 100644 --- a/src/shaders/vme/vme7_mpeg2.inc +++ b/src/shaders/vme/vme7_mpeg2.inc @@ -22,3 +22,14 @@ define(`INTRA16_DC_PRED', `0xBB') /* Cost center ref */ define(`mv_cc_ref', `r81') + +define(`tmp_vme_wb0', `r100') +define(`tmp_vme_wb1', `r101') +define(`tmp_vme_wb2', `r102') +define(`tmp_vme_wb3', `r103') +define(`tmp_vme_wb4', `r104') +define(`tmp_vme_wb5', `r105') +define(`tmp_vme_wb6', `r106') +define(`tmp_vme_wb7', `r107') +define(`tmp_vme_wb8', `r108') +define(`tmp_vme_wb9', `r109') -- cgit v1.2.1 From 7e5cdb824354ebcad42cd7e74916fff4771d0d43 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Optimize quantization rounding precision for MPEG2 encoding on Ivy Signed-off-by: Zhao Yakui (cherry picked from commit 05ea96da3e0b09648bfdeb35967f6ab9bb3b23e4) --- src/gen7_mfc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index dda8f91c..e35ca85e 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -381,9 +381,11 @@ gen7_mfc_mpeg2_pic_state(VADriverContextP ctx, VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -408,7 +410,12 @@ gen7_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + + if (slice_param && slice_param->quantiser_scale_code >= 14) + 
OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ -- cgit v1.2.1 From 719fbefea2930e40020cd7baab34240424619f02 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 12 Aug 2013 15:13:24 +0800 Subject: Remove the unnecessary shader binary for MPEG2 encoding on Haswell/Ivb Signed-off-by: Zhao Yakui (cherry picked from commit 42bb613e72d235bcbe141c906dec9431e4c29661) --- src/shaders/vme/Makefile.am | 8 +- src/shaders/vme/mpeg2_inter_frame.g7a | 3 - src/shaders/vme/mpeg2_inter_frame.g7b | 105 --------- src/shaders/vme/mpeg2_inter_frame_haswell.g75a | 3 - src/shaders/vme/mpeg2_inter_frame_haswell.g75b | 297 ------------------------- 5 files changed, 4 insertions(+), 412 deletions(-) delete mode 100644 src/shaders/vme/mpeg2_inter_frame.g7a delete mode 100644 src/shaders/vme/mpeg2_inter_frame.g7b delete mode 100644 src/shaders/vme/mpeg2_inter_frame_haswell.g75a delete mode 100644 src/shaders/vme/mpeg2_inter_frame_haswell.g75b diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 2bc883ab..d3c20998 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -7,13 +7,13 @@ INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a INTEL_GEN6_INC = batchbuffer.inc vme.inc INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm) -INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b mpeg2_inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b mpeg2_inter_ivb.g7b -INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a mpeg2_inter_ivb.g7a +INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b mpeg2_inter_ivb.g7b +INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a 
mpeg2_inter_ivb.g7a INTEL_GEN7_INC = batchbuffer.inc vme.inc vme7_mpeg2.inc vme7.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) -INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b mpeg2_inter_frame_haswell.g75b inter_bframe_haswell.g75b mpeg2_inter_haswell.g75b -INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a mpeg2_inter_frame_haswell.g75a inter_bframe_haswell.g75a mpeg2_inter_haswell.g75a +INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b inter_bframe_haswell.g75b mpeg2_inter_haswell.g75b +INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a inter_bframe_haswell.g75a mpeg2_inter_haswell.g75a INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) diff --git a/src/shaders/vme/mpeg2_inter_frame.g7a b/src/shaders/vme/mpeg2_inter_frame.g7a deleted file mode 100644 index 937ea9bd..00000000 --- a/src/shaders/vme/mpeg2_inter_frame.g7a +++ /dev/null @@ -1,3 +0,0 @@ -#include "vme.inc" -#include "vme7_mpeg2.inc" -#include "inter_frame.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame.g7b b/src/shaders/vme/mpeg2_inter_frame.g7b deleted file mode 100644 index 40aeb3f5..00000000 --- a/src/shaders/vme/mpeg2_inter_frame.g7b +++ /dev/null @@ -1,105 +0,0 @@ - { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24600061, 0x00000000, 0x00000000 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, - { 
0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, - { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, - { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 }, - { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, - { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, - { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, - { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, - { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, - { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, - { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a }, - { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 }, - { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 }, - { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 }, - { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 }, - { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 }, - { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 }, - { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 }, - { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 }, - { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x22400061, 0x00000000, 0x00000000 }, - { 
0x00000005, 0x22440c21, 0x00000244, 0xff000000 }, - { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 }, - { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 }, - { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 }, - { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, - { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x24a00021, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x24c00021, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x24e00021, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x25000021, 0x008d0200, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d04a0, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d04c0, 0x00000000 }, - { 0x00600001, 0x28600021, 0x008d04e0, 0x00000000 }, - { 0x00600001, 0x28800021, 0x008d0500, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 }, - { 0x00000040, 0x28080c21, 0x00000488, 0x00000008 }, - { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x0000001c }, - { 0x00000001, 0x25420169, 0x00000000, 0x00000000 }, - { 0x00000001, 0x25440061, 0x00000000, 0x00000000 }, - { 0x00010005, 0x25422d29, 0x00000182, 0x00200020 }, - { 0x00010008, 0x25422d29, 0x00200542, 0x00050005 }, - { 0x00010041, 0x25442d21, 0x00000542, 0x00600060 }, - { 0x00010040, 0x25442c21, 0x00000544, 0x00200020 }, - { 0x00010009, 0x25422d29, 0x00000542, 0x00050005 }, - { 0x00010040, 0x25422d29, 0x00000542, 0x00400040 }, - { 0x00000040, 0x25422d29, 0x00000542, 0x000e000e }, - { 0x00000001, 0x28200129, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28220129, 0x00000542, 0x00000000 }, - { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000544, 0x00000000 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, - { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, - { 
0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 }, - { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, - { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, - { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 }, - { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, - { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a }, - { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x02001400, 0xffffff66 }, - { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a b/src/shaders/vme/mpeg2_inter_frame_haswell.g75a deleted file mode 100644 index 662c76f7..00000000 --- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a +++ /dev/null @@ -1,3 +0,0 @@ -#include "vme75.inc" -#include "vme75_mpeg2.inc" -#include "inter_frame_haswell.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b b/src/shaders/vme/mpeg2_inter_frame_haswell.g75b deleted file mode 100644 index 35175c76..00000000 --- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b +++ /dev/null @@ -1,297 +0,0 @@ - { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00000001, 0x240800e1, 
0x00000000, 0x0000001f }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, - { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, - { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, - { 0x00000041, 0x24880c21, 0x00000488, 0x00000018 }, - { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00030003 }, - { 0x00000041, 0x24003ca5, 0x00000400, 0x00020002 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x26001cb1, 0x00000800, 0x02190006 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00030003 }, - { 0x00000041, 0x24203ca5, 0x00000420, 0x00020002 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x00070003 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 }, - { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000740 }, - { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, - { 0x00210001, 0x2af401ed, 
0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x000000f0 }, - { 0x00000001, 0x2ae000e5, 0x00000000, 0x00000001 }, - { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24003dad, 0x00000400, 0xffffffff }, - { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, - { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, - { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, - { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, - { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, - { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, - { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2ae40021, 0x00450bc8, 0x00000000 }, - { 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 }, - { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, - { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, - { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000350 }, - { 0x00000001, 0x2b0000e5, 0x00000000, 0x00000001 }, - { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, - { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, - { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, - { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, - { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, - { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, - { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, - { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b040021, 0x00450bf0, 0x00000000 }, - { 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 }, - { 0x00600001, 0x2b400061, 
0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000110 }, - { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, - { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, - { 0x00000040, 0x24003dad, 0x00000400, 0x00010001 }, - { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, - { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, - { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, - { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, - { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, - { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000170 }, - { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b240021, 0x00450bf0, 0x00000000 }, - { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000120 }, - { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00040004 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x000000f0 }, - { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, - { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, - { 0x00200040, 0x24003dad, 0x00450400, 0xffffffff }, - { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, - { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, - { 0x00000041, 0x2b480c21, 0x00000b48, 0x00000018 }, - { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280303 }, - { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, - { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, - { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b240021, 0x00450c18, 0x00000000 }, - { 0x00000001, 0x2b3001ed, 
0x00000000, 0x00010001 }, - { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, - { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, - { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, - { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, - { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, - { 0x00010001, 0x2b340129, 0x00000af4, 0x00000000 }, - { 0x00010001, 0x2ac00021, 0x00000ae4, 0x00000000 }, - { 0x00110001, 0x2ac00061, 0x00000000, 0x00000000 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000190 }, - { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, - { 0x01000010, 0x20003da4, 0x00200af4, 0x00000000 }, - { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, - { 0x00010001, 0x24040021, 0x00000ae4, 0x00000000 }, - { 0x01000010, 0x20003da4, 0x00200b14, 0x00000000 }, - { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, - { 0x00010001, 0x24040021, 0x00000b04, 0x00000000 }, - { 0x01000010, 0x20003da4, 0x00200b34, 0x00000000 }, - { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, - { 0x00010001, 0x24040021, 0x00000b24, 0x00000000 }, - { 0x01000010, 0x20003da4, 0x00200400, 0x00010001 }, - { 0x00010001, 0x2ac00021, 0x00000404, 0x00000000 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 }, - { 0x00000001, 0x2fa001ad, 0x00000ae4, 0x00000000 }, - { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, - { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, - { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 }, - { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, - { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, - { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, - { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, - { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 }, - { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, - { 0x0020000c, 0x2a803dad, 
0x00450ac0, 0x00020002 }, - { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, - { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, - { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, - { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x23800061, 0x00000000, 0x00000000 }, - { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 }, - { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 }, - { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 }, - { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 }, - { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 }, - { 0x00000001, 0x28b40129, 0x00000606, 0x00000000 }, - { 0x00400001, 0x28d00021, 0x00690608, 0x00000000 }, - { 0x00600001, 0x28c00129, 0x00ae0622, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x24000169, 0x00000000, 0x00010001 }, - { 0x00000001, 0x28850231, 0x00000400, 0x00000000 }, - { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 }, - { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, - { 0x00000001, 0x24000169, 0x00000000, 0x00200020 }, - { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x00800000 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, - { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, - { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 }, - { 0x00000001, 0x28340021, 0x00000188, 0x00000000 }, - { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 }, - { 0x00000001, 0x283c0021, 
0x00000488, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 }, - { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 0x00000000 }, - { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, - { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, - { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, - { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, - { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, - { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, - { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, - { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, - { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, - { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, - { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, - { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, - { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, - { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, - { 0x08600031, 0x21801ca1, 
0x00000800, 0x0a784000 }, - { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, - { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 }, - { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 }, - { 0x00000001, 0x25740231, 0x00000400, 0x00000000 }, - { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x00243000 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, - { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, - { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240021, 0x00000198, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000188, 0x00000000 }, - { 0x00000001, 0x282c0021, 0x00000574, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 }, - { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 }, - { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, - { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x0219e003 }, - { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 }, - { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, - { 0x0000007e, 0x00000000, 
0x00000000, 0x00000000 }, - { 0x06000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 }, - { 0x00110001, 0x2f6001ad, 0x00000fa4, 0x00000000 }, - { 0x06000010, 0x200035ac, 0x00000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, - { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 }, - { 0x00110001, 0x2f6001ad, 0x00000fa4, 0x00000000 }, - { 0x04000010, 0x200035ac, 0x00000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, - { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 }, - { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x000000a0 }, - { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000060 }, - { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000030 }, - { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, - { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, -- cgit v1.2.1 From c9b8eadcd9b49f884bcb978dbc541c525502ffa9 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 2 Sep 2013 16:23:36 +0800 Subject: Support B frame for reference frame Signed-off-by: Xiang, Haihao --- src/i965_encoder_utils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/i965_encoder_utils.c 
b/src/i965_encoder_utils.c index 7f6f7687..cc67d15f 100644 --- a/src/i965_encoder_utils.c +++ b/src/i965_encoder_utils.c @@ -298,6 +298,7 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param, { avc_bitstream bs; int is_idr = !!pic_param->pic_fields.bits.idr_pic_flag; + int is_ref = !!pic_param->pic_fields.bits.reference_pic_flag; avc_bitstream_start(&bs); nal_start_code_prefix(&bs); @@ -305,10 +306,12 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param, if (IS_I_SLICE(slice_param->slice_type)) { nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR); } else if (IS_P_SLICE(slice_param->slice_type)) { - nal_header(&bs, NAL_REF_IDC_MEDIUM, is_idr ? NAL_IDR : NAL_NON_IDR); + assert(!is_idr); + nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR); } else { assert(IS_B_SLICE(slice_param->slice_type)); - nal_header(&bs, NAL_REF_IDC_NONE, is_idr ? NAL_IDR : NAL_NON_IDR); + assert(!is_idr); + nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR); } slice_header(&bs, sps_param, pic_param, slice_param); -- cgit v1.2.1 From 33cb20b36983dfb4407ff855f537b929cff45f8d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 20 Aug 2013 22:02:09 +0800 Subject: Convert RGBx/RGBA into non-NV12 format It is the combined conversions of RGBx/RGBA->NV12 and NV12->non-NV12. 
It would be better to implement RGBx/RGBA->YUV444 and YUV444->non-444 later Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 73 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index bf1f7e18..23865bf0 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1099,6 +1099,22 @@ pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) return fourcc; } +static void +pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height) +{ + if (surface->type == I965_SURFACE_TYPE_IMAGE) { + struct object_image *obj_image = (struct object_image *)surface->base; + + *width = obj_image->image.width; + *height = obj_image->image.height; + } else { + struct object_surface *obj_surface = (struct object_surface *)surface->base; + + *width = obj_surface->orig_width; + *height = obj_surface->orig_height; + } +} + static void pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling) { @@ -4776,6 +4792,13 @@ i965_post_processing( return out_surface_id; } +static VAStatus +i965_image_pl2_processing(VADriverContextP ctx, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect); + static VAStatus i965_image_pl1_rgbx_processing(VADriverContextP ctx, const struct i965_surface *src_surface, @@ -4795,14 +4818,51 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, dst_rect, PP_RGBX_LOAD_SAVE_NV12, NULL); + intel_batchbuffer_flush(pp_context->batch); + + return VA_STATUS_SUCCESS; } else { - assert(0); - return VA_STATUS_ERROR_UNKNOWN; - } + VAStatus status; + VASurfaceID out_surface_id = VA_INVALID_SURFACE; + struct object_surface *obj_surface = NULL; + struct i965_surface tmp_surface; + int width, height; - intel_batchbuffer_flush(pp_context->batch); + pp_get_surface_size(ctx, 
dst_surface, &width, &height); + status = i965_CreateSurfaces(ctx, + width, + height, + VA_RT_FORMAT_YUV420, + 1, + &out_surface_id); + assert(status == VA_STATUS_SUCCESS); + obj_surface = SURFACE(out_surface_id); + assert(obj_surface); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); - return VA_STATUS_SUCCESS; + tmp_surface.base = (struct object_base *)obj_surface; + tmp_surface.type = I965_SURFACE_TYPE_SURFACE; + tmp_surface.flags = I965_SURFACE_FLAG_FRAME; + + status = i965_image_pl1_rgbx_processing(ctx, + src_surface, + src_rect, + &tmp_surface, + dst_rect); + + if (status == VA_STATUS_SUCCESS) + status = i965_image_pl2_processing(ctx, + &tmp_surface, + dst_rect, + dst_surface, + dst_rect); + + i965_DestroySurfaces(ctx, + &out_surface_id, + 1); + + return status; + } } static VAStatus @@ -4907,8 +4967,7 @@ i965_image_pl2_processing(VADriverContextP ctx, PP_NV12_LOAD_SAVE_RGBX, NULL); } else { - assert(0); - return VA_STATUS_ERROR_UNKNOWN; + return VA_STATUS_ERROR_UNIMPLEMENTED; } intel_batchbuffer_flush(pp_context->batch); -- cgit v1.2.1 From f54dbcaa7e3d9cdf9a16e7d078e35c7334142d00 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 23 Aug 2013 13:39:07 +0800 Subject: More conversions between two images with different pixel formats Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 122 +++++++++++++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 44 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 23865bf0..0124072b 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4799,6 +4799,63 @@ i965_image_pl2_processing(VADriverContextP ctx, struct i965_surface *dst_surface, const VARectangle *dst_rect); +static VAStatus +i965_image_plx_nv12_plx_processing(VADriverContextP ctx, + VAStatus (*i965_image_plx_nv12_processing)( + VADriverContextP, + const struct i965_surface *, + const VARectangle *, + struct i965_surface *, 
+ const VARectangle *), + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAStatus status; + VASurfaceID tmp_surface_id = VA_INVALID_SURFACE; + struct object_surface *obj_surface = NULL; + struct i965_surface tmp_surface; + int width, height; + + pp_get_surface_size(ctx, dst_surface, &width, &height); + status = i965_CreateSurfaces(ctx, + width, + height, + VA_RT_FORMAT_YUV420, + 1, + &tmp_surface_id); + assert(status == VA_STATUS_SUCCESS); + obj_surface = SURFACE(tmp_surface_id); + assert(obj_surface); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + + tmp_surface.base = (struct object_base *)obj_surface; + tmp_surface.type = I965_SURFACE_TYPE_SURFACE; + tmp_surface.flags = I965_SURFACE_FLAG_FRAME; + + status = i965_image_plx_nv12_processing(ctx, + src_surface, + src_rect, + &tmp_surface, + dst_rect); + + if (status == VA_STATUS_SUCCESS) + status = i965_image_pl2_processing(ctx, + &tmp_surface, + dst_rect, + dst_surface, + dst_rect); + + i965_DestroySurfaces(ctx, + &tmp_surface_id, + 1); + + return status; +} + + static VAStatus i965_image_pl1_rgbx_processing(VADriverContextP ctx, const struct i965_surface *src_surface, @@ -4822,46 +4879,12 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, return VA_STATUS_SUCCESS; } else { - VAStatus status; - VASurfaceID out_surface_id = VA_INVALID_SURFACE; - struct object_surface *obj_surface = NULL; - struct i965_surface tmp_surface; - int width, height; - - pp_get_surface_size(ctx, dst_surface, &width, &height); - status = i965_CreateSurfaces(ctx, - width, - height, - VA_RT_FORMAT_YUV420, - 1, - &out_surface_id); - assert(status == VA_STATUS_SUCCESS); - obj_surface = SURFACE(out_surface_id); - assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); - - 
tmp_surface.base = (struct object_base *)obj_surface; - tmp_surface.type = I965_SURFACE_TYPE_SURFACE; - tmp_surface.flags = I965_SURFACE_FLAG_FRAME; - - status = i965_image_pl1_rgbx_processing(ctx, - src_surface, - src_rect, - &tmp_surface, - dst_rect); - - if (status == VA_STATUS_SUCCESS) - status = i965_image_pl2_processing(ctx, - &tmp_surface, - dst_rect, - dst_surface, - dst_rect); - - i965_DestroySurfaces(ctx, - &out_surface_id, - 1); - - return status; + return i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_rgbx_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); } } @@ -4885,6 +4908,7 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_N12, NULL); + intel_batchbuffer_flush(pp_context->batch); } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || fourcc == VA_FOURCC('I', 'M', 'C', '3') || fourcc == VA_FOURCC('Y', 'V', '1', '2') || @@ -4896,6 +4920,7 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_PL3, NULL); + intel_batchbuffer_flush(pp_context->batch); } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { vaStatus = i965_post_processing_internal(ctx, i965->pp_context, @@ -4905,13 +4930,17 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_PA, NULL); + intel_batchbuffer_flush(pp_context->batch); } else { - assert(0); + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl3_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); } - intel_batchbuffer_flush(pp_context->batch); - return vaStatus; } @@ -5004,7 +5033,12 @@ i965_image_pl1_processing(VADriverContextP ctx, NULL); } else { - return VA_STATUS_ERROR_UNKNOWN; + return i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); } intel_batchbuffer_flush(pp_context->batch); -- cgit v1.2.1 From b85bed27bbcc38bf637c0c4fdc9ee19ce5a9f907 Mon Sep 17 00:00:00 2001 From: "Xiang, 
Haihao" Date: Mon, 9 Sep 2013 13:18:43 +0800 Subject: Return the status of image processing Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 79 +++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 0124072b..3f3eab90 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4866,26 +4866,27 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; int fourcc = pp_get_surface_fourcc(ctx, dst_surface); + VAStatus vaStatus; if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_RGBX_LOAD_SAVE_NV12, - NULL); + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_RGBX_LOAD_SAVE_NV12, + NULL); intel_batchbuffer_flush(pp_context->batch); - - return VA_STATUS_SUCCESS; } else { - return i965_image_plx_nv12_plx_processing(ctx, - i965_image_pl1_rgbx_processing, - src_surface, - src_rect, - dst_surface, - dst_rect); + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_rgbx_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); } + + return vaStatus; } static VAStatus @@ -5014,36 +5015,36 @@ i965_image_pl1_processing(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; int fourcc = pp_get_surface_fourcc(ctx, dst_surface); + VAStatus vaStatus; if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_PA_LOAD_SAVE_NV12, - NULL); + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + 
dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_NV12, + NULL); + intel_batchbuffer_flush(pp_context->batch); } else if (fourcc == VA_FOURCC_YV12) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_PA_LOAD_SAVE_PL3, - NULL); - + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_PL3, + NULL); + intel_batchbuffer_flush(pp_context->batch); } else { - return i965_image_plx_nv12_plx_processing(ctx, - i965_image_pl1_processing, - src_surface, - src_rect, - dst_surface, - dst_rect); + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); } - intel_batchbuffer_flush(pp_context->batch); - - return VA_STATUS_SUCCESS; + return vaStatus; } VAStatus -- cgit v1.2.1 From a60439939b6389fc5c6fa1d225d437657b727d26 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 9 Sep 2013 14:03:17 +0800 Subject: VPP: cleanup code Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 82 ++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 28 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 3f3eab90..c95c8829 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4868,7 +4868,8 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + switch (fourcc) { + case VA_FOURCC('N', 'V', '1', '2'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4877,13 +4878,16 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, PP_RGBX_LOAD_SAVE_NV12, NULL); intel_batchbuffer_flush(pp_context->batch); - } else { + break; + + default: vaStatus = i965_image_plx_nv12_plx_processing(ctx, i965_image_pl1_rgbx_processing, src_surface, 
src_rect, dst_surface, dst_rect); + break; } return vaStatus; @@ -4901,7 +4905,8 @@ i965_image_pl3_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + switch (fourcc) { + case VA_FOURCC('N', 'V', '1', '2'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4910,10 +4915,12 @@ i965_image_pl3_processing(VADriverContextP ctx, PP_PL3_LOAD_SAVE_N12, NULL); intel_batchbuffer_flush(pp_context->batch); - } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || - fourcc == VA_FOURCC('I', 'M', 'C', '3') || - fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', '4', '2', '0')) { + break; + + case VA_FOURCC('I', 'M', 'C', '1'): + case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC('I', '4', '2', '0'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4922,8 +4929,10 @@ i965_image_pl3_processing(VADriverContextP ctx, PP_PL3_LOAD_SAVE_PL3, NULL); intel_batchbuffer_flush(pp_context->batch); - } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || - fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + break; + + case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC('U', 'Y', 'V', 'Y'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4932,14 +4941,16 @@ i965_image_pl3_processing(VADriverContextP ctx, PP_PL3_LOAD_SAVE_PA, NULL); intel_batchbuffer_flush(pp_context->batch); - } - else { + break; + + default: vaStatus = i965_image_plx_nv12_plx_processing(ctx, i965_image_pl3_processing, src_surface, src_rect, dst_surface, dst_rect); + break; } return vaStatus; @@ -4957,7 +4968,8 @@ i965_image_pl2_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + 
switch (fourcc) { + case VA_FOURCC('N', 'V', '1', '2'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4965,10 +4977,12 @@ i965_image_pl2_processing(VADriverContextP ctx, dst_rect, PP_NV12_LOAD_SAVE_N12, NULL); - } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || - fourcc == VA_FOURCC('I', 'M', 'C', '3') || - fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', '4', '2', '0') ) { + break; + + case VA_FOURCC('I', 'M', 'C', '1'): + case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC('I', '4', '2', '0'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4976,19 +4990,23 @@ i965_image_pl2_processing(VADriverContextP ctx, dst_rect, PP_NV12_LOAD_SAVE_PL3, NULL); - } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || - fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + break; + + case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC('U', 'Y', 'V', 'Y'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, dst_surface, dst_rect, PP_NV12_LOAD_SAVE_PA, - NULL); - } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) { + NULL); + break; + + case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC('B', 'G', 'R', 'A'): + case VA_FOURCC('R', 'G', 'B', 'X'): + case VA_FOURCC('R', 'G', 'B', 'A'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4996,7 +5014,9 @@ i965_image_pl2_processing(VADriverContextP ctx, dst_rect, PP_NV12_LOAD_SAVE_RGBX, NULL); - } else { + break; + + default: return VA_STATUS_ERROR_UNIMPLEMENTED; } @@ -5017,7 +5037,8 @@ i965_image_pl1_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + switch (fourcc) { + case 
VA_FOURCC('N', 'V', '1', '2'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5026,7 +5047,9 @@ i965_image_pl1_processing(VADriverContextP ctx, PP_PA_LOAD_SAVE_NV12, NULL); intel_batchbuffer_flush(pp_context->batch); - } else if (fourcc == VA_FOURCC_YV12) { + break; + + case VA_FOURCC('Y', 'V', '1', '2'): vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5035,13 +5058,16 @@ i965_image_pl1_processing(VADriverContextP ctx, PP_PA_LOAD_SAVE_PL3, NULL); intel_batchbuffer_flush(pp_context->batch); - } else { + break; + + default: vaStatus = i965_image_plx_nv12_plx_processing(ctx, i965_image_pl1_processing, src_surface, src_rect, dst_surface, dst_rect); + break; } return vaStatus; @@ -5085,7 +5111,7 @@ i965_image_processing(VADriverContextP ctx, dst_surface, dst_rect); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): status = i965_image_pl1_processing(ctx, src_surface, -- cgit v1.2.1 From 7a02d77bd3565817af0dc093722995ab0e2d5418 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 9 Sep 2013 14:44:53 +0800 Subject: VPP: Packed 4:2:2 to packed 4:2:2 on IVB+ Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 78 ++- src/i965_post_processing.h | 1 + src/shaders/post_processing/gen7/Makefile.am | 1 + src/shaders/post_processing/gen7/pa_to_pa.asm | 17 + src/shaders/post_processing/gen7/pa_to_pa.g75b | 677 +++++++++++++++++++++++++ src/shaders/post_processing/gen7/pa_to_pa.g7b | 677 +++++++++++++++++++++++++ 6 files changed, 1448 insertions(+), 3 deletions(-) create mode 100644 src/shaders/post_processing/gen7/pa_to_pa.asm create mode 100644 src/shaders/post_processing/gen7/pa_to_pa.g75b create mode 100644 src/shaders/post_processing/gen7/pa_to_pa.g7b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c95c8829..0fe36acc 100755 --- a/src/i965_post_processing.c +++ 
b/src/i965_post_processing.c @@ -112,6 +112,9 @@ static const uint32_t pp_pa_load_save_pl3_gen5[][4] = { #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5" }; +static const uint32_t pp_pa_load_save_pa_gen5[][4] = { +}; + static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = { #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5" }; @@ -320,6 +323,18 @@ static struct pp_module pp_modules_gen5[] = { pp_plx_load_save_plx_initialize, }, + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen5, + sizeof(pp_pa_load_save_pa_gen5), + NULL, + }, + + pp_plx_load_save_plx_initialize, + }, + { { "RGBX_NV12 module", @@ -397,6 +412,9 @@ static const uint32_t pp_pa_load_save_pl3_gen6[][4] = { #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b" }; +static const uint32_t pp_pa_load_save_pa_gen6[][4] = { +}; + static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = { #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b" }; @@ -560,7 +578,19 @@ static struct pp_module pp_modules_gen6[] = { pp_plx_load_save_plx_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen6, + sizeof(pp_pa_load_save_pa_gen6), + NULL, + }, + + pp_plx_load_save_plx_initialize, + }, + { { "RGBX_NV12 module", @@ -632,6 +662,9 @@ static const uint32_t pp_pa_load_save_nv12_gen7[][4] = { static const uint32_t pp_pa_load_save_pl3_gen7[][4] = { #include "shaders/post_processing/gen7/pa_to_pl3.g7b" }; +static const uint32_t pp_pa_load_save_pa_gen7[][4] = { +#include "shaders/post_processing/gen7/pa_to_pa.g7b" +}; static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = { #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b" }; @@ -820,7 +853,19 @@ static struct pp_module pp_modules_gen7[] = { gen7_pp_plx_avs_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen7, + sizeof(pp_pa_load_save_pa_gen7), + NULL, + }, + + gen7_pp_plx_avs_initialize, + }, + { { "RGBX_NV12 
module", @@ -893,6 +938,9 @@ static const uint32_t pp_pa_load_save_nv12_gen75[][4] = { static const uint32_t pp_pa_load_save_pl3_gen75[][4] = { #include "shaders/post_processing/gen7/pa_to_pl3.g75b" }; +static const uint32_t pp_pa_load_save_pa_gen75[][4] = { +#include "shaders/post_processing/gen7/pa_to_pa.g75b" +}; static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = { #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b" }; @@ -1056,7 +1104,19 @@ static struct pp_module pp_modules_gen75[] = { gen7_pp_plx_avs_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen75, + sizeof(pp_pa_load_save_pa_gen75), + NULL, + }, + + gen7_pp_plx_avs_initialize, + }, + { { "RGBX_NV12 module", @@ -5060,6 +5120,18 @@ i965_image_pl1_processing(VADriverContextP ctx, intel_batchbuffer_flush(pp_context->batch); break; + case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC('U', 'Y', 'V', 'Y'): + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_PA, + NULL); + intel_batchbuffer_flush(pp_context->batch); + break; + default: vaStatus = i965_image_plx_nv12_plx_processing(ctx, i965_image_pl1_processing, diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 66fcdefc..b1ff3dae 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -51,6 +51,7 @@ enum PP_PL3_LOAD_SAVE_PA, PP_PA_LOAD_SAVE_NV12, PP_PA_LOAD_SAVE_PL3, + PP_PA_LOAD_SAVE_PA, PP_RGBX_LOAD_SAVE_NV12, PP_NV12_LOAD_SAVE_RGBX, NUM_PP_MODULES, diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am index f1a1c601..0bb572da 100644 --- a/src/shaders/post_processing/gen7/Makefile.am +++ b/src/shaders/post_processing/gen7/Makefile.am @@ -4,6 +4,7 @@ INTEL_PP_G7B = \ nv12_dn_nv12.g7b \ pa_to_pl2.g7b \ pa_to_pl3.g7b \ + pa_to_pa.g7b \ pl2_to_pa.g7b \ pl2_to_pl2.g7b \ pl2_to_pl3.g7b \ diff --git 
a/src/shaders/post_processing/gen7/pa_to_pa.asm b/src/shaders/post_processing/gen7/pa_to_pa.asm new file mode 100644 index 00000000..62f14bd2 --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PA_TO_PL3 +.code + +#include "VP_Setup.g4a" +#include "Set_Layer_0.g4a" +#include "Set_AVS_Buf_0123_VYUA.g4a" +#include "PA_AVS_Buf_0.g4a" +#include "PA_AVS_Buf_1.g4a" +#include "PA_AVS_Buf_2.g4a" +#include "PA_AVS_Buf_3.g4a" +#include "Save_AVS_PA.g4a" +#include "EOT.g4a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g75b b/src/shaders/post_processing/gen7/pa_to_pa.g75b new file mode 100644 index 00000000..0ccd59e1 --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.g75b @@ -0,0 +1,677 @@ + { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 }, + { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 }, + { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 
0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 
0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 }, + { 
0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 
0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 
0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a }, + { 
0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 
0x00000048, 0x20b477bd, 0x00000094, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 
0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 
0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff }, + { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 }, + { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 }, + { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 }, + { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 }, + { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 }, + { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 }, + { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 }, + { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 }, + { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 }, + { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 }, + { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 }, + { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 }, + { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 }, + { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 }, + { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 }, + { 0x00000801, 0x22500061, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000240 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 
0x00000401, 0x233c0021, 0x000000fc, 0x00000000 }, + { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x28000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 
0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 
0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 
0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 }, + { 
0x00000409, 0x23603da5, 0x000000e0, 0x00010001 }, + { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 }, + { 0x00000801, 0x23680061, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 
0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 
0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 
0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 
0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g7b b/src/shaders/post_processing/gen7/pa_to_pa.g7b new file mode 100644 index 00000000..20728b5c --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.g7b @@ -0,0 +1,677 @@ + { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 }, + { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 }, + { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 }, + { 0x00000008, 0x22202d29, 
0x00000044, 0x00000000 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x240003bc, 
0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 
0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 
0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 
0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 
0x00000070, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 
0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 
0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 
0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff }, + { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 }, + { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a }, + { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 }, + { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 }, + { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 }, + { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 }, + { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 }, + { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 }, + { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 }, + { 0x00000c41, 0x22e47fbd, 
0x00000080, 0x40800000 }, + { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 }, + { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 }, + { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 }, + { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 }, + { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 }, + { 0x00000801, 0x22500061, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000048 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 }, + { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 
0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x28000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 
0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 
0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 
0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 }, + { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 }, + { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 }, + { 0x00000801, 0x23680061, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 
0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 
0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 
0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 
0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 
0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 }, -- cgit v1.2.1 From 69ccd74f754e22dafc8b090ca724878abb83a661 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 26 Jul 2013 10:23:39 +0200 Subject: vpp: add basic processing support for packed YUV to packed YUV. Add support for video processing from packed YUV 4:2:2 formats (YUY2, UYVY) to packed YUV 4:2:2 formats. In particular, add support for HW accelerated vaPutImage() for packed YUV formats. https://bugs.freedesktop.org/show_bug.cgi?id=67338 Signed-off-by: Gwenole Beauchesne [Haihao: the shader for IVB+ has been added in 0eb2288] Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 2 + src/shaders/post_processing/gen5_6/Makefile.am | 5 +- .../post_processing/gen5_6/pa_load_save_pa.asm | 17 ++ .../gen5_6/pa_load_save_pa.g4b.gen5 | 115 +++++++++++++ .../post_processing/gen5_6/pa_load_save_pa.g6b | 188 +++++++++++++++++++++ 5 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 src/shaders/post_processing/gen5_6/pa_load_save_pa.asm create mode 100644 src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 create mode 100644 src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 0fe36acc..cfc4aacc 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -113,6 +113,7 @@ static const uint32_t pp_pa_load_save_pl3_gen5[][4] = { }; static const uint32_t pp_pa_load_save_pa_gen5[][4] = { +#include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5" }; static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = { @@ -413,6 +414,7 @@ static const uint32_t pp_pa_load_save_pl3_gen6[][4] = { }; static const uint32_t pp_pa_load_save_pa_gen6[][4] = { +#include 
"shaders/post_processing/gen5_6/pa_load_save_pa.g6b" }; static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = { diff --git a/src/shaders/post_processing/gen5_6/Makefile.am b/src/shaders/post_processing/gen5_6/Makefile.am index bb8caa47..4a13d9d5 100755 --- a/src/shaders/post_processing/gen5_6/Makefile.am +++ b/src/shaders/post_processing/gen5_6/Makefile.am @@ -16,6 +16,7 @@ INTEL_PP_G4B_GEN5 = \ nv12_load_save_pl3.g4b.gen5 \ nv12_load_save_rgbx.g4b.gen5 \ nv12_scaling_nv12.g4b.gen5 \ + pa_load_save_pa.g4b.gen5 \ pa_load_save_nv12.g4b.gen5 \ pa_load_save_pl3.g4b.gen5 \ pl3_load_save_nv12.g4b.gen5 \ @@ -33,6 +34,7 @@ INTEL_PP_G6B = \ nv12_load_save_pl3.g6b \ nv12_load_save_rgbx.g6b \ nv12_scaling_nv12.g6b \ + pa_load_save_pa.g6b \ pa_load_save_nv12.g6b \ pa_load_save_pl3.g6b \ pl3_load_save_nv12.g6b \ @@ -50,6 +52,7 @@ INTEL_PP_ASM = \ nv12_load_save_pl3.asm \ nv12_load_save_rgbx.asm \ nv12_scaling_nv12.asm \ + pa_load_save_pa.asm \ pa_load_save_nv12.asm \ pa_load_save_pl3.asm \ pl3_load_save_nv12.asm \ @@ -178,7 +181,7 @@ endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm +SUFFIXES = .g4a .g4b .g4b.gen5 .g6a .g6b .g5s .g6s .asm if HAVE_GEN4ASM .g4a.g4b: diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm new file mode 100644 index 00000000..72c2a8ad --- /dev/null +++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm @@ -0,0 +1,17 @@ +// Module name: PA_LOAD_SAVE_PA +.kernel PA_LOAD_SAVE_PA +.code + +#include "SetupVPKernel.asm" +#include "Multiple_Loop_Head.asm" +#include "PA_Load_8x8.asm" +#include "PL8x8_Save_PA.asm" +#include "Multiple_Loop.asm" + +END_THREAD // End of Thread + +.end_code + +.end_kernel + +// end of pa_load_save_pa.asm diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 new file mode 100644 index 00000000..a75c75a1 --- /dev/null +++ 
b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 @@ -0,0 +1,115 @@ + { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 }, + { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 }, + { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 }, + { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 }, + { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 }, + { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 }, + { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 }, + { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 }, + { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, + { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 }, + { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 }, + { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 }, + { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 }, + { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 }, + { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 }, + { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 }, + { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 }, + { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 }, + { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 }, + { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 }, + { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 }, + { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 }, + { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 }, + { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 }, + { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 }, + { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 }, + { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 }, + { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 }, + { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 }, + { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 }, + { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 }, + { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 }, + { 0x00600001, 0x23b00229, 
0x00cf98e0, 0x00000000 }, + { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 }, + { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 }, + { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 }, + { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 }, + { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 }, + { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 }, + { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 }, + { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 }, + { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 }, + { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 }, + { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 }, + { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 }, + { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 }, + { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 }, + { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 }, + { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 }, + { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 }, + { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 }, + { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 }, + { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 }, + { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 }, + { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 }, + { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 }, + { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 }, + { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 }, + { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 }, + { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 }, + { 0x00000801, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff }, + { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a }, + { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 }, + { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 }, + { 0x00710001, 0x24400169, 
0x00000000, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000440, 0x00000000 }, + { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000442, 0x00000000 }, + { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000444, 0x00000000 }, + { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000446, 0x00000000 }, + { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000448, 0x00000000 }, + { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 }, + { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 }, + { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 }, + { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 }, + { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 }, + { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 }, + { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 }, + { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 }, + { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 }, + { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 }, + { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 }, + { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, + { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, + { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, + { 0x00010220, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 }, + { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, + { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, + { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 }, + { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, + { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, + { 0x00000220, 0x34001c00, 
0x00001400, 0xffffff30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b new file mode 100644 index 00000000..5d9fe48f --- /dev/null +++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b @@ -0,0 +1,188 @@ + { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24200061, 
0x00000000, 0x00000000 }, + { 0x00600001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28c00061, 
0x00000000, 0x00000000 }, + { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 }, + { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 }, + { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 }, + { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 }, + { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 }, + { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 }, + { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 }, + { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 }, + { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, + { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 }, + { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 }, + { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 }, + { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 }, + { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 }, + { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 }, + { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 }, + { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 }, + { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 }, + { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 }, + { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 }, + { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 }, + { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 }, + { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 }, + { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 }, + { 0x00800001, 0x21e00229, 
0x00d290a0, 0x00000000 }, + { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 }, + { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 }, + { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 }, + { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 }, + { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 }, + { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 }, + { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 }, + { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 }, + { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 }, + { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 }, + { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 }, + { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 }, + { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 }, + { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 }, + { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 }, + { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 }, + { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 }, + { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 }, + { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 }, + { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 }, + { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 }, + { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 }, + { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 }, + { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 }, + { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 }, + { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 }, + { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 }, + { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 }, + { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 }, + { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 }, + { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 }, + { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 }, + { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 }, + { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 }, + { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 }, + { 0x00000801, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 
0x008d0100, 0x00000000 }, + { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff }, + { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a }, + { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 }, + { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 }, + { 0x00710001, 0x24400169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000440, 0x00000000 }, + { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000442, 0x00000000 }, + { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000444, 0x00000000 }, + { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000446, 0x00000000 }, + { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000448, 0x00000000 }, + { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 }, + { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 }, + { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 }, + { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 }, + { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 }, + { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 }, + { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 }, + { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 }, + { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 }, + { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 }, + { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 }, + { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, + { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, + { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, + { 0x00010220, 0x34001c00, 
0x00001400, 0x00000010 }, + { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 }, + { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, + { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, + { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 }, + { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, + { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, -- cgit v1.2.1 From 5b584344bd145b191968e2200404a4012a531b61 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Thu, 25 Jul 2013 10:52:31 +0200 Subject: vpp: fix output filter count from QueryVideoProcFilters(). When vaQueryVideoProcFilters() returns VA_STATUS_ERROR_MAX_NUM_EXCEEDED, i.e. when the caller allocated too few entries for the filters argument, then the num_filters argument shall be adjusted to correct number of entries that need to be re-allocated. 
https://bugs.freedesktop.org/show_bug.cgi?id=67292 Signed-off-by: Gwenole Beauchesne Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index ea1f1d00..9a8d7090 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4561,8 +4561,11 @@ VAStatus i965_QueryVideoProcFilters( for (i = 0; i < i965->codec_info->num_filters; i++) { if (i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) { - if (num == *num_filters) + if (num == *num_filters) { + *num_filters = i965->codec_info->num_filters; + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + } filters[num++] = i965->codec_info->filters[i].type; } -- cgit v1.2.1 From 48c5170c83398ceb3b2a29bb76bd0c0d8bf9993d Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 26 Jul 2013 13:08:27 +0200 Subject: allow vaDeriveImage() to work for UYVY formats Hi, I would push the following as obvious around next week. Add support for UYVY format to vaDeriveImage(). Also remove dead code along the way, i.e. packed YUV 4:2:2 formats have a single plane. Regards, Gwenole. 
--- src/i965_drv_video.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 9a8d7090..89e776a1 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3021,13 +3021,10 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[2] = w_pitch * obj_surface->y_cr_offset; break; case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC('U', 'Y', 'V', 'Y'): image->num_planes = 1; image->pitches[0] = obj_surface->width * 2; /* Y, width is aligned already */ image->offsets[0] = 0; - image->pitches[1] = obj_surface->width * 2; /* U */ - image->offsets[1] = 0; - image->pitches[2] = obj_surface->width * 2; /* V */ - image->offsets[2] = 0; break; case VA_FOURCC('R', 'G', 'B', 'A'): case VA_FOURCC('R', 'G', 'B', 'X'): -- cgit v1.2.1 From 9f3fd6261fed1a1d3dd18674b7e9a46c164c0b1a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 9 Sep 2013 15:16:56 +0800 Subject: Update NEWS Signed-off-by: Xiang, Haihao --- NEWS | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 4ac3a3f2..83b237d1 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,12 @@ -libva-intel-driver NEWS -- summary of changes. 2013-06-26 +libva-intel-driver NEWS -- summary of changes. 
2013-09-DD Copyright (C) 2009-2013 Intel Corporation +Version 1.2.1 - DD.Sep.2013 +* Add PCI IDs for Bay Trail +* Performance improvement for MPEG-2 Encoding on IVB/HSW +* Add basic processing support for packed YUV to packed YUV on ILK+ +* Bug fixes + Version 1.2.0 - 26.Jun.2013 * The new H.264 encoding API on SNB/IVB/HSW - Profile: BP/MP/HP -- cgit v1.2.1 From a944cd5c296d979e089a95252cce9112261a4931 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 10 Sep 2013 16:45:30 +0800 Subject: VPP: VAProcFilterParameterBufferDeinterlacing::flags overrides VAProcPipelineParameterBuffer::filter_flags Signed-off-by: Xiang, Haihao (cherry picked from commit 5334ca07c5ad1a2edeb279784bde3d03cbb10c49) --- src/i965_post_processing.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index cfc4aacc..3ab29e4f 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3588,14 +3588,16 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c int w, h; int orig_w, orig_h; int dndi_top_first = 1; + VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; - if (src_surface->flags == I965_SURFACE_FLAG_FRAME) + if (di_filter_param->flags == I965_SURFACE_FLAG_FRAME) return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) + if (di_filter_param->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) { dndi_top_first = 1; - else + } else { dndi_top_first = 0; + } /* surface */ obj_surface = (struct object_surface *)src_surface->base; -- cgit v1.2.1 From afcb209b50c46988b39c3c548d8cf6edb27472e4 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 11 Sep 2013 08:43:17 +0800 Subject: VPP: Update DEINTERLACE_SAMPLER_STATE on IVB Signed-off-by: Xiang, Haihao (cherry picked from commit b05729d0feea5ceb4217997f26442d5a8d94fa48) --- src/i965_post_processing.c | 52 
+++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 3ab29e4f..a2546d9a 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3688,43 +3688,43 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* sample dndi index 0 */ index = 0; - sampler_dndi[index].dw0.denoise_asd_threshold = 0; - sampler_dndi[index].dw0.dnmh_delt = 8; + sampler_dndi[index].dw0.denoise_asd_threshold = 38; + sampler_dndi[index].dw0.dnmh_delt = 7; sampler_dndi[index].dw0.vdi_walker_y_stride = 0; sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0; - sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240 - sampler_dndi[index].dw0.denoise_stad_threshold = 0; + sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240 + sampler_dndi[index].dw0.denoise_stad_threshold = 140; - sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64; - sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0; - sampler_dndi[index].dw1.stmm_c2 = 0; - sampler_dndi[index].dw1.low_temporal_difference_threshold = 8; - sampler_dndi[index].dw1.temporal_difference_threshold = 16; + sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38; + sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1; + sampler_dndi[index].dw1.stmm_c2 = 2; + sampler_dndi[index].dw1.low_temporal_difference_threshold = 0; + sampler_dndi[index].dw1.temporal_difference_threshold = 0; - sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31 + sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31 sampler_dndi[index].dw2.bne_edge_th = 1; sampler_dndi[index].dw2.smooth_mv_th = 0; sampler_dndi[index].dw2.sad_tight_th = 5; sampler_dndi[index].dw2.cat_slope_minus1 = 9; - sampler_dndi[index].dw2.good_neighbor_th = 4; + sampler_dndi[index].dw2.good_neighbor_th = 12; - 
sampler_dndi[index].dw3.maximum_stmm = 128; - sampler_dndi[index].dw3.multipler_for_vecm = 2; - sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0; + sampler_dndi[index].dw3.maximum_stmm = 150; + sampler_dndi[index].dw3.multipler_for_vecm = 30; + sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125; sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64; sampler_dndi[index].dw3.stmm_blending_constant_select = 0; - sampler_dndi[index].dw4.sdi_delta = 8; - sampler_dndi[index].dw4.sdi_threshold = 128; - sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift - sampler_dndi[index].dw4.stmm_shift_up = 0; + sampler_dndi[index].dw4.sdi_delta = 5; + sampler_dndi[index].dw4.sdi_threshold = 100; + sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift + sampler_dndi[index].dw4.stmm_shift_up = 1; sampler_dndi[index].dw4.stmm_shift_down = 0; - sampler_dndi[index].dw4.minimum_stmm = 0; + sampler_dndi[index].dw4.minimum_stmm = 118; - sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0; + sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175; + sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50; sampler_dndi[index].dw6.dn_enable = 0; sampler_dndi[index].dw6.di_enable = 1; sampler_dndi[index].dw6.di_partial = 0; @@ -3733,10 +3733,10 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c sampler_dndi[index].dw6.dndi_first_frame = 1; sampler_dndi[index].dw6.progressive_dn = 0; sampler_dndi[index].dw6.mcdi_enable = 0; - 
sampler_dndi[index].dw6.fmd_tear_threshold = 32; + sampler_dndi[index].dw6.fmd_tear_threshold = 2; sampler_dndi[index].dw6.cat_th1 = 0; - sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32; - sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32; + sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100; + sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16; sampler_dndi[index].dw7.sad_tha = 5; sampler_dndi[index].dw7.sad_thb = 10; -- cgit v1.2.1 From d22d367eb36ae127ffebaf4cf32495593e469cd3 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 11 Sep 2013 09:04:52 +0800 Subject: VPP: the similar fix to Bob DI on SNB Signed-off-by: Xiang, Haihao (cherry picked from commit c55cc476b1f75ddf0504a8b85ac80c0168585c5c) --- src/i965_post_processing.c | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index a2546d9a..256d7164 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3193,14 +3193,16 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex int w, h; int orig_w, orig_h; int dndi_top_first = 1; + VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; - if (src_surface->flags == I965_SURFACE_FLAG_FRAME) + if (di_filter_param->flags == I965_SURFACE_FLAG_FRAME) return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) + if (di_filter_param->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) { dndi_top_first = 1; - else + } else { dndi_top_first = 0; + } /* surface */ obj_surface = (struct object_surface *)src_surface->base; @@ -3263,39 +3265,39 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex /* sample dndi index 1 */ index = 0; - sampler_dndi[index].dw0.denoise_asd_threshold = 0; - 
sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8 - sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240 - sampler_dndi[index].dw0.denoise_stad_threshold = 0; + sampler_dndi[index].dw0.denoise_asd_threshold = 38; + sampler_dndi[index].dw0.denoise_history_delta = 7; // 0-15, default is 8 + sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240 + sampler_dndi[index].dw0.denoise_stad_threshold = 140; - sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64; - sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4; + sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38; + sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1; sampler_dndi[index].dw1.stmm_c2 = 1; - sampler_dndi[index].dw1.low_temporal_difference_threshold = 8; - sampler_dndi[index].dw1.temporal_difference_threshold = 16; + sampler_dndi[index].dw1.low_temporal_difference_threshold = 0; + sampler_dndi[index].dw1.temporal_difference_threshold = 0; - sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31 - sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15 + sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31 + sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1; // 0-15 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15 - sampler_dndi[index].dw2.good_neighbor_threshold = 4; // 0-63 + sampler_dndi[index].dw2.good_neighbor_threshold = 12; // 0-63 - sampler_dndi[index].dw3.maximum_stmm = 128; - sampler_dndi[index].dw3.multipler_for_vecm = 2; - sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0; + sampler_dndi[index].dw3.maximum_stmm = 150; + sampler_dndi[index].dw3.multipler_for_vecm = 30; + sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125; sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64; 
sampler_dndi[index].dw3.stmm_blending_constant_select = 0; - sampler_dndi[index].dw4.sdi_delta = 8; - sampler_dndi[index].dw4.sdi_threshold = 128; - sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift - sampler_dndi[index].dw4.stmm_shift_up = 0; + sampler_dndi[index].dw4.sdi_delta = 5; + sampler_dndi[index].dw4.sdi_threshold = 100; + sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift + sampler_dndi[index].dw4.stmm_shift_up = 1; sampler_dndi[index].dw4.stmm_shift_down = 0; - sampler_dndi[index].dw4.minimum_stmm = 0; + sampler_dndi[index].dw4.minimum_stmm = 118; - sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8; - sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32; + sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175; + sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50; sampler_dndi[index].dw6.dn_enable = 1; sampler_dndi[index].dw6.di_enable = 1; @@ -3304,14 +3306,14 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex sampler_dndi[index].dw6.dndi_stream_id = 0; sampler_dndi[index].dw6.dndi_first_frame = 1; sampler_dndi[index].dw6.progressive_dn = 0; - sampler_dndi[index].dw6.fmd_tear_threshold = 63; - sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32; - sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32; + sampler_dndi[index].dw6.fmd_tear_threshold = 2; + sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100; + sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16; sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0; sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0; 
sampler_dndi[index].dw7.vdi_walker_enable = 0; - sampler_dndi[index].dw7.column_width_minus1 = 0; + sampler_dndi[index].dw7.column_width_minus1 = w / 16; dri_bo_unmap(pp_context->sampler_state_table.bo); -- cgit v1.2.1 From 2de479ab5fc1bd03e3669da66311368d370c43f2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 11 Sep 2013 10:33:25 +0800 Subject: VPP: Create separate sub-context for each processing Signed-off-by: Xiang, Haihao (cherry picked from commit 4faf6bf47f8e4e2fe587e3bb6a004340edd59c4c) Conflicts: src/i965_post_processing.c src/i965_post_processing.h --- src/i965_post_processing.c | 44 +++++++++++++++++++++++++++----------------- src/i965_post_processing.h | 13 ++++++------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 256d7164..6a2bafe8 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1436,8 +1436,8 @@ ironlake_pp_object_walker(VADriverContextP ctx, int x, x_steps, y, y_steps; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; - x_steps = pp_context->pp_x_steps(&pp_context->private_context); - y_steps = pp_context->pp_y_steps(&pp_context->private_context); + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); for (y = 0; y < y_steps; y++) { for (x = 0; x < x_steps; x++) { @@ -2012,6 +2012,7 @@ pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp /* private function & data */ pp_context->pp_x_steps = pp_null_x_steps; pp_context->pp_y_steps = pp_null_y_steps; + pp_context->private_context = NULL; pp_context->pp_set_block_parameter = pp_null_set_block_parameter; dst_surface->flags = src_surface->flags; @@ -2037,7 +2038,7 @@ static int pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { struct pp_inline_parameter *pp_inline_parameter = 
pp_context->pp_inline_parameter; - struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context; + struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context; pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x; pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y; @@ -2086,7 +2087,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin const VARectangle *dst_rect, void *filter_param) { - struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context; + struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; int width[3], height[3], pitch[3], offset[3]; @@ -2102,6 +2103,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin /* private function & data */ pp_context->pp_x_steps = pp_load_save_x_steps; pp_context->pp_y_steps = pp_load_save_y_steps; + pp_context->private_context = &pp_context->pp_load_save_context; pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;; @@ -2142,7 +2144,7 @@ pp_scaling_y_steps(void *private_context) static int pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context; + struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter 
*pp_static_parameter = pp_context->pp_static_parameter; float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step; @@ -2164,7 +2166,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con const VARectangle *dst_rect, void *filter_param) { - struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context; + struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; @@ -2234,6 +2236,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = pp_scaling_x_steps; pp_context->pp_y_steps = pp_scaling_y_steps; + pp_context->private_context = &pp_context->pp_scaling_context; pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; @@ -2273,7 +2276,7 @@ pp_avs_y_steps(void *private_context) static int pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; float src_x_steping, src_y_steping, video_step_delta; @@ -2380,7 +2383,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context void *filter_param, int nlas) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct 
pp_avs_context *)&pp_context->pp_avs_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; @@ -2623,6 +2626,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context /* private function & data */ pp_context->pp_x_steps = pp_avs_x_steps; pp_context->pp_y_steps = pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; pp_context->pp_set_block_parameter = pp_avs_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; @@ -2702,7 +2706,7 @@ gen7_pp_avs_y_steps(void *private_context) static int gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x; @@ -2739,7 +2743,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con const VARectangle *dst_rect, void *filter_param) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct gen7_sampler_8x8 *sampler_8x8; @@ -2905,6 +2909,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = gen7_pp_avs_x_steps; pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = 
&pp_context->pp_avs_context; pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; pp_avs_context->dest_x = dst_rect->x; @@ -2950,7 +2955,7 @@ gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_co const VARectangle *dst_rect, void *filter_param) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct gen7_sampler_8x8 *sampler_8x8; struct i965_sampler_8x8_state *sampler_8x8_state; @@ -3118,6 +3123,7 @@ gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_co /* private function & data */ pp_context->pp_x_steps = gen7_pp_avs_x_steps; pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; pp_avs_context->dest_x = dst_rect->x; @@ -3184,7 +3190,7 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context; + struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; @@ -3320,6 +3326,7 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex /* private function & data */ pp_context->pp_x_steps = pp_dndi_x_steps; pp_context->pp_y_steps = pp_dndi_y_steps; + pp_context->private_context = &pp_context->pp_dndi_context; pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter; 
pp_static_parameter->grf1.statistics_surface_picth = w / 2; @@ -3374,7 +3381,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context; + struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context; struct object_surface *obj_surface; struct i965_sampler_dndi *sampler_dndi; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; @@ -3526,6 +3533,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context /* private function & data */ pp_context->pp_x_steps = pp_dn_x_steps; pp_context->pp_y_steps = pp_dn_y_steps; + pp_context->private_context = &pp_context->pp_dn_context; pp_context->pp_set_block_parameter = pp_dn_set_block_parameter; pp_static_parameter->grf1.statistics_surface_picth = w / 2; @@ -3582,7 +3590,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context; + struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; struct gen7_sampler_dndi *sampler_dndi; @@ -3754,6 +3762,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* private function & data */ pp_context->pp_x_steps = gen7_pp_dndi_x_steps; pp_context->pp_y_steps = gen7_pp_dndi_y_steps; + pp_context->private_context = &pp_context->pp_dndi_context; pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter; pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2; @@ -3812,7 +3821,7 @@ 
gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context; + struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; struct gen7_sampler_dndi *sampler_dn; @@ -3982,6 +3991,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = gen7_pp_dn_x_steps; pp_context->pp_y_steps = gen7_pp_dn_y_steps; + pp_context->private_context = &pp_context->pp_dn_context; pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter; pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2; @@ -4467,8 +4477,8 @@ gen6_pp_object_walker(VADriverContextP ctx, else param_size = sizeof(struct pp_inline_parameter); - x_steps = pp_context->pp_x_steps(&pp_context->private_context); - y_steps = pp_context->pp_y_steps(&pp_context->private_context); + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); command_length_in_dws = 6 + (param_size >> 2); command_buffer = dri_bo_alloc(i965->intel.bufmgr, "command objects buffer", diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index b1ff3dae..60e040f9 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -476,13 +476,12 @@ struct i965_post_processing_context dri_bo *bo; } stmm; - union { - struct pp_load_save_context pp_load_save_context; - struct pp_scaling_context pp_scaling_context; - struct pp_avs_context pp_avs_context; - struct pp_dndi_context pp_dndi_context; - struct pp_dn_context pp_dn_context; - } private_context; + struct pp_load_save_context pp_load_save_context; + struct 
pp_scaling_context pp_scaling_context; + struct pp_avs_context pp_avs_context; + struct pp_dndi_context pp_dndi_context; + struct pp_dn_context pp_dn_context; + void *private_context; /* pointer to the current private context */ int (*pp_x_steps)(void *private_context); int (*pp_y_steps)(void *private_context); -- cgit v1.2.1 From d2512dededcc45e27957b597a9379450aa755cab Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 11 Sep 2013 11:05:03 +0800 Subject: VPP: move the BO for STMM into sub-contexts Signed-off-by: Xiang, Haihao (cherry picked from commit 6ce827df0a9d6cb841308813ec54a019e3c63d37) --- src/i965_post_processing.c | 65 ++++++++++++++++++++++++---------------------- src/i965_post_processing.h | 6 ++--- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 6a2bafe8..4bd0a4b9 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3217,12 +3217,12 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex w = obj_surface->width; h = obj_surface->height; - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); } /* source UV surface index 2 */ @@ -3241,7 +3241,7 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex /* source STMM surface index 20 */ i965_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 20, 1); @@ -3424,12 +3424,12 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context w = obj_surface->width; h = obj_surface->height; - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = 
dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dn_context->stmm_bo == NULL) { + pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dn_context->stmm_bo); } /* source UV surface index 2 */ @@ -3448,7 +3448,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context /* source STMM surface index 20 */ i965_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dn_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 20, 1); @@ -3616,12 +3616,12 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c w = obj_surface->width; h = obj_surface->height; - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); } /* source UV surface index 1 */ @@ -3648,7 +3648,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* STMM / History Statistics input surface, index 5 */ gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 5, 1); @@ -3685,7 +3685,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* STMM output surface, index 33 */ gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 33, 1); @@ -3863,12 +3863,12 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con w = obj_surface->width; h = obj_surface->height; - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * 
h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dn_context->stmm_bo == NULL) { + pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dn_context->stmm_bo); } /* source UV surface index 1 */ @@ -3895,7 +3895,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con /* STMM / History Statistics input surface, index 5 */ gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dn_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 33, 1); @@ -5255,8 +5255,11 @@ i965_post_processing_context_finalize(struct i965_post_processing_context *pp_co dri_bo_unreference(pp_context->vfe_state.bo); pp_context->vfe_state.bo = NULL; - dri_bo_unreference(pp_context->stmm.bo); - pp_context->stmm.bo = NULL; + dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); + pp_context->pp_dndi_context.stmm_bo = NULL; + + dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); + pp_context->pp_dn_context.stmm_bo = NULL; for (i = 0; i < NUM_PP_MODULES; i++) { struct pp_module *pp_module = &pp_context->pp_modules[i]; diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 60e040f9..fbe684c2 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -94,12 +94,14 @@ struct pp_dndi_context { int dest_w; int dest_h; + dri_bo *stmm_bo; }; struct pp_dn_context { int dest_w; int dest_h; + dri_bo *stmm_bo; }; struct i965_post_processing_context; @@ -472,10 +474,6 @@ struct i965_post_processing_context unsigned int size_cs_entry; } urb; - struct { - dri_bo *bo; - } stmm; - struct pp_load_save_context pp_load_save_context; struct pp_scaling_context pp_scaling_context; struct pp_avs_context pp_avs_context; -- cgit v1.2.1 From 944b61f7e9e8c7026cd59fb28583ea36535c48f4 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 13 Sep 2013 10:12:53 +0800 Subject: VPP: track the frame sequence for DI on IVB Signed-off-by: Xiang, Haihao (cherry picked from 
commit c008b4222d593594330fbb087204ff53722f9765) --- src/i965_post_processing.c | 3 +++ src/i965_post_processing.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 4bd0a4b9..c3dc953b 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3782,6 +3782,8 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c dst_surface->flags = I965_SURFACE_FLAG_FRAME; + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + return VA_STATUS_SUCCESS; } @@ -5350,6 +5352,7 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1); } + pp_context->pp_dndi_context.frame_order = -1; pp_context->batch = batch; } diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index fbe684c2..052b5734 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -95,6 +95,7 @@ struct pp_dndi_context int dest_w; int dest_h; dri_bo *stmm_bo; + int frame_order; /* -1 for the first frame */ }; struct pp_dn_context -- cgit v1.2.1 From ecb50e50fd0abcc80f2518f9d36a7e7f7f3b8a94 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 13 Sep 2013 13:21:09 +0800 Subject: VPP: MADI on IVB Signed-off-by: Xiang, Haihao (cherry picked from commit 219452451ede9d52d8940be55cd38feda50baea7) Conflicts: src/i965_drv_video.c --- src/i965_drv_video.c | 1 + src/i965_post_processing.c | 177 +++++++++++++++++++++++++++++++++------------ src/i965_post_processing.h | 3 + 3 files changed, 136 insertions(+), 45 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 89e776a1..31dafa2f 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -256,6 +256,7 @@ static struct hw_codec_info gen7_hw_codec_info = { .has_accelerated_getimage = 1, .has_accelerated_putimage = 1, .has_tiled_surface = 1, + .has_di_motion_adptive = 1, .num_filters = 2, .filters = { diff 
--git a/src/i965_post_processing.c b/src/i965_post_processing.c index c3dc953b..b2fa23c1 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -60,6 +60,20 @@ #define GPU_ASM_BLOCK_HEIGHT 8 #define GPU_ASM_X_OFFSET_ALIGNMENT 4 +#define VA_STATUS_SUCCESS_1 0xFFFFFFFE + +extern VAStatus +i965_DestroySurfaces(VADriverContextP ctx, + VASurfaceID *surface_list, + int num_surfaces); +extern VAStatus +i965_CreateSurfaces(VADriverContextP ctx, + int width, + int height, + int format, + int num_surfaces, + VASurfaceID *surfaces); + static const uint32_t pp_null_gen5[][4] = { #include "shaders/post_processing/gen5_6/null.g4b.gen5" }; @@ -3581,6 +3595,12 @@ gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context return 0; } + +extern VAStatus +vpp_surface_convert(VADriverContextP ctx, + struct object_surface *src_obj_surf, + struct object_surface *dst_obj_surf); + static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *src_surface, @@ -3592,13 +3612,14 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c struct i965_driver_data *i965 = i965_driver_data(ctx); struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct object_surface *obj_surface; + struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface; struct gen7_sampler_dndi *sampler_dndi; int index; int w, h; int orig_w, orig_h; int dndi_top_first = 1; VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; + int is_first_frame = (pp_dndi_context->frame_order == -1); if (di_filter_param->flags == I965_SURFACE_FLAG_FRAME) return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; @@ -3610,76 +3631,144 @@ 
gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c } /* surface */ - obj_surface = (struct object_surface *)src_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; - - if (pp_dndi_context->stmm_bo == NULL) { - pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_dndi_context->stmm_bo); + current_in_obj_surface = (struct object_surface *)src_surface->base; + + if (di_filter_param->algorithm == VAProcDeinterlacingBob) { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + if (pp_dndi_context->frame_order == 0) { + VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param; + assert(pipeline_param->num_forward_references == 1); + assert(pipeline_param->forward_references[0] != VA_INVALID_ID); + + previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]); + assert(previous_in_obj_surface && previous_in_obj_surface->bo); + + is_first_frame = 0; + } else if (pp_dndi_context->frame_order == 1) { + vpp_surface_convert(ctx, + pp_dndi_context->current_out_obj_surface, + (struct object_surface *)dst_surface->base); + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + is_first_frame = 0; + + return VA_STATUS_SUCCESS_1; + } else { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } + } else { + return VA_STATUS_ERROR_UNIMPLEMENTED; } + /* source (temporal reference) YUV surface index 4 */ + orig_w = previous_in_obj_surface->orig_width; + orig_h = previous_in_obj_surface->orig_height; + w = previous_in_obj_surface->width; + h = previous_in_obj_surface->height; + gen7_pp_set_surface2_state(ctx, pp_context, + previous_in_obj_surface->bo, 0, + orig_w, orig_h, w, + 0, h, + SURFACE_FORMAT_PLANAR_420_8, 1, + 4); + + /* source surface 
*/ + orig_w = current_in_obj_surface->orig_width; + orig_h = current_in_obj_surface->orig_height; + w = current_in_obj_surface->width; + h = current_in_obj_surface->height; + /* source UV surface index 1 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_in_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 1, 0); /* source YUV surface index 3 */ gen7_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, + current_in_obj_surface->bo, 0, orig_w, orig_h, w, 0, h, SURFACE_FORMAT_PLANAR_420_8, 1, 3); - /* source (temporal reference) YUV surface index 4 */ - gen7_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, - orig_w, orig_h, w, - 0, h, - SURFACE_FORMAT_PLANAR_420_8, 1, - 4); - /* STMM / History Statistics input surface, index 5 */ + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); + } + gen7_pp_set_surface_state(ctx, pp_context, pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 5, 1); /* destination surface */ - obj_surface = (struct object_surface *)dst_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; + previous_out_obj_surface = (struct object_surface *)dst_surface->base; + orig_w = previous_out_obj_surface->orig_width; + orig_h = previous_out_obj_surface->orig_height; + w = previous_out_obj_surface->width; + h = previous_out_obj_surface->height; + + if (is_first_frame) { + current_out_obj_surface = previous_out_obj_surface; + } else { + VAStatus va_status; + + if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) { + unsigned int tiling = 0, swizzle = 0; + dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle); + + va_status = i965_CreateSurfaces(ctx, + orig_w, + orig_h, + VA_RT_FORMAT_YUV420, + 1, + 
&pp_dndi_context->current_out_surface); + assert(va_status == VA_STATUS_SUCCESS); + pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface); + assert(pp_dndi_context->current_out_obj_surface); + i965_check_alloc_surface_bo(ctx, + pp_dndi_context->current_out_obj_surface, + tiling != I915_TILING_NONE, + VA_FOURCC('N','V','1','2'), + SUBSAMPLE_YUV420); + } + + current_out_obj_surface = pp_dndi_context->current_out_obj_surface; + } /* destination(Previous frame) Y surface index 27 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + previous_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1); /* destination(Previous frame) UV surface index 28 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + previous_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1); /* destination(Current frame) Y surface index 30 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + current_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1); /* destination(Current frame) UV surface index 31 */ + orig_w = current_out_obj_surface->orig_width; + orig_h = current_out_obj_surface->orig_height; + w = current_out_obj_surface->width; + h = current_out_obj_surface->height; + gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1); @@ -3740,7 +3829,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c sampler_dndi[index].dw6.di_partial = 0; sampler_dndi[index].dw6.dndi_top_first = dndi_top_first; sampler_dndi[index].dw6.dndi_stream_id = 1; - sampler_dndi[index].dw6.dndi_first_frame = 1; + sampler_dndi[index].dw6.dndi_first_frame = is_first_frame; sampler_dndi[index].dw6.progressive_dn = 0; sampler_dndi[index].dw6.mcdi_enable = 0; 
sampler_dndi[index].dw6.fmd_tear_threshold = 2; @@ -4577,6 +4666,9 @@ gen6_post_processing( gen6_pp_pipeline_setup(ctx, pp_context); } + if (va_status == VA_STATUS_SUCCESS_1) + va_status = VA_STATUS_SUCCESS; + return va_status; } @@ -4604,18 +4696,6 @@ i965_post_processing_internal( return va_status; } -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static void rgb_to_yuv(unsigned int argb, unsigned char *y, @@ -5352,6 +5432,8 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1); } + pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; + pp_context->pp_dndi_context.current_out_obj_surface = NULL; pp_context->pp_dndi_context.frame_order = -1; pp_context->batch = batch; } @@ -5387,6 +5469,8 @@ static const int proc_frame_to_pp_frame[3] = { I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST }; +#define VA_STATUS_SUCCESS_1 0xFFFFFFFE + VAStatus i965_proc_picture(VADriverContextP ctx, VAProfile profile, @@ -5490,6 +5574,8 @@ i965_proc_picture(VADriverContextP ctx, dst_rect.height = in_height; } + proc_context->pp_context.pipeline_param = pipeline_param; + for (i = 0; i < pipeline_param->num_filters; i++) { struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]); VAProcFilterParameterBufferBase *filter_param = NULL; @@ -5537,6 +5623,7 @@ i965_proc_picture(VADriverContextP ctx, } } + proc_context->pp_context.pipeline_param = NULL; obj_surface = SURFACE(proc_state->current_render_target); assert(obj_surface); diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 052b5734..9b2d7c63 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -96,6 +96,8 @@ struct pp_dndi_context int dest_h; dri_bo *stmm_bo; int frame_order; /* -1 for the 
first frame */ + VASurfaceID current_out_surface; + struct object_surface *current_out_obj_surface; }; struct pp_dn_context @@ -481,6 +483,7 @@ struct i965_post_processing_context struct pp_dndi_context pp_dndi_context; struct pp_dn_context pp_dn_context; void *private_context; /* pointer to the current private context */ + void *pipeline_param; /* pointer to the pipeline parameter */ int (*pp_x_steps)(void *private_context); int (*pp_y_steps)(void *private_context); -- cgit v1.2.1 From 789046916bf50310e9dcb3882c53977eb2c0078e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 13 Sep 2013 15:47:50 +0800 Subject: VPP: Remove an assert on forward temporal reference on IVB Add a warning and return VA_STATUS_ERROR_INVALID_PARAMETER instead Signed-off-by: Xiang, Haihao (cherry picked from commit 8a0a61ce47e086ac156602c3daaf70f20e4ca222) --- src/i965_post_processing.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index b2fa23c1..e1b60323 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3639,13 +3639,18 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) { if (pp_dndi_context->frame_order == 0) { VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param; - assert(pipeline_param->num_forward_references == 1); - assert(pipeline_param->forward_references[0] != VA_INVALID_ID); + if (!pipeline_param || + !pipeline_param->num_forward_references || + pipeline_param->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); - previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]); - assert(previous_in_obj_surface && previous_in_obj_surface->bo); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } else { + previous_in_obj_surface = 
SURFACE(pipeline_param->forward_references[0]); + assert(previous_in_obj_surface && previous_in_obj_surface->bo); - is_first_frame = 0; + is_first_frame = 0; + } } else if (pp_dndi_context->frame_order == 1) { vpp_surface_convert(ctx, pp_dndi_context->current_out_obj_surface, -- cgit v1.2.1 From b99d5891a50bc2059143af6db25b8bd05fd73c85 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 23 Sep 2013 09:51:39 +0800 Subject: VPP: also remove an assert on forward temporal reference on HSW Do the same thing of commit 8a0a61c Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=69669 Signed-off-by: Xiang, Haihao (cherry picked from commit f6685c309d94fb7679c9772703c8790cb71cdd73) --- src/gen75_vpp_vebox.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 50df6274..a973ed49 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -844,8 +844,9 @@ void hsw_veb_resource_prepare(VADriverContextP ctx, } -void hsw_veb_surface_reference(VADriverContextP ctx, - struct intel_vebox_context *proc_ctx) +static VAStatus +hsw_veb_surface_reference(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx) { struct object_surface * obj_surf; VEBFrameStore tmp_store; @@ -880,9 +881,14 @@ void hsw_veb_surface_reference(VADriverContextP ctx, VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param; struct object_surface *obj_surf = NULL; struct i965_driver_data * const i965 = i965_driver_data(ctx); - - assert(pipe->num_forward_references == 1); - assert(pipe->forward_references[0] != VA_INVALID_ID); + + if (!pipe || + !pipe->num_forward_references || + pipe->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); + + return VA_STATUS_ERROR_INVALID_PARAMETER; + } obj_surf = SURFACE(pipe->forward_references[0]); assert(obj_surf && obj_surf->bo); @@ -946,6 +952,8 @@ void 
hsw_veb_surface_reference(VADriverContextP ctx, proc_ctx->frame_store[FRAME_OUT_CURRENT].obj_surface = obj_surf; proc_ctx->current_output = FRAME_OUT_CURRENT; } + + return VA_STATUS_SUCCESS; } void hsw_veb_surface_unreference(VADriverContextP ctx, -- cgit v1.2.1 From 53697bc0fad0a048999b28ea7c8c40a291f9ab2d Mon Sep 17 00:00:00 2001 From: Zhao Halley Date: Thu, 12 Sep 2013 08:09:23 +0800 Subject: support 422H/422V/411P/444P format for wayland video output (cherry picked from commit 7a02190eda3abc245ba918f6078a121373f50703) --- src/i965_output_wayland.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c index be7f32c3..569d79be 100644 --- a/src/i965_output_wayland.c +++ b/src/i965_output_wayland.c @@ -250,6 +250,10 @@ va_GetSurfaceBufferWl( case VA_FOURCC('I','4','2','0'): case VA_FOURCC('I','M','C','1'): case VA_FOURCC('I','M','C','3'): + case VA_FOURCC('4','2','2','H'): + case VA_FOURCC('4','2','2','V'): + case VA_FOURCC('4','1','1','P'): + case VA_FOURCC('4','4','4','P'): switch (obj_surface->subsampling) { case SUBSAMPLE_YUV411: drm_format = WL_DRM_FORMAT_YUV411; -- cgit v1.2.1 From 6eeca792ef619ea4f5175256d297c3ea343d78ba Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 22 Sep 2013 13:03:42 +0800 Subject: VPP: fix the check for DI filter flags on SNB/IVB Signed-off-by: Xiang, Haihao (cherry picked from commit ea3c725a7f9fa66e7ef05e1c08af30c28f8c1aa0) --- src/i965_post_processing.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e1b60323..536eb64a 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3215,14 +3215,10 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex int dndi_top_first = 1; VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; - if (di_filter_param->flags == 
I965_SURFACE_FLAG_FRAME) - return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - - if (di_filter_param->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) { - dndi_top_first = 1; - } else { + if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST) dndi_top_first = 0; - } + else + dndi_top_first = 1; /* surface */ obj_surface = (struct object_surface *)src_surface->base; @@ -3621,14 +3617,10 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; int is_first_frame = (pp_dndi_context->frame_order == -1); - if (di_filter_param->flags == I965_SURFACE_FLAG_FRAME) - return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - - if (di_filter_param->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) { - dndi_top_first = 1; - } else { + if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST) dndi_top_first = 0; - } + else + dndi_top_first = 1; /* surface */ current_in_obj_surface = (struct object_surface *)src_surface->base; -- cgit v1.2.1 From 8f306e3ca10bd610c1150f71d37d7fda5b16471e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 23 Sep 2013 15:36:41 +0800 Subject: Intel driver 1.2.1 Signed-off-by: Xiang, Haihao --- NEWS | 12 +++++++++--- configure.ac | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 83b237d1..267af5ff 100644 --- a/NEWS +++ b/NEWS @@ -1,11 +1,17 @@ -libva-intel-driver NEWS -- summary of changes. 2013-09-DD +libva-intel-driver NEWS -- summary of changes. 
2013-09-23 Copyright (C) 2009-2013 Intel Corporation -Version 1.2.1 - DD.Sep.2013 +Version 1.2.1 - 23.Sep.2013 * Add PCI IDs for Bay Trail * Performance improvement for MPEG-2 Encoding on IVB/HSW * Add basic processing support for packed YUV to packed YUV on ILK+ -* Bug fixes +* Check the underlying OS support for VEBOX on HSW +* Quality improvement for BobDI on SNB/IVB +* Add support for Motion Adaptive Deinterlacing on IVB +* vaDeriveImage() works for UYVY formats +* Fix thread safety issue +* Fix GPU hang issue when decoding some videos on SNB +* Fix output filter count from QueryVideoProcFilters() Version 1.2.0 - 26.Jun.2013 * The new H.264 encoding API on SNB/IVB/HSW diff --git a/configure.ac b/configure.ac index e39f1d53..913c4022 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [2]) m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From f6bb88614af80b55d4dff6c804e0e510d85b855a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 24 Sep 2013 09:13:22 +0800 Subject: 1.2.2.pre1 for development Signed-off-by: Xiang, Haihao --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 913c4022..1d9ae852 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [2]) -m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [2]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], 
[intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 09082257b49fecf213522e84c42482115ea84878 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 24 Sep 2013 12:52:37 +0800 Subject: fix make dist Signed-off-by: Xiang, Haihao --- src/shaders/vme/Makefile.am | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index d3c20998..096cee45 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -64,11 +64,8 @@ EXTRA_DIST = \ $(INTEL_G75B) \ $(INTEL_G7A) \ $(INTEL_G7B) \ - $(INTEL_GEN6_ASM) \ $(INTEL_GEN6_INC) \ - $(INTEL_GEN75_ASM) \ $(INTEL_GEN75_INC) \ - $(INTEL_GEN7_ASM) \ $(INTEL_GEN7_INC) \ $(VME75_CORE) \ $(VME_CORE) \ -- cgit v1.2.1 From 01b39f7ef75c028a99a02e98af6ec9ca2a9c6773 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 24 Sep 2013 13:25:37 +0800 Subject: fix the broken package generated by make dist Signed-off-by: Xiang, Haihao --- configure.ac | 1 + src/Makefile.am | 1 + src/shaders/post_processing/Makefile.am | 2 +- src/shaders/post_processing/gen75/Makefile.am | 9 +++++++++ src/shaders/vme/Makefile.am | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 src/shaders/post_processing/gen75/Makefile.am diff --git a/configure.ac b/configure.ac index 1d9ae852..f531c950 100644 --- a/configure.ac +++ b/configure.ac @@ -178,6 +178,7 @@ AC_OUTPUT([ src/shaders/post_processing/Makefile src/shaders/post_processing/gen5_6/Makefile src/shaders/post_processing/gen7/Makefile + src/shaders/post_processing/gen75/Makefile src/shaders/render/Makefile src/shaders/utils/Makefile src/shaders/vme/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 3299733c..edf8f4e7 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -138,6 +138,7 @@ driver_cflags += $(WAYLAND_CFLAGS) endif # Wayland protocol +protocol_source_h = wayland-drm-client-protocol.h 
i965_output_wayland.c: $(protocol_source_h) @wayland_scanner_rules@ diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am index b19020f7..a0022ca9 100644 --- a/src/shaders/post_processing/Makefile.am +++ b/src/shaders/post_processing/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = gen5_6 gen7 +SUBDIRS = gen5_6 gen7 gen75 # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen75/Makefile.am b/src/shaders/post_processing/gen75/Makefile.am new file mode 100644 index 00000000..cdad1d90 --- /dev/null +++ b/src/shaders/post_processing/gen75/Makefile.am @@ -0,0 +1,9 @@ +INTEL_PP_PRE_G75B = \ + sharpening_h_blur.g75b \ + sharpening_unmask.g75b \ + sharpening_v_blur.g75b + +EXTRA_DIST = $(INTEL_PP_PRE_G75B) + +# Extra clean files so that maintainer-clean removes *everything* +MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 096cee45..867a93c1 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -68,6 +68,7 @@ EXTRA_DIST = \ $(INTEL_GEN75_INC) \ $(INTEL_GEN7_INC) \ $(VME75_CORE) \ + $(VME7_CORE) \ $(VME_CORE) \ $(NULL) -- cgit v1.2.1 From 34de629665bed1be2413ff503ed12c6e189cb0d5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 14:44:35 +0800 Subject: Indent the code of encoding Signed-off-by: Xiang, Haihao (cherry picked from commit 6ad68f55e9ba49af541a5e4d86a305bbd0f22d63) --- src/gen6_mfc.c | 4 +- src/gen6_mfc.h | 36 +-- src/gen6_mfc_common.c | 636 +++++++++++++++++++++++++------------------------- src/gen6_vme.c | 62 ++--- src/gen6_vme.h | 46 ++-- src/gen75_mfc.c | 524 ++++++++++++++++++++--------------------- src/gen75_vme.c | 110 ++++----- src/gen7_mfc.c | 214 ++++++++--------- src/gen7_vme.c | 256 ++++++++++---------- 9 files changed, 944 insertions(+), 944 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 1103e612..62fa2e93 
100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -609,8 +609,8 @@ gen6_mfc_init(VADriverContextP ctx, } static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index e6b04a16..1b71218a 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -200,10 +200,10 @@ struct gen6_mfc_context void (*set_surface_state)(VADriverContextP ctx, struct intel_encoder_context *encoder_context); void (*ind_obj_base_addr_state)(VADriverContextP ctx, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); void (*avc_img_state)(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); void (*avc_qm_state)(VADriverContextP ctx, struct intel_encoder_context *encoder_context); void (*avc_fqm_state)(VADriverContextP ctx, @@ -234,38 +234,38 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context * extern int intel_mfc_update_hrd(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits); + struct gen6_mfc_context *mfc_context, + int frame_bits); extern int intel_mfc_brc_postpack(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits); + struct gen6_mfc_context *mfc_context, + int frame_bits); extern void intel_mfc_hrd_context_update(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context); + struct gen6_mfc_context *mfc_context); extern int intel_mfc_interlace_check(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state 
*encode_state, + struct intel_encoder_context *encoder_context); extern void intel_mfc_brc_prepare(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); extern void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch); extern VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); extern int intel_avc_enc_slice_type_fixup(int type); extern void intel_mfc_avc_ref_idx_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); #endif /* _GEN6_MFC_BCS_H_ */ diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 469bf640..88a8d618 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -43,10 +43,10 @@ #include "gen6_vme.h" #include "intel_media.h" -#define BRC_CLIP(x, min, max) \ -{ \ - x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \ -} +#define BRC_CLIP(x, min, max) \ + { \ + x = ((x > (max)) ? (max) : ((x < (min)) ? 
(min) : x)); \ + } #define BRC_P_B_QP_DIFF 4 #define BRC_I_P_QP_DIFF 2 @@ -86,7 +86,7 @@ int intel_avc_enc_slice_type_fixup(int slice_type) static void intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context) + struct gen6_mfc_context *mfc_context) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; @@ -130,7 +130,7 @@ intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, } static void intel_mfc_brc_init(struct encode_state *encode_state, - struct intel_encoder_context* encoder_context) + struct intel_encoder_context* encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -166,7 +166,7 @@ static void intel_mfc_brc_init(struct encode_state *encode_state, mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size; mfc_context->hrd.current_buffer_fullness = (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)? 
- pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; + pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.; mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size; mfc_context->hrd.violation_noted = 0; @@ -188,8 +188,8 @@ static void intel_mfc_brc_init(struct encode_state *encode_state, } int intel_mfc_update_hrd(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits) + struct gen6_mfc_context *mfc_context, + int frame_bits) { double prev_bf = mfc_context->hrd.current_buffer_fullness; @@ -213,8 +213,8 @@ int intel_mfc_update_hrd(struct encode_state *encode_state, } int intel_mfc_brc_postpack(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits) + struct gen6_mfc_context *mfc_context, + int frame_bits) { gen6_brc_status sts = BRC_NO_HRD_VIOLATION; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; @@ -243,7 +243,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state, frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype]; if (frame_size_alpha > 30) frame_size_alpha = 30; frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) / - (double)(frame_size_alpha + 1.); + (double)(frame_size_alpha + 1.); /* frame_size_next: avoiding negative number and too small value */ if ((double)frame_size_next < (double)(target_frame_size * 0.25)) @@ -333,7 +333,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state, } static void intel_mfc_hrd_context_init(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSequenceParameterBufferH264 *pSequenceParameter = 
(VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -357,14 +357,14 @@ static void intel_mfc_hrd_context_init(struct encode_state *encode_state, void intel_mfc_hrd_context_update(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context) + struct gen6_mfc_context *mfc_context) { mfc_context->vui_hrd.i_frame_number++; } int intel_mfc_interlace_check(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSliceParameterBufferH264 *pSliceParameter; @@ -385,7 +385,7 @@ int intel_mfc_interlace_check(VADriverContextP ctx, } void intel_mfc_brc_prepare(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { unsigned int rate_control_mode = encoder_context->rate_control_mode; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -404,9 +404,9 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state, } void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS); @@ -484,13 +484,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, unsigned char *sei_data = NULL; int length_in_bits = build_avc_sei_buffer_timing( - mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, - mfc_context->vui_hrd.i_initial_cpb_removal_delay, - 0, - mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay 
* mfc_context->vui_hrd.i_frame_number, - mfc_context->vui_hrd.i_dpb_output_delay_length, - 0, - &sei_data); + mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, + mfc_context->vui_hrd.i_initial_cpb_removal_delay, + 0, + mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number, + mfc_context->vui_hrd.i_dpb_output_delay_length, + 0, + &sei_data); mfc_context->insert_object(ctx, encoder_context, (unsigned int *)sei_data, @@ -506,8 +506,8 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, } VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -662,44 +662,44 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, */ int intel_format_lutvalue(int value, int max) { - int ret; - int logvalue, temp1, temp2; + int ret; + int logvalue, temp1, temp2; - if (value <= 0) - return 0; + if (value <= 0) + return 0; - logvalue = (int)(log2f((float)value)); - if (logvalue < 4) { - ret = value; - } else { - int error, temp_value, base, j, temp_err; - error = value; - j = logvalue - 4 + 1; - ret = -1; - for(; j <= logvalue; j++) { - if (j == 0) { - base = value >> j; - } else { - base = (value + (1 << (j - 1)) - 1) >> j; - } - if (base >= 16) - continue; - - temp_value = base << j; - temp_err = abs(value - temp_value); - if (temp_err < error) { - error = temp_err; - ret = (j << 4) | base; - if (temp_err == 0) - break; - } - } - } - temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4); - temp2 = (max & 0xf) << ((max & 0xf0) >> 4); - if (temp1 > temp2) - ret = max; - return ret; + logvalue = (int)(log2f((float)value)); + if (logvalue < 4) { + ret = value; + } else { + int error, temp_value, base, j, 
temp_err; + error = value; + j = logvalue - 4 + 1; + ret = -1; + for(; j <= logvalue; j++) { + if (j == 0) { + base = value >> j; + } else { + base = (value + (1 << (j - 1)) - 1) >> j; + } + if (base >= 16) + continue; + + temp_value = base << j; + temp_err = abs(value - temp_value); + if (temp_err < error) { + error = temp_err; + ret = (j << 4) | base; + if (temp_err == 0) + break; + } + } + } + temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4); + temp2 = (max & 0xf) << ((max & 0xf0) >> 4); + if (temp1 > temp2) + ret = max; + return ret; } @@ -709,19 +709,19 @@ int intel_format_lutvalue(int value, int max) static float intel_lambda_qp(int qp) { - float value, lambdaf; - value = qp; - value = value / 6 - 2; - if (value < 0) - value = 0; - lambdaf = roundf(powf(2, value)); - return lambdaf; + float value, lambdaf; + value = qp; + value = value / 6 - 2; + if (value < 0) + value = 0; + lambdaf = roundf(powf(2, value)); + return lambdaf; } void intel_vme_update_mbmv_cost(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -756,30 +756,30 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, m_cost = 0; vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f); for (j = 1; j < 3; j++) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); } mv_count = 3; for (j = 4; j <= 64; j *= 2) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + mv_count] = 
intel_format_lutvalue(m_cost, 0x6f); - mv_count++; + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f); + mv_count++; } if (qp <= 25) { - vme_state_message[MODE_INTRA_16X16] = 0x4a; - vme_state_message[MODE_INTRA_8X8] = 0x4a; - vme_state_message[MODE_INTRA_4X4] = 0x4a; - vme_state_message[MODE_INTRA_NONPRED] = 0x4a; - vme_state_message[MODE_INTER_16X16] = 0x4a; - vme_state_message[MODE_INTER_16X8] = 0x4a; - vme_state_message[MODE_INTER_8X8] = 0x4a; - vme_state_message[MODE_INTER_8X4] = 0x4a; - vme_state_message[MODE_INTER_4X4] = 0x4a; - vme_state_message[MODE_INTER_BWD] = 0x2a; - return; + vme_state_message[MODE_INTRA_16X16] = 0x4a; + vme_state_message[MODE_INTRA_8X8] = 0x4a; + vme_state_message[MODE_INTRA_4X4] = 0x4a; + vme_state_message[MODE_INTRA_NONPRED] = 0x4a; + vme_state_message[MODE_INTER_16X16] = 0x4a; + vme_state_message[MODE_INTER_16X8] = 0x4a; + vme_state_message[MODE_INTER_8X8] = 0x4a; + vme_state_message[MODE_INTER_8X4] = 0x4a; + vme_state_message[MODE_INTER_4X4] = 0x4a; + vme_state_message[MODE_INTER_BWD] = 0x2a; + return; } m_costf = lambda * 10; vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); @@ -791,42 +791,42 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, m_cost = m_costf; vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); if (slice_type == SLICE_TYPE_P) { - m_costf = lambda * 2.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 4; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 1.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 3; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 5; - m_cost = m_costf; 
- vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); - /* BWD is not used in P-frame */ - vme_state_message[MODE_INTER_BWD] = 0; + m_costf = lambda * 2.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 4; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 1.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 3; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 5; + m_cost = m_costf; + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); + /* BWD is not used in P-frame */ + vme_state_message[MODE_INTER_BWD] = 0; } else { - m_costf = lambda * 2.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 5.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 3.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 5.0; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 6.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 1.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 2.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 5.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 3.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 5.0; + m_cost = m_costf; + 
vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 6.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 1.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); } } } @@ -841,8 +841,8 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1; vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING; vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A | - MB_SCOREBOARD_B | - MB_SCOREBOARD_C); + MB_SCOREBOARD_B | + MB_SCOREBOARD_C); /* In VME prediction the current mb depends on the neighbour * A/B/C macroblock. So the left/up/up-right dependency should @@ -862,25 +862,25 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont /* check whether the mb of (x_index, y_index) is out of bound */ static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height) { - int mb_index; - if (x_index < 0 || x_index >= mb_width) - return -1; - if (y_index < 0 || y_index >= mb_height) - return -1; + int mb_index; + if (x_index < 0 || x_index >= mb_width) + return -1; + if (y_index < 0 || y_index >= mb_height) + return -1; - mb_index = y_index * mb_width + x_index; - if (mb_index < first_mb || mb_index > (first_mb + num_mb)) - return -1; - return 0; + mb_index = y_index * mb_width + x_index; + if (mb_index < first_mb || mb_index > (first_mb + num_mb)) + return -1; + return 0; } void gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context 
*encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; int mb_row; @@ -922,7 +922,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, if (x_inner != (mb_width -1)) { mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; score_dep |= MB_SCOREBOARD_C; - } + } } *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); @@ -944,7 +944,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, xtemp_outer = mb_width - 2; if (xtemp_outer < 0) - xtemp_outer = 0; + xtemp_outer = 0; x_outer = xtemp_outer; y_outer = first_mb / mb_width; for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { @@ -966,7 +966,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, if (x_inner != (mb_width -1)) { mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; score_dep |= MB_SCOREBOARD_C; - } + } } *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); @@ -1001,198 +1001,198 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, static uint8_t intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id) { - unsigned int is_long_term = - !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); - unsigned int is_top_field = - !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); - unsigned int is_bottom_field = - !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); - - return ((is_long_term << 6) | - ((is_top_field ^ is_bottom_field ^ 1) << 5) | - (frame_store_id << 1) | - ((is_top_field ^ 1) & is_bottom_field)); + unsigned int is_long_term = + !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); + unsigned int is_top_field = + !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); + unsigned int is_bottom_field = + !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); + + return ((is_long_term << 6) | + ((is_top_field ^ is_bottom_field ^ 1) << 5) | + (frame_store_id << 1) | + ((is_top_field ^ 1) & is_bottom_field)); } void intel_mfc_avc_ref_idx_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context 
*encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct i965_driver_data *i965 = i965_driver_data(ctx); - int slice_type; - struct object_surface *slice_obj_surface, *obj_surface; - int ref_surface_id; - unsigned int fref_entry, bref_entry; - int frame_index, i; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - - fref_entry = 0x80808080; - bref_entry = 0x80808080; - slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); - - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - frame_index = -1; - for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { - frame_index = i; - break; - } - } - if (frame_index == -1) { - WARN_ONCE("RefPicList0 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - /* This is passed by Slice_param->RefPicList0 */ - fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList0[0], frame_index); - } else { - /* This is passed by the hacked mode */ - fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); - } - } + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int slice_type; + struct object_surface *slice_obj_surface, 
*obj_surface; + int ref_surface_id; + unsigned int fref_entry, bref_entry; + int frame_index, i; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - if (slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } - frame_index = -1; - for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { - frame_index = i; - break; - } - } - if (frame_index == -1) { - WARN_ONCE("RefPicList1 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList1[0], frame_index); - } else { - bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); - } + fref_entry = 0x80808080; + bref_entry = 0x80808080; + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; } + frame_index = -1; + for (i = 0; i < 16; i++) { + if (obj_surface == encode_state->reference_objects[i]) { + frame_index = i; + break; + } + } + if (frame_index == -1) { + 
WARN_ONCE("RefPicList0 is not found in DPB!\n"); + } else if (slice_obj_surface && slice_obj_surface->bo) { + /* This is passed by Slice_param->RefPicList0 */ + fref_entry &= ~(0xFF); + fref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList0[0], frame_index); + } else { + /* This is passed by the hacked mode */ + fref_entry &= ~(0xFF); + fref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + } + } - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 0); //Select L0 - OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); - } - ADVANCE_BCS_BATCH(batch); - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 1); //Select L1 - OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); - } - ADVANCE_BCS_BATCH(batch); + if (slice_type == SLICE_TYPE_B) { + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[1]; + } + frame_index = -1; + for (i = 0; i < 16; i++) { + if (obj_surface == encode_state->reference_objects[i]) { + frame_index = i; + break; + } + } + if (frame_index == -1) { + WARN_ONCE("RefPicList1 is not found in DPB!\n"); + } else if (slice_obj_surface && slice_obj_surface->bo) { + bref_entry &= ~(0xFF); + bref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList1[0], frame_index); + } else { + bref_entry &= ~(0xFF); + bref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + } + } + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, 
MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 0); //Select L0 + OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 1); //Select L1 + OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); } void intel_vme_mpeg2_state_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct gen6_vme_context *vme_context = encoder_context->vme_context; - uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message); - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - uint32_t mv_x, mv_y; - VAEncSliceParameterBufferMPEG2 *slice_param = NULL; - VAEncPictureParameterBufferMPEG2 *pic_param = NULL; - slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; - - if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { - mv_x = 512; - mv_y = 64; - } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) { - mv_x = 1024; - mv_y = 128; - } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) { - mv_x = 2048; - mv_y = 128; - } else { - WARN_ONCE("Incorrect Mpeg2 level setting!\n"); - mv_x = 512; - mv_y = 64; - } + struct gen6_vme_context *vme_context = encoder_context->vme_context; + uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message); + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = 
ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + uint32_t mv_x, mv_y; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + + if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { + mv_x = 512; + mv_y = 64; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) { + mv_x = 1024; + mv_y = 128; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) { + mv_x = 2048; + mv_y = 128; + } else { + WARN_ONCE("Incorrect Mpeg2 level setting!\n"); + mv_x = 512; + mv_y = 64; + } - pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; - if (pic_param->picture_type != VAEncPictureTypeIntra) { - int qp, m_cost, j, mv_count; - float lambda, m_costf; - slice_param = (VAEncSliceParameterBufferMPEG2 *) - encode_state->slice_params_ext[0]->buffer; - qp = slice_param->quantiser_scale_code; - lambda = intel_lambda_qp(qp); - /* No Intra prediction. So it is zero */ - vme_state_message[MODE_INTRA_8X8] = 0; - vme_state_message[MODE_INTRA_4X4] = 0; - vme_state_message[MODE_INTER_MV0] = 0; - for (j = 1; j < 3; j++) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); - } - mv_count = 3; - for (j = 4; j <= 64; j *= 2) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + mv_count] = - intel_format_lutvalue(m_cost, 0x6f); - mv_count++; - } - m_cost = lambda; - /* It can only perform the 16x16 search. So mode cost can be ignored for - * the other mode. 
for example: 16x8/8x8 - */ - vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); - vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type != VAEncPictureTypeIntra) { + int qp, m_cost, j, mv_count; + float lambda, m_costf; + slice_param = (VAEncSliceParameterBufferMPEG2 *) + encode_state->slice_params_ext[0]->buffer; + qp = slice_param->quantiser_scale_code; + lambda = intel_lambda_qp(qp); + /* No Intra prediction. So it is zero */ + vme_state_message[MODE_INTRA_8X8] = 0; + vme_state_message[MODE_INTRA_4X4] = 0; + vme_state_message[MODE_INTER_MV0] = 0; + for (j = 1; j < 3; j++) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); + } + mv_count = 3; + for (j = 4; j <= 64; j *= 2) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + mv_count] = + intel_format_lutvalue(m_cost, 0x6f); + mv_count++; + } + m_cost = lambda; + /* It can only perform the 16x16 search. So mode cost can be ignored for + * the other mode. 
for example: 16x8/8x8 + */ + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); - vme_state_message[MODE_INTER_16X8] = 0; - vme_state_message[MODE_INTER_8X8] = 0; - vme_state_message[MODE_INTER_8X4] = 0; - vme_state_message[MODE_INTER_4X4] = 0; - vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); + vme_state_message[MODE_INTER_16X8] = 0; + vme_state_message[MODE_INTER_8X8] = 0; + vme_state_message[MODE_INTER_8X4] = 0; + vme_state_message[MODE_INTER_4X4] = 0; + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); - } - vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); + } + vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); - vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | - width_in_mbs; + vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | + width_in_mbs; } void gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *command_ptr; @@ -1255,7 +1255,7 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, xtemp_outer = mb_width - 2; if (xtemp_outer < 0) - xtemp_outer = 0; + xtemp_outer = 0; x_outer = xtemp_outer; y_outer = 0; for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 443dda89..1d475179 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -221,36 +221,36 @@ gen6_vme_surface_setup(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == 
SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + /* reference 0 */ + if (obj_surface && obj_surface->bo) + gen6_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); } if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + /* reference 1 */ + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + + obj_surface = 
encode_state->reference_objects[1]; + if (obj_surface && obj_surface->bo) + gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } } @@ -319,7 +319,7 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx, if (vme_context->h264_level >= 30) { mv_num = 16; if (vme_context->h264_level >= 31) - mv_num = 8; + mv_num = 8; } dri_bo_map(vme_context->gpe_context.curbe.bo, 1); @@ -579,7 +579,7 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = encoder_context->vme_context; if (!vme_context->h264_level || - (vme_context->h264_level != pSequenceParameter->level_idc)) { + (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } /*Setup all the memory object*/ @@ -657,7 +657,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context = calloc(1, sizeof(struct gen6_vme_context)); vme_context->gpe_context.surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); diff --git a/src/gen6_vme.h b/src/gen6_vme.h index b130b58f..09d9673b 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -62,25 +62,25 @@ struct gen6_vme_context void (*vme_surface2_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void (*vme_media_rw_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned 
long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void (*vme_buffer_suface_setup)(VADriverContextP ctx, struct i965_gpe_context *gpe_context, struct i965_buffer_surface *buffer_surface, unsigned long binding_table_offset, unsigned long surface_state_offset); void (*vme_media_chroma_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void *vme_state_message; unsigned int h264_level; unsigned int video_coding_type; @@ -137,25 +137,25 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e extern void gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context); extern void gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context); extern void intel_vme_mpeg2_state_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); extern void gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + int mb_width, int mb_height, + int 
kernel, + struct intel_encoder_context *encoder_context); #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index b3b6f881..4eecc9c7 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -86,8 +86,8 @@ static struct i965_kernel gen75_mfc_kernels[] = { static void gen75_mfc_pipe_mode_select(VADriverContextP ctx, - int standard_select, - struct intel_encoder_context *encoder_context) + int standard_select, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -152,7 +152,7 @@ gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -161,11 +161,11 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 26); OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); - /* the DW1-3 is for the MFX indirect bistream offset */ + /* the DW1-3 is for the MFX indirect bistream offset */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-5 is the MFX upper bound */ + /* the DW4-5 is the MFX upper bound */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -176,14 +176,14 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ OUT_BCS_BATCH(batch, 0); - /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ + /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW16-20 is for MFX indirect DBLK. 
Not used on encoder */ + /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -247,7 +247,7 @@ gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -291,22 +291,22 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /* DW5 Trellis quantization */ + /* DW5 Trellis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ OUT_BCS_BATCH(batch, 0); /* Reserved */ - /* DW8. QP delta */ + /* DW8. QP delta */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10. Bit setting for MB */ + /* DW10. Bit setting for MB */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); - /* DW12. */ + /* DW12. */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0x02010100); - /* DW14. For short format */ + /* DW14. 
For short format */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -315,10 +315,10 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, static void gen75_mfc_qm_state(VADriverContextP ctx, - int qm_type, - unsigned int *qm, - int qm_length, - struct intel_encoder_context *encoder_context) + int qm_type, + unsigned int *qm, + int qm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int qm_buffer[16]; @@ -352,10 +352,10 @@ gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encod static void gen75_mfc_fqm_state(VADriverContextP ctx, - int fqm_type, - unsigned int *fqm, - int fqm_length, - struct intel_encoder_context *encoder_context) + int fqm_type, + unsigned int *fqm, + int fqm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int fqm_buffer[32]; @@ -393,9 +393,9 @@ gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, - unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, - int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, - struct intel_batchbuffer *batch) + unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, + int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, + struct intel_batchbuffer *batch) { if (batch == NULL) batch = encoder_context->base.batch; @@ -418,8 +418,8 @@ gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context * static void gen75_mfc_init(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = 
i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -519,7 +519,7 @@ static void gen75_mfc_init(VADriverContextP ctx, static void gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -537,9 +537,9 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, else OUT_BCS_BATCH(batch, 0); /* pre output addr */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for the post_deblocking */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-6 is for the post_deblocking */ if (mfc_context->post_deblocking_output.bo) OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, @@ -547,37 +547,37 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, 0); /* post output addr */ else OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for the uncompressed_picture */ + /* the DW7-9 is for the uncompressed_picture */ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* uncompressed data */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW10-12 is for the mb status */ + /* the DW10-12 is for the mb status */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* StreamOut data*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW13-15 is for the intra_row_store_scratch */ + /* the DW13-15 is for the intra_row_store_scratch */ OUT_BCS_RELOC(batch, 
mfc_context->intra_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW16-18 is for the deblocking filter */ + /* the DW16-18 is for the deblocking filter */ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* the DW 19-50 is for Reference pictures*/ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { @@ -590,25 +590,25 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, } OUT_BCS_BATCH(batch, 0); } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* The DW 52-54 is for the MB status buffer */ + /* The DW 52-54 is for the MB status buffer */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 55-57 is the ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 55-57 is the ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 58-60 is the second ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 58-60 is the second ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } @@ -669,14 +669,14 @@ gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); 
ADVANCE_BCS_BATCH(batch); } static void gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -700,15 +700,15 @@ gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); } } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW34-36 is the MV for the current reference */ - OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* POL list */ for(i = 0; i < 32; i++) { @@ -761,7 +761,7 @@ gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_contex static void gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -775,12 +775,12 @@ gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for Bitplane Read Buffer Base Address */ + /* the DW7-9 is for Bitplane Read Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -814,8 +814,8 @@ 
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -833,8 +833,8 @@ static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, static VAStatus gen75_mfc_run(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -846,9 +846,9 @@ static VAStatus gen75_mfc_run(VADriverContextP ctx, static VAStatus gen75_mfc_stop(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int *encoded_bits_size) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int *encoded_bits_size) { VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; @@ -865,13 +865,13 @@ gen75_mfc_stop(VADriverContextP ctx, static void gen75_mfc_avc_slice_state(VADriverContextP ctx, - VAEncPictureParameterBufferH264 *pic_param, - VAEncSliceParameterBufferH264 *slice_param, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int rate_control_enable, - int qp, - struct intel_batchbuffer *batch) + VAEncPictureParameterBufferH264 *pic_param, + VAEncSliceParameterBufferH264 *slice_param, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int rate_control_enable, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int 
width_in_mbs = (mfc_context->surface_state.width + 15) / 16; @@ -985,10 +985,10 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, static int gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, - int qp,unsigned int *msg, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size, unsigned char max_mb_size, - struct intel_batchbuffer *batch) + int qp,unsigned int *msg, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size, unsigned char max_mb_size, + struct intel_batchbuffer *batch) { int len_in_dwords = 12; unsigned int intra_msg; @@ -1035,13 +1035,13 @@ gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, - unsigned int *msg, unsigned int offset, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, - struct intel_batchbuffer *batch) + unsigned int *msg, unsigned int offset, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, + struct intel_batchbuffer *batch) { int len_in_dwords = 12; - unsigned int inter_msg = 0; + unsigned int inter_msg = 0; if (batch == NULL) batch = encoder_context->base.batch; { @@ -1053,30 +1053,30 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i * command. 
*/ if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { - /* MV[0] and MV[2] are replicated */ - mv_ptr[4] = mv_ptr[0]; - mv_ptr[5] = mv_ptr[1]; - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[6] = mv_ptr[8]; - mv_ptr[7] = mv_ptr[9]; + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { - /* MV[0] and MV[1] are replicated */ - mv_ptr[2] = mv_ptr[0]; - mv_ptr[3] = mv_ptr[1]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - !(msg[1] & SUBMB_SHAPE_MASK)) { - /* Don't touch MV[0] or MV[1] */ - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } } @@ -1084,21 +1084,21 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - inter_msg = 32; - /* MV quantity */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { - if (msg[1] & SUBMB_SHAPE_MASK) - inter_msg = 128; - } + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ OUT_BCS_BATCH(batch, offset); - inter_msg = msg[0] & (0x1F00FFFF); - inter_msg |= INTER_MV8; - inter_msg |= ((1 << 19) | (1 
<< 18) | (1 << 17)); - if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - (msg[1] & SUBMB_SHAPE_MASK)) { - inter_msg |= INTER_MV32; - } + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } OUT_BCS_BATCH(batch, inter_msg); @@ -1114,7 +1114,7 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ #endif - inter_msg = msg[1] >> 8; + inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); OUT_BCS_BATCH(batch, 0x0); @@ -1139,10 +1139,10 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i static void gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1175,10 +1175,10 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, assert(qp >= 0 && qp < 52); gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + pPicParameter, + pSliceParameter, + encode_state, encoder_context, + (rate_control_mode == VA_RC_CBR), qp, slice_batch); if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1242,8 +1242,8 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, static dri_bo * gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, - struct encode_state 
*encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; @@ -1278,8 +1278,8 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, static void gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1301,8 +1301,8 @@ gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, static void gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -1326,8 +1326,8 @@ gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, static void gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context); @@ -1335,8 +1335,8 @@ gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_interface_descriptor_data *desc; @@ -1378,8 +1378,8 @@ gen75_mfc_batchbuffer_idrt_setup(VADriverContextP 
ctx, static void gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -1388,19 +1388,19 @@ gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, - int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, - int number_mb_cmds, - int first_object, - int last_object, - int last_slice, - int mb_x, - int mb_y, - int width_in_mbs, - int qp) + int index, + int head_offset, + int batchbuffer_offset, + int head_size, + int tail_size, + int number_mb_cmds, + int first_object, + int last_object, + int last_slice, + int mb_x, + int mb_y, + int width_in_mbs, + int qp) { BEGIN_BATCH(batch, 12); @@ -1434,14 +1434,14 @@ gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, static void gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - VAEncSliceParameterBufferH264 *slice_param, - int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, - int qp, - int last_slice) + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + int head_offset, + unsigned short head_size, + unsigned short tail_size, + int batchbuffer_offset, + int qp, + int last_slice) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -1464,19 +1464,19 @@ gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, starting_mb += number_mb_cmds; gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - 
last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); + index, + head_offset, + batchbuffer_offset, + head_size, + tail_size, + number_mb_cmds, + first_object, + last_object, + last_slice, + mb_x, + mb_y, + width_in_mbs, + qp); if (first_object) { head_offset += head_size; @@ -1502,19 +1502,19 @@ gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, starting_mb += number_mb_cmds; gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); + index, + head_offset, + batchbuffer_offset, + head_size, + tail_size, + number_mb_cmds, + first_object, + last_object, + last_slice, + mb_x, + mb_y, + width_in_mbs, + qp); } } @@ -1523,10 +1523,10 @@ gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, */ static int gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + int batchbuffer_offset) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; @@ -1559,13 +1559,13 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, head_offset = old_used / 16; gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, - encoder_context, - (rate_control_mode == VA_RC_CBR), - qp, - slice_batch); + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp, + slice_batch); if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1621,22 +1621,22 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, gen75_mfc_avc_batchbuffer_slice_command(ctx, - 
encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); + encoder_context, + pSliceParameter, + head_offset, + head_size, + tail_size, + batchbuffer_offset, + qp, + last_slice); return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; } static void gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -1655,8 +1655,8 @@ gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, static void gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context); @@ -1666,8 +1666,8 @@ gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, static dri_bo * gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -1681,8 +1681,8 @@ gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -1723,8 +1723,8 @@ 
gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, static VAStatus gen75_mfc_avc_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; unsigned int rate_control_mode = encoder_context->rate_control_mode; @@ -1832,7 +1832,7 @@ static void gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -1861,14 +1861,14 @@ static void gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, - 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, 
+ 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -2251,34 +2251,34 @@ gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; if (intra_rdo < inter_rdo) - gen75_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); + gen75_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); else - gen75_mfc_mpeg2_pak_object_inter(ctx, - encode_state, - encoder_context, - msg, - width_in_mbs, height_in_mbs, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - slice_param->quantiser_scale_code, - 0, - 0xff, - slice_batch); + gen75_mfc_mpeg2_pak_object_inter(ctx, + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); } } @@ -2553,9 +2553,9 @@ gen75_mfc_context_destroy(void *context) } static VAStatus gen75_mfc_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context 
*encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus; diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 3e769ed0..515d8c00 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -282,36 +282,36 @@ gen75_vme_surface_setup(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + /* reference 0 */ + if (obj_surface && obj_surface->bo) + gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); } if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } + /* reference 1 */ + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id 
!= 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + obj_surface = encode_state->reference_objects[1]; + if (obj_surface && obj_surface->bo) + gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } } @@ -618,35 +618,35 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, for (s = 0; s < encode_state->num_slice_params_ext; s++) { pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; + allow_hwscore = false; + break; } } if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = VME_INTRA_SHADER; - } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || - (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { kernel_shader = VME_INTER_SHADER; - } else { + } else { kernel_shader = VME_BINTER_SHADER; if (!allow_hwscore) - kernel_shader = VME_INTER_SHADER; - } + kernel_shader = VME_INTER_SHADER; + } if (allow_hwscore) gen7_vme_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); else gen75_vme_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - 
pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -948,17 +948,17 @@ gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, if (allow_hwscore) gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); else gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - 0, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + 0, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -985,7 +985,7 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = encoder_context->vme_context; if ((!vme_context->mpeg2_level) || - (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; } @@ -1044,7 +1044,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * { struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); struct i965_kernel *vme_kernel_list = NULL; - int i965_kernel_num; + int i965_kernel_num; switch (encoder_context->codec) { case CODEC_H264: diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index e35ca85e..1412a149 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -46,7 +46,7 @@ 
gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern void gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); extern void gen6_mfc_init(VADriverContextP ctx, struct encode_state *encode_state, @@ -208,13 +208,13 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); - /*DW1 frame size */ + /*DW1 frame size */ OUT_BCS_BATCH(batch, ((width_in_mbs * height_in_mbs) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); - /*DW3 Qp setting */ + /*DW3 Qp setting */ OUT_BCS_BATCH(batch, (0 << 24) | /* Second Chroma QP Offset */ (0 << 16) | /* Chroma QP Offset */ @@ -240,20 +240,20 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /*DW5 trequllis quantization */ + /*DW5 trequllis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ - /* DW7 */ + /* DW7 */ OUT_BCS_BATCH(batch, 0); /* Reserved */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10 frame bit setting */ + /* DW10 frame bit setting */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); OUT_BCS_BATCH(batch, 0); - /* DW13 Ref setting */ + /* DW13 Ref setting */ OUT_BCS_BATCH(batch, 0x02010100); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -373,8 +373,8 @@ va_to_gen7_mpeg2_picture_type[3] = { static void gen7_mfc_mpeg2_pic_state(VADriverContextP ctx, - struct intel_encoder_context 
*encoder_context, - struct encode_state *encode_state) + struct intel_encoder_context *encoder_context, + struct encode_state *encode_state) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -434,7 +434,7 @@ static void gen7_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -463,14 +463,14 @@ static void gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, - 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 
65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -490,14 +490,14 @@ gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enc static void gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - int x, int y, - int next_x, int next_y, - int is_fisrt_slice_group, - int is_last_slice_group, - int intra_slice, - int qp, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int next_x, int next_y, + int is_fisrt_slice_group, + int is_last_slice_group, + int intra_slice, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -537,18 +537,18 @@ gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, static int gen7_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int mb_type, - int qp_scale_code, - int coded_block_pattern, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int mb_type, + int qp_scale_code, + int coded_block_pattern, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { int len_in_dwords = 9; @@ -634,19 +634,19 @@ mpeg2_motion_vector(int mv, int pos, int display_max, int f_code) static int 
gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *msg, - int width_in_mbs, int height_in_mbs, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int qp_scale_code, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int width_in_mbs, int height_in_mbs, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int qp_scale_code, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; int len_in_dwords = 9; @@ -704,9 +704,9 @@ gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS); @@ -758,11 +758,11 @@ gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - VAEncSliceParameterBufferMPEG2 *next_slice_group_param, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int 
slice_index, + VAEncSliceParameterBufferMPEG2 *next_slice_group_param, + struct intel_batchbuffer *slice_batch) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -794,16 +794,16 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, } gen7_mfc_mpeg2_slicegroup_state(ctx, - encoder_context, - h_start_pos, - v_start_pos, - h_next_start_pos, - v_next_start_pos, - slice_index == 0, - next_slice_group_param == NULL, - slice_param->is_intra_slice, - slice_param->quantiser_scale_code, - slice_batch); + encoder_context, + h_start_pos, + v_start_pos, + h_next_start_pos, + v_next_start_pos, + slice_index == 0, + next_slice_group_param == NULL, + slice_param->is_intra_slice, + slice_param->quantiser_scale_code, + slice_batch); if (slice_index == 0) gen7_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -833,36 +833,36 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, if (slice_param->is_intra_slice) { gen7_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); } else { msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block); if(msg[32] & INTRA_MB_FLAG_MASK) { - gen7_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); - } else { + gen7_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, 
v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + } else { gen7_mfc_mpeg2_pak_object_inter(ctx, encode_state, @@ -878,8 +878,8 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0, 0xff, slice_batch); - } - } + } + } } slice_param++; @@ -920,8 +920,8 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, */ static dri_bo * gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; @@ -961,8 +961,8 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -978,8 +978,8 @@ gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -1009,8 +1009,8 @@ gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, static VAStatus gen7_mfc_mpeg2_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = 
encoder_context->mfc_context; struct object_surface *obj_surface; @@ -1084,8 +1084,8 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen7_mfc_mpeg2_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen6_mfc_init(ctx, encode_state, encoder_context); gen7_mfc_mpeg2_prepare(ctx, encode_state, encoder_context); diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 097fe08b..e6de3af1 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -267,36 +267,36 @@ gen7_vme_surface_setup(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + /* reference 0 */ + if (obj_surface && obj_surface->bo) + gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); } if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - 
slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + /* reference 1 */ + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + + obj_surface = encode_state->reference_objects[1]; + if (obj_surface && obj_surface->bo) + gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } } @@ -400,11 +400,11 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *vme_state_message; - unsigned int *mb_cost_table; + unsigned int *mb_cost_table; int i; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - mb_cost_table = (unsigned int *)vme_context->vme_state_message; + mb_cost_table = (unsigned int *)vme_context->vme_state_message; //building VME state message dri_bo_map(vme_context->vme_state.bo, 1); assert(vme_context->vme_state.bo->virtual); @@ -412,36 +412,36 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, if ((slice_param->slice_type == SLICE_TYPE_P) || (slice_param->slice_type == SLICE_TYPE_SP)) { - vme_state_message[0] = 0x01010101; - vme_state_message[1] = 0x10010101; - vme_state_message[2] = 0x0F0F0F0F; - vme_state_message[3] = 0x100F0F0F; - vme_state_message[4] = 0x01010101; - vme_state_message[5] = 0x10010101; - vme_state_message[6] = 0x0F0F0F0F; - 
vme_state_message[7] = 0x100F0F0F; - vme_state_message[8] = 0x01010101; - vme_state_message[9] = 0x10010101; - vme_state_message[10] = 0x0F0F0F0F; - vme_state_message[11] = 0x000F0F0F; - vme_state_message[12] = 0x00; - vme_state_message[13] = 0x00; - } else { - vme_state_message[0] = 0x10010101; - vme_state_message[1] = 0x100F0F0F; - vme_state_message[2] = 0x10010101; - vme_state_message[3] = 0x000F0F0F; - vme_state_message[4] = 0; - vme_state_message[5] = 0; - vme_state_message[6] = 0; - vme_state_message[7] = 0; - vme_state_message[8] = 0; - vme_state_message[9] = 0; - vme_state_message[10] = 0; - vme_state_message[11] = 0; - vme_state_message[12] = 0; - vme_state_message[13] = 0; - } + vme_state_message[0] = 0x01010101; + vme_state_message[1] = 0x10010101; + vme_state_message[2] = 0x0F0F0F0F; + vme_state_message[3] = 0x100F0F0F; + vme_state_message[4] = 0x01010101; + vme_state_message[5] = 0x10010101; + vme_state_message[6] = 0x0F0F0F0F; + vme_state_message[7] = 0x100F0F0F; + vme_state_message[8] = 0x01010101; + vme_state_message[9] = 0x10010101; + vme_state_message[10] = 0x0F0F0F0F; + vme_state_message[11] = 0x000F0F0F; + vme_state_message[12] = 0x00; + vme_state_message[13] = 0x00; + } else { + vme_state_message[0] = 0x10010101; + vme_state_message[1] = 0x100F0F0F; + vme_state_message[2] = 0x10010101; + vme_state_message[3] = 0x000F0F0F; + vme_state_message[4] = 0; + vme_state_message[5] = 0; + vme_state_message[6] = 0; + vme_state_message[7] = 0; + vme_state_message[8] = 0; + vme_state_message[9] = 0; + vme_state_message[10] = 0; + vme_state_message[11] = 0; + vme_state_message[12] = 0; + vme_state_message[13] = 0; + } vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); vme_state_message[15] = 0; @@ -459,9 +459,9 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, } static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct 
encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *vme_state_message; @@ -635,8 +635,8 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx, for (s = 0; s < encode_state->num_slice_params_ext; s++) { pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; + allow_hwscore = false; + break; } } @@ -644,29 +644,29 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx, (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = AVC_VME_INTRA_SHADER; } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || - (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { kernel_shader = AVC_VME_INTER_SHADER; } else { kernel_shader = AVC_VME_BINTER_SHADER; if (!allow_hwscore) - kernel_shader = AVC_VME_INTER_SHADER; + kernel_shader = AVC_VME_INTER_SHADER; } if (allow_hwscore) gen7_vme_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); else gen7_vme_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -692,7 +692,7 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = 
encoder_context->vme_context; if (!vme_context->h264_level || - (vme_context->h264_level != pSequenceParameter->level_idc)) { + (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } @@ -743,10 +743,10 @@ gen7_vme_pipeline(VADriverContextP ctx, static void gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + int is_intra, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -777,9 +777,9 @@ gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, static void gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -804,9 +804,9 @@ gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; @@ -837,11 +837,11 @@ gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, static void gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = 
encoder_context->vme_context; int number_mb_cmds; @@ -909,9 +909,9 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, static void gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -936,17 +936,17 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, if (allow_hwscore) gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - MPEG2_VME_INTER_SHADER, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + MPEG2_VME_INTER_SHADER, + encoder_context); else gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - MPEG2_VME_INTER_SHADER, - 0, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + MPEG2_VME_INTER_SHADER, + 0, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -963,15 +963,15 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; struct gen6_vme_context *vme_context = encoder_context->vme_context; if ((!vme_context->mpeg2_level) || - (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + (vme_context->mpeg2_level != 
(seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; } @@ -991,34 +991,34 @@ gen7_vme_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_vme_context *vme_context = encoder_context->vme_context; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; VAEncSequenceParameterBufferMPEG2 *seq_param = - (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; /*No need of to exec VME for Intra slice */ if (slice_param->is_intra_slice) { - if(!vme_context->vme_output.bo) { - int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - - vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs; - vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ - vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES; - vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, - "MPEG2 VME output buffer", - vme_context->vme_output.num_blocks - * vme_context->vme_output.size_block, - 0x1000); - } - - return VA_STATUS_SUCCESS; + if(!vme_context->vme_output.bo) { + int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES; + 
vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "MPEG2 VME output buffer", + vme_context->vme_output.num_blocks + * vme_context->vme_output.size_block, + 0x1000); + } + + return VA_STATUS_SUCCESS; } gen7_vme_media_init(ctx, encoder_context); @@ -1059,7 +1059,7 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e struct i965_kernel *vme_kernel_list = NULL; vme_context->gpe_context.surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); -- cgit v1.2.1 From c68c6a0c57555acc1e3aa3f4320d1254f45bdc21 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 13:11:10 +0800 Subject: Check the reference surface id against VA_INVALID_SURFACE Signed-off-by: Xiang, Haihao (cherry picked from commit 0c2def319f52bdb222e5480d81feea486cbf3e11) --- src/gen6_mfc_common.c | 4 ++-- src/gen6_vme.c | 4 ++-- src/gen75_vme.c | 4 ++-- src/gen7_vme.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 88a8d618..d66f4c5d 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1036,7 +1036,7 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { @@ -1067,7 +1067,7 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, if (slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = 
slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 1d475179..13454669 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -223,7 +223,7 @@ gen6_vme_surface_setup(VADriverContextP ctx, if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { @@ -239,7 +239,7 @@ gen6_vme_surface_setup(VADriverContextP ctx, /* reference 1 */ slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 515d8c00..e9ddf0b4 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -284,7 +284,7 @@ gen75_vme_surface_setup(VADriverContextP ctx, if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { @@ -300,7 +300,7 @@ gen75_vme_surface_setup(VADriverContextP ctx, /* reference 1 */ slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != 
VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { diff --git a/src/gen7_vme.c b/src/gen7_vme.c index e6de3af1..2f167ab1 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -269,7 +269,7 @@ gen7_vme_surface_setup(VADriverContextP ctx, if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { @@ -285,7 +285,7 @@ gen7_vme_surface_setup(VADriverContextP ctx, /* reference 1 */ slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { -- cgit v1.2.1 From f29cbb7e5ea5b92d2bc8de2c7237a7c5704b0cc1 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 13:16:29 +0800 Subject: Fix the reference for list1 Signed-off-by: Xiang, Haihao (cherry picked from commit a45edbef143808ee925ef7708c516e6df21fa36b) --- src/gen6_vme.c | 3 +-- src/gen75_vme.c | 3 +-- src/gen7_vme.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 13454669..e1403d2c 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -245,10 +245,9 @@ gen6_vme_surface_setup(VADriverContextP ctx, if (slice_obj_surface && slice_obj_surface->bo) { obj_surface = slice_obj_surface; } else { - obj_surface = encode_state->reference_objects[0]; + obj_surface = encode_state->reference_objects[1]; } - obj_surface = encode_state->reference_objects[1]; if (obj_surface && obj_surface->bo) gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } diff --git 
a/src/gen75_vme.c b/src/gen75_vme.c index e9ddf0b4..b1596fac 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -306,10 +306,9 @@ gen75_vme_surface_setup(VADriverContextP ctx, if (slice_obj_surface && slice_obj_surface->bo) { obj_surface = slice_obj_surface; } else { - obj_surface = encode_state->reference_objects[0]; + obj_surface = encode_state->reference_objects[1]; } - obj_surface = encode_state->reference_objects[1]; if (obj_surface && obj_surface->bo) gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 2f167ab1..8594b0f4 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -291,10 +291,9 @@ gen7_vme_surface_setup(VADriverContextP ctx, if (slice_obj_surface && slice_obj_surface->bo) { obj_surface = slice_obj_surface; } else { - obj_surface = encode_state->reference_objects[0]; + obj_surface = encode_state->reference_objects[1]; } - obj_surface = encode_state->reference_objects[1]; if (obj_surface && obj_surface->bo) gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } -- cgit v1.2.1 From 1142b6a692f0b431aafbdd22f0c5941d583385c5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 14:53:12 +0800 Subject: Clean up for setting up reference surface state Signed-off-by: Xiang, Haihao (cherry picked from commit 939b6bef6d8f8ecfee589cf70fde51f7a34175a1) --- src/gen6_mfc_common.c | 35 +++++++++++++++++++++++++++++++++++ src/gen6_vme.c | 37 ++++--------------------------------- src/gen6_vme.h | 12 ++++++++++++ src/gen75_vme.c | 37 ++++--------------------------------- src/gen7_vme.c | 37 ++++--------------------------------- 5 files changed, 59 insertions(+), 99 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index d66f4c5d..7761e973 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1310,3 +1310,38 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, dri_bo_unmap(vme_context->vme_batchbuffer.bo); return; } + 
+void +intel_avc_vme_reference_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int list_index, + int surface_index, + void (* vme_source_surface_state)( + VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context)) +{ + struct object_surface *obj_surface = NULL; + struct i965_driver_data *i965 = i965_driver_data(ctx); + VASurfaceID ref_surface_id; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + + if (list_index == 0) { + ref_surface_id = slice_param->RefPicList0[0].picture_id; + } else { + ref_surface_id = slice_param->RefPicList1[0].picture_id; + } + + if (ref_surface_id != VA_INVALID_SURFACE) + obj_surface = SURFACE(ref_surface_id); + + if (!obj_surface || + !obj_surface->bo) + obj_surface = encode_state->reference_objects[list_index]; + + if (obj_surface && + obj_surface->bo) + vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context); +} diff --git a/src/gen6_vme.c b/src/gen6_vme.c index e1403d2c..dbe099c6 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -204,7 +204,6 @@ gen6_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -215,42 +214,14 @@ gen6_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - 
ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen6_vme_source_surface_state); - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen6_vme_source_surface_state); } /* VME output */ diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 09d9673b..5841cfd2 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -158,4 +158,16 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, int kernel, struct intel_encoder_context *encoder_context); +void +intel_avc_vme_reference_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int list_index, + int surface_index, + void (* vme_source_surface_state)( + VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context)); + #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index b1596fac..979b109c 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -264,7 +264,6 @@ 
gen75_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -276,42 +275,14 @@ gen75_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state); - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state); } /* VME output */ diff --git 
a/src/gen7_vme.c b/src/gen7_vme.c index 8594b0f4..ed2ee5a9 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -250,7 +250,6 @@ gen7_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -261,42 +260,14 @@ gen7_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen7_vme_source_surface_state); - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + if (slice_type == SLICE_TYPE_B) + 
intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen7_vme_source_surface_state); } /* VME output */ -- cgit v1.2.1 From a8e22800b665aec516f8c73282a8aa3ea045f8c5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 14:29:16 +0800 Subject: Track the used reference surface Signed-off-by: Xiang, Haihao (cherry picked from commit 97e1b531d85bd7b7d3bc1d3e5a7c3355af87a204) --- src/gen6_mfc_common.c | 60 ++++++++++++++++++++------------------------------- src/gen6_vme.h | 3 +++ 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 7761e973..640cbd97 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1019,14 +1019,12 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; - struct i965_driver_data *i965 = i965_driver_data(ctx); int slice_type; - struct object_surface *slice_obj_surface, *obj_surface; - int ref_surface_id; + struct object_surface *obj_surface; unsigned int fref_entry, bref_entry; int frame_index, i; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; fref_entry = 0x80808080; @@ -1034,62 +1032,39 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - 
} else { - obj_surface = encode_state->reference_objects[0]; - } + obj_surface = vme_context->used_reference_objects[0]; frame_index = -1; for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { + if (obj_surface && + obj_surface == encode_state->reference_objects[i]) { frame_index = i; break; } } if (frame_index == -1) { WARN_ONCE("RefPicList0 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - /* This is passed by Slice_param->RefPicList0 */ - fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList0[0], frame_index); } else { /* This is passed by the hacked mode */ fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + fref_entry += intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index); } } if (slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } + obj_surface = vme_context->used_reference_objects[1]; frame_index = -1; for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { + if (obj_surface && + obj_surface == encode_state->reference_objects[i]) { frame_index = i; break; } } if (frame_index == -1) { WARN_ONCE("RefPicList1 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList1[0], frame_index); } else { bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + bref_entry += intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index); } } @@ 
-1323,25 +1298,36 @@ intel_avc_vme_reference_state(VADriverContextP ctx, struct object_surface *obj_surface, struct intel_encoder_context *encoder_context)) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct object_surface *obj_surface = NULL; struct i965_driver_data *i965 = i965_driver_data(ctx); VASurfaceID ref_surface_id; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; if (list_index == 0) { ref_surface_id = slice_param->RefPicList0[0].picture_id; + vme_context->used_references[0] = &slice_param->RefPicList0[0]; } else { ref_surface_id = slice_param->RefPicList1[0].picture_id; + vme_context->used_references[1] = &slice_param->RefPicList1[0]; } if (ref_surface_id != VA_INVALID_SURFACE) obj_surface = SURFACE(ref_surface_id); if (!obj_surface || - !obj_surface->bo) + !obj_surface->bo) { obj_surface = encode_state->reference_objects[list_index]; + vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; + } if (obj_surface && - obj_surface->bo) + obj_surface->bo) { + vme_context->used_reference_objects[list_index] = obj_surface; vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context); + } else { + vme_context->used_reference_objects[list_index] = NULL; + vme_context->used_references[list_index] = NULL; + } } diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 5841cfd2..45f1472f 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -86,6 +86,9 @@ struct gen6_vme_context unsigned int video_coding_type; unsigned int vme_kernel_sum; unsigned int mpeg2_level; + + struct object_surface *used_reference_objects[2]; + void *used_references[2]; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 -- cgit v1.2.1 From 866ff19067e5bdde6f9ad377ae01c2ac16d04a0c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Sep 2013 
15:17:11 +0800 Subject: Select a reference frame from the reference list0/1 Signed-off-by: Xiang, Haihao (cherry picked from commit 3a51e5271773a637ef63ca397285ebdf326daba2) --- src/gen6_mfc_common.c | 76 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 640cbd97..5c3f82fc 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1286,6 +1286,35 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, return; } +static int +avc_temporal_find_surface(VAPictureH264 *curr_pic, + VAPictureH264 *ref_list, + int num_pictures, + int dir) +{ + int i, found = -1, min = 0x7FFFFFFF; + + for (i = 0; i < num_pictures; i++) { + int tmp; + + if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) || + (ref_list[i].picture_id == VA_INVALID_SURFACE)) + break; + + tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt; + + if (dir) + tmp = -tmp; + + if (tmp > 0 && tmp < min) { + min = tmp; + found = i; + } + } + + return found; +} + void intel_avc_vme_reference_state(VADriverContextP ctx, struct encode_state *encode_state, @@ -1304,22 +1333,49 @@ intel_avc_vme_reference_state(VADriverContextP ctx, VASurfaceID ref_surface_id; VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int max_num_references; + VAPictureH264 *curr_pic; + VAPictureH264 *ref_list; if (list_index == 0) { - ref_surface_id = slice_param->RefPicList0[0].picture_id; - vme_context->used_references[0] = &slice_param->RefPicList0[0]; + max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1; + ref_list = slice_param->RefPicList0; } else { - ref_surface_id = slice_param->RefPicList1[0].picture_id; - vme_context->used_references[1] = &slice_param->RefPicList1[0]; + max_num_references = 
pic_param->num_ref_idx_l1_active_minus1 + 1; + ref_list = slice_param->RefPicList1; } - if (ref_surface_id != VA_INVALID_SURFACE) - obj_surface = SURFACE(ref_surface_id); + if (max_num_references == 1) { + if (list_index == 0) { + ref_surface_id = slice_param->RefPicList0[0].picture_id; + vme_context->used_references[0] = &slice_param->RefPicList0[0]; + } else { + ref_surface_id = slice_param->RefPicList1[0].picture_id; + vme_context->used_references[1] = &slice_param->RefPicList1[0]; + } + + if (ref_surface_id != VA_INVALID_SURFACE) + obj_surface = SURFACE(ref_surface_id); - if (!obj_surface || - !obj_surface->bo) { - obj_surface = encode_state->reference_objects[list_index]; - vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; + if (!obj_surface || + !obj_surface->bo) { + obj_surface = encode_state->reference_objects[list_index]; + vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; + } + } else { + int ref_idx; + + curr_pic = &pic_param->CurrPic; + + /* select the reference frame in temporal space */ + ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1); + ref_surface_id = ref_list[ref_idx].picture_id; + + if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */ + obj_surface = SURFACE(ref_surface_id); + + vme_context->used_reference_objects[list_index] = obj_surface; + vme_context->used_references[list_index] = &ref_list[ref_idx]; } if (obj_surface && -- cgit v1.2.1 From 37e75533107a20c2ae3367aa6076c00e7c7bff13 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 29 Sep 2013 16:25:02 +0800 Subject: Pass the reference frame index in List0/1 into the PAK command Signed-off-by: Xiang, Haihao (cherry picked from commit 68380a7f141bedcc0f6fbbbcee2f5e42b6ade0e0) --- src/gen6_mfc.c | 21 ++++++++++++++------- src/gen6_mfc_common.c | 11 +++++++++-- src/gen6_vme.h | 1 + src/gen75_mfc.c | 5 +++-- src/shaders/utils/mfc_batchbuffer.inc | 2 ++ 
src/shaders/utils/mfc_batchbuffer_avc_inter.asm | 6 ++++++ src/shaders/utils/mfc_batchbuffer_avc_inter.g6b | 6 ++++-- src/shaders/utils/mfc_batchbuffer_avc_inter.g7b | 6 ++++-- 8 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 62fa2e93..d152ed98 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -708,6 +708,7 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 11; if (batch == NULL) @@ -737,8 +738,8 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, msg[1]); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | @@ -1004,11 +1005,12 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, int mb_x, int mb_y, int width_in_mbs, - int qp) + int qp, + unsigned int ref_index[2]) { - BEGIN_BATCH(batch, 12); + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); OUT_BATCH(batch, index); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -1032,6 +1034,8 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, OUT_BATCH(batch, qp << 16 | width_in_mbs); + OUT_BATCH(batch, ref_index[0]); + OUT_BATCH(batch, ref_index[1]); ADVANCE_BATCH(batch); } @@ -1049,6 +1053,7 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; 
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; int number_mb_cmds = 128; @@ -1080,7 +1085,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, mb_x, mb_y, width_in_mbs, - qp); + qp, + vme_context->ref_index_in_mb); if (first_object) { head_offset += head_size; @@ -1118,7 +1124,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, mb_x, mb_y, width_in_mbs, - qp); + qp, + vme_context->ref_index_in_mb); } } diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 5c3f82fc..5605ef3f 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1336,6 +1336,7 @@ intel_avc_vme_reference_state(VADriverContextP ctx, int max_num_references; VAPictureH264 *curr_pic; VAPictureH264 *ref_list; + int ref_idx; if (list_index == 0) { max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1; @@ -1362,9 +1363,9 @@ intel_avc_vme_reference_state(VADriverContextP ctx, obj_surface = encode_state->reference_objects[list_index]; vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; } - } else { - int ref_idx; + ref_idx = 0; + } else { curr_pic = &pic_param->CurrPic; /* select the reference frame in temporal space */ @@ -1380,10 +1381,16 @@ intel_avc_vme_reference_state(VADriverContextP ctx, if (obj_surface && obj_surface->bo) { + assert(ref_idx >= 0); vme_context->used_reference_objects[list_index] = obj_surface; vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context); + vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 | + ref_idx << 16 | + ref_idx << 8 | + ref_idx); } else { vme_context->used_reference_objects[list_index] = NULL; vme_context->used_references[list_index] = NULL; + vme_context->ref_index_in_mb[list_index] = 0; } } diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 45f1472f..939a4a37 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -89,6 +89,7 @@ struct gen6_vme_context struct object_surface 
*used_reference_objects[2]; void *used_references[2]; + unsigned int ref_index_in_mb[2]; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 4eecc9c7..c92f74de 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1040,6 +1040,7 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 12; unsigned int inter_msg = 0; if (batch == NULL) @@ -1117,8 +1118,8 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | diff --git a/src/shaders/utils/mfc_batchbuffer.inc b/src/shaders/utils/mfc_batchbuffer.inc index c83d5d43..c3a0fec6 100644 --- a/src/shaders/utils/mfc_batchbuffer.inc +++ b/src/shaders/utils/mfc_batchbuffer.inc @@ -139,6 +139,8 @@ define(`mb_y', `inline_reg0.17') /* :ub, */ define(`mb_xy', `inline_reg0.16') /* :uw, */ define(`width_in_mb', `inline_reg0.20') /* :uw, the picture width in macroblocks */ define(`qp', `inline_reg0.22') /* :ub, */ +define(`ref_idx0', `inline_reg0.24') /* :ud */ +define(`ref_idx1', `inline_reg0.28') /* :ud */ /* * GRF 8~15 -- temporary registers diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm index 59152b88..549f0213 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm +++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm @@ -103,6 +103,12 @@ __FILL_INTER_PAK_COMMAND: /* DW7 */ mov (1) pak_object7_ud<1>:ud ob_read_wb0.4<0,1,0>:ud {align1} ; + /* DW8 
*/ + mov (1) pak_object8_ud<1>:ud ref_idx0<0,1,0>:ud {align1} ; + + /* DW9 */ + mov (1) pak_object9_ud<1>:ud ref_idx1<0,1,0>:ud {align1} ; + jmpi (1) __OUTPUT_PAK_COMMAND ; __FILL_INTRA_PAK_COMMAND: diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b index 2e1703e4..24b268f5 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b +++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b @@ -24,7 +24,7 @@ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 }, { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -41,6 +41,8 @@ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 }, { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 }, { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 }, + { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 }, { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -67,7 +69,7 @@ { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 }, { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 }, { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a }, { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e }, { 0x00600001, 0x20000022, 0x008d0120, 0x00000000 }, { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 }, diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b index 1664010b..f0e20128 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b +++ 
b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b @@ -24,7 +24,7 @@ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 }, { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -41,6 +41,8 @@ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 }, { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 }, { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 }, + { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 }, { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -67,7 +69,7 @@ { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 }, { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 }, { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a }, { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e }, { 0x00600001, 0x28000021, 0x008d0120, 0x00000000 }, { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 }, -- cgit v1.2.1 From 5f01b4f23db1afebebdec1e70939a28083443940 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Sep 2013 13:12:17 +0800 Subject: Follow the input Picture/Slice parameters to generate slice header/data Signed-off-by: Xiang, Haihao (cherry picked from commit f5a694e64d0163178c28dc25d9a3e7b9b1b5d162) --- src/gen6_mfc.c | 32 +++++++++++++++++++++----------- src/gen75_mfc.c | 32 +++++++++++++++++++++----------- src/i965_encoder_utils.c | 13 +++++++++++-- 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index d152ed98..4d13e1f5 100644 --- a/src/gen6_mfc.c +++ 
b/src/gen6_mfc.c @@ -360,16 +360,29 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; - int bslice = 0; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -394,14 +407,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | diff --git a/src/gen75_mfc.c 
b/src/gen75_mfc.c index c92f74de..93cf30ff 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -887,19 +887,32 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, int maxQpN, maxQpP; unsigned char correct[6], grow, shrink; int i; - int bslice = 0; int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -924,14 +937,11 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + 
(chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | diff --git a/src/i965_encoder_utils.c b/src/i965_encoder_utils.c index cc67d15f..abd25b41 100644 --- a/src/i965_encoder_utils.c +++ b/src/i965_encoder_utils.c @@ -233,13 +233,22 @@ slice_header(avc_bitstream *bs, /* slice type */ if (IS_P_SLICE(slice_param->slice_type)) { - avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */ + avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param->num_ref_idx_active_override_flag) + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1); /* ref_pic_list_reordering */ avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ } else if (IS_B_SLICE(slice_param->slice_type)) { avc_bitstream_put_ui(bs, slice_param->direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */ - avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */ + + avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param->num_ref_idx_active_override_flag) { + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1); + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l1_active_minus1); + } /* ref_pic_list_reordering */ avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ -- cgit v1.2.1 From 16d71da80d3c86f8d0d3b41a15d881e30327994f Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Sep 2013 15:06:57 +0800 Subject: Fix the Reference Index Mapping Table L0/L1 Signed-off-by: Xiang, Haihao (cherry picked from commit 3ffbe0297e98a88db18ae90ba5c1f8c429183baf) --- src/gen6_mfc_common.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 5605ef3f..31573ba3 100644 --- a/src/gen6_mfc_common.c +++ 
b/src/gen6_mfc_common.c @@ -1032,6 +1032,13 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { + int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff); + + if (ref_idx_l0 > 3) { + WARN_ONCE("ref_idx_l0 is out of range\n"); + ref_idx_l0 = 0; + } + obj_surface = vme_context->used_reference_objects[0]; frame_index = -1; for (i = 0; i < 16; i++) { @@ -1044,13 +1051,20 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, if (frame_index == -1) { WARN_ONCE("RefPicList0 is not found in DPB!\n"); } else { - /* This is passed by the hacked mode */ - fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index); + int ref_idx_l0_shift = ref_idx_l0 * 8; + fref_entry &= ~(0xFF << ref_idx_l0_shift); + fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift); } } if (slice_type == SLICE_TYPE_B) { + int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff); + + if (ref_idx_l1 > 3) { + WARN_ONCE("ref_idx_l1 is out of range\n"); + ref_idx_l1 = 0; + } + obj_surface = vme_context->used_reference_objects[1]; frame_index = -1; for (i = 0; i < 16; i++) { @@ -1063,8 +1077,9 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, if (frame_index == -1) { WARN_ONCE("RefPicList1 is not found in DPB!\n"); } else { - bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index); + int ref_idx_l1_shift = ref_idx_l1 * 8; + bref_entry &= ~(0xFF << ref_idx_l1_shift); + bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift); } } -- cgit v1.2.1 From 7d2608dfe36742409e839b0bb28b67a026c3512a Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Wed, 25 Sep 2013 15:56:58 +0800 Subject: VPP: add vebox motion compensation support on HSW Signed-off-by: Zhong Li (cherry picked 
from commit 2c7c7c4d20014342538a80bfd1525f9bef5ea971) Conflicts: src/i965_drv_video.c --- src/gen75_vpp_vebox.c | 15 ++++++++++----- src/i965_drv_video.c | 16 +++++++++++++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index a973ed49..75d922d7 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -133,6 +133,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c unsigned int* p_table ; int progressive_dn = 1; int dndi_top_first = 0; + int motion_compensated_enable = 0; if (proc_ctx->filters_mask & VPP_DNDI_DI) { VAProcFilterParameterBufferDeinterlacing *di_param = @@ -141,6 +142,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c progressive_dn = 0; dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST); + motion_compensated_enable = (di_param->algorithm == VAProcDeinterlacingMotionCompensated); } /* @@ -199,7 +201,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c 100<< 16 | // FMD #2 vertical difference th 0 << 14 | // CAT th1 2 << 8 | // FMD tear threshold - 0 << 7 | // MCDI Enable, use motion compensated deinterlace algorithm + motion_compensated_enable << 7 | // MCDI Enable, use motion compensated deinterlace algorithm progressive_dn << 6 | // progressive DN 0 << 4 | // reserved dndi_top_first << 3 | // DN/DI Top First @@ -543,7 +545,8 @@ void hsw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *pro if (di_param->algorithm == VAProcDeinterlacingBob) is_first_frame = 1; - if (di_param->algorithm == VAProcDeinterlacingMotionAdaptive && + if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated) && proc_ctx->frame_order != -1) di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */ } @@ -871,7 +874,8 @@ hsw_veb_surface_reference(VADriverContextP ctx, 
(VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; if (di_param && - di_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + (di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated)) { if ((proc_ctx->filters_mask & VPP_DNDI_DN) && proc_ctx->frame_order == 0) { /* DNDI */ tmp_store = proc_ctx->frame_store[FRAME_OUT_CURRENT_DN]; @@ -885,7 +889,7 @@ hsw_veb_surface_reference(VADriverContextP ctx, if (!pipe || !pipe->num_forward_references || pipe->forward_references[0] == VA_INVALID_ID) { - WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); + WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n"); return VA_STATUS_ERROR_INVALID_PARAMETER; } @@ -925,7 +929,8 @@ hsw_veb_surface_reference(VADriverContextP ctx, (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; if (di_param && - di_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + (di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated)) { if (proc_ctx->frame_order == -1) { proc_ctx->frame_store[FRAME_OUT_CURRENT].surface_id = VA_INVALID_ID; proc_ctx->frame_store[FRAME_OUT_CURRENT].is_internal_surface = 0; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 31dafa2f..af0a2fae 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -284,6 +284,8 @@ static struct hw_codec_info gen75_hw_codec_info = { .has_accelerated_putimage = 1, .has_tiled_surface = 1, .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .num_filters = 4, .filters = { { VAProcFilterNoiseReduction, I965_RING_VEBOX }, @@ -4638,7 +4640,13 @@ VAStatus i965_QueryVideoProcFilterCaps( i++; cap++; } - } + + if (i965->codec_info->has_di_motion_compensated) { + cap->type = VAProcDeinterlacingMotionCompensated; + i++; + cap++; + } + } break; @@ -4741,9 +4749,11 @@ VAStatus 
i965_QueryVideoProcPipelineCaps( VAProcFilterParameterBufferDeinterlacing *deint = (VAProcFilterParameterBufferDeinterlacing *)base; assert(deint->algorithm == VAProcDeinterlacingBob || - deint->algorithm == VAProcDeinterlacingMotionAdaptive); + deint->algorithm == VAProcDeinterlacingMotionAdaptive || + deint->algorithm == VAProcDeinterlacingMotionCompensated); - if (deint->algorithm == VAProcDeinterlacingMotionAdaptive) + if (deint->algorithm == VAProcDeinterlacingMotionAdaptive || + deint->algorithm == VAProcDeinterlacingMotionCompensated); pipeline_cap->num_forward_references++; } } -- cgit v1.2.1 From 654d0d6d8c9450298513964fc4db0b16f59c4b42 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 14 Oct 2013 09:56:21 +0800 Subject: Fix an incorrect makefile rule for VME shader on Ivybridge Otherwise when the corresponding source file is modified, the binary shader is not updated. Signed-off-by: Zhao Yakui --- src/shaders/vme/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 867a93c1..634e6d4a 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -37,7 +37,7 @@ $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC) .gen6.asm.g6b: $(AM_V_GEN)$(GEN4ASM) -g 6 -o $@ $< -$(INTEL_GEN7_ASM): $(VME_CORE) $(INTEL_GEN7_INC) +$(INTEL_GEN7_ASM): $(VME7_CORE) $(INTEL_GEN7_INC) .g7a.gen7.asm: $(AM_V_GEN)cpp -P -DDEV_IVB $< > _vme0.$@ && \ m4 _vme0.$@ > $@ && \ -- cgit v1.2.1 From 0bb182049c3d290ed8affdf2b462b379d6f7de55 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 14 Oct 2013 09:56:26 +0800 Subject: Fix one error of VME shader for H264 encoding Otherwise the h264 encoding will use the incorrect prediction result for the macroblocks in the first row if the MVP is used. 
Signed-off-by: Zhao Yakui --- src/shaders/vme/inter_bframe_haswell.asm | 2 +- src/shaders/vme/inter_bframe_haswell.g75b | 2 +- src/shaders/vme/inter_bframe_ivb.asm | 2 +- src/shaders/vme/inter_bframe_ivb.g7b | 2 +- src/shaders/vme/inter_frame_haswell.asm | 2 +- src/shaders/vme/inter_frame_haswell.g75b | 2 +- src/shaders/vme/inter_frame_ivb.asm | 2 +- src/shaders/vme/inter_frame_ivb.g7b | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm index 9e54b9d6..f8ff0af5 100644 --- a/src/shaders/vme/inter_bframe_haswell.asm +++ b/src/shaders/vme/inter_bframe_haswell.asm @@ -396,7 +396,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; diff --git a/src/shaders/vme/inter_bframe_haswell.g75b b/src/shaders/vme/inter_bframe_haswell.g75b index 03da639c..cabef201 100644 --- a/src/shaders/vme/inter_bframe_haswell.g75b +++ b/src/shaders/vme/inter_bframe_haswell.g75b @@ -186,7 +186,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm index 577895ca..8a75962c 100644 --- 
a/src/shaders/vme/inter_bframe_ivb.asm +++ b/src/shaders/vme/inter_bframe_ivb.asm @@ -388,7 +388,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b index fe6f98d6..adcb3907 100644 --- a/src/shaders/vme/inter_bframe_ivb.g7b +++ b/src/shaders/vme/inter_bframe_ivb.g7b @@ -180,7 +180,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm index 6305c3c2..bcfd2601 100644 --- a/src/shaders/vme/inter_frame_haswell.asm +++ b/src/shaders/vme/inter_frame_haswell.asm @@ -329,7 +329,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; 
diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b index d9d791d1..61551f90 100644 --- a/src/shaders/vme/inter_frame_haswell.g75b +++ b/src/shaders/vme/inter_frame_haswell.g75b @@ -120,7 +120,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm index b5cafdd2..3c088511 100644 --- a/src/shaders/vme/inter_frame_ivb.asm +++ b/src/shaders/vme/inter_frame_ivb.asm @@ -323,7 +323,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b index 1bb41b20..e4db6ea7 100644 --- a/src/shaders/vme/inter_frame_ivb.g7b +++ b/src/shaders/vme/inter_frame_ivb.g7b @@ -116,7 +116,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 
0x2b140129, 0x00000af4, 0x00000000 }, -- cgit v1.2.1 From 7b6c3bfa1111b26a5519438a9eca67ff88d7ef04 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 14 Oct 2013 09:56:30 +0800 Subject: Fix one error of VME shader for MPEG2 encoding Otherwise the MPEG2 encoding will use the incorrect prediction result for the macroblocks in the first row if the MVP is used. Signed-off-by: Zhao Yakui (cherry picked from commit 44889dc0f3054cce226d5c09d431022fdffe3aac) --- src/shaders/vme/mpeg2_inter_haswell.asm | 2 +- src/shaders/vme/mpeg2_inter_haswell.g75b | 2 +- src/shaders/vme/mpeg2_inter_ivb.asm | 2 +- src/shaders/vme/mpeg2_inter_ivb.g7b | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm index 0e91a04a..c224cf05 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.asm +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -291,7 +291,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; diff --git a/src/shaders/vme/mpeg2_inter_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b index d6625e3e..25c629de 100644 --- a/src/shaders/vme/mpeg2_inter_haswell.g75b +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -97,7 +97,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 
0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm index 261e74c2..dde96434 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.asm +++ b/src/shaders/vme/mpeg2_inter_ivb.asm @@ -304,7 +304,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b index 2ef3b13b..1ffcdd5a 100644 --- a/src/shaders/vme/mpeg2_inter_ivb.g7b +++ b/src/shaders/vme/mpeg2_inter_ivb.g7b @@ -105,7 +105,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, -- cgit v1.2.1 From daede78837ea4384df4c10dce9f7fabef4f31a17 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 15 Oct 2013 11:01:04 +0800 Subject: Add more strict check to fix crash issue caused by invalid parameter This is to fix the crash issue in https://bugs.freedesktop.org/show_bug.cgi?id=70397 Signed-off-by: Zhao Yakui (cherry picked from commit 80d665eb670fd700d03f9a2486e452947177a058) --- src/i965_drv_video.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/i965_drv_video.c 
b/src/i965_drv_video.c index af0a2fae..6b81fb08 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1816,6 +1816,9 @@ i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id) struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; + if ((buf_id & OBJECT_HEAP_OFFSET_MASK) != BUFFER_ID_OFFSET) + return VA_STATUS_ERROR_INVALID_BUFFER; + assert(obj_buffer && obj_buffer->buffer_store); assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer); assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer)); @@ -2024,7 +2027,6 @@ i965_decoder_render_picture(VADriverContextP ctx, for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2147,7 +2149,6 @@ i965_encoder_render_picture(VADriverContextP ctx, for (i = 0; i < num_buffers; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2240,7 +2241,6 @@ i965_proc_render_picture(VADriverContextP ctx, for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2275,7 +2275,10 @@ i965_RenderPicture(VADriverContextP ctx, if (!obj_context) return VA_STATUS_ERROR_INVALID_CONTEXT; - + + if (num_buffers <= 0) + return VA_STATUS_ERROR_INVALID_PARAMETER; + obj_config = obj_context->obj_config; assert(obj_config); -- cgit v1.2.1 From d68025cd2000ea6b195b24e03acff838572c223e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 15 Oct 2013 11:04:00 +0800 Subject: Return the error instead of assert in vaEndPicture This is to fix the crash issue caused by the incorrect parameter. 
Signed-off-by: Zhao Yakui (cherry picked from commit 1cee858036a87837deddc87586701ed869f96261) --- src/i965_drv_video.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6b81fb08..efe72b45 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2313,17 +2313,33 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) } else if (obj_context->codec_type == CODEC_ENC) { assert(VAEntrypointEncSlice == obj_config->entrypoint); - assert(obj_context->codec_state.encode.pic_param || - obj_context->codec_state.encode.pic_param_ext); - assert(obj_context->codec_state.encode.seq_param || - obj_context->codec_state.encode.seq_param_ext); - assert(obj_context->codec_state.encode.num_slice_params >= 1 || - obj_context->codec_state.encode.num_slice_params_ext >= 1); + if (!(obj_context->codec_state.encode.pic_param || + obj_context->codec_state.encode.pic_param_ext)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (!(obj_context->codec_state.encode.seq_param || + obj_context->codec_state.encode.seq_param_ext)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if ((obj_context->codec_state.encode.num_slice_params <=0) && + (obj_context->codec_state.encode.num_slice_params_ext <=0)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } } else { - assert(obj_context->codec_state.decode.pic_param); - assert(obj_context->codec_state.decode.num_slice_params >= 1); - assert(obj_context->codec_state.decode.num_slice_datas >= 1); - assert(obj_context->codec_state.decode.num_slice_params == obj_context->codec_state.decode.num_slice_datas); + if (obj_context->codec_state.decode.pic_param == NULL) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (obj_context->codec_state.decode.num_slice_params <=0) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (obj_context->codec_state.decode.num_slice_datas <=0) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + if 
(obj_context->codec_state.decode.num_slice_params != + obj_context->codec_state.decode.num_slice_datas) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } } assert(obj_context->hw_context->run); -- cgit v1.2.1 From 8f68c229ba15f3d2c830edc043ad8a7895fa1bcb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 8 Nov 2013 15:36:28 +0800 Subject: Calculate the required space of batch buffer to avoid buffer overflow in encoding The required size is based on the number of macroblocks and slice parameter. Then it can avoid that too large buffer is allocated or possible overflow. Signed-off-by: Zhao Yakui (cherry picked from commit 8acdfd023e50af37a5642e2517683c34accd78b0) --- src/gen6_mfc.c | 7 ++++++- src/gen6_mfc.h | 7 +++++++ src/gen75_mfc.c | 7 ++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 4d13e1f5..d6b66b3b 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -528,6 +528,7 @@ gen6_mfc_init(VADriverContextP ctx, int i; int width_in_mbs = 0; int height_in_mbs = 0; + int slice_batchbuffer_size; if (encoder_context->codec == CODEC_H264) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -542,6 +543,9 @@ gen6_mfc_init(VADriverContextP ctx, height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; } + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; + /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); mfc_context->post_deblocking_output.bo = NULL; @@ -608,7 +612,8 @@ gen6_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, + slice_batchbuffer_size); 
mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 1b71218a..6a5777f2 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -42,6 +42,13 @@ struct encode_state; #define INTRA_MB_FLAG_MASK 0x00002000 +/* The space required for slice header SLICE_STATE + header. + * Is it enough? */ +#define SLICE_HEADER 80 + +/* the space required for slice tail. */ +#define SLICE_TAIL 16 + #define __SOFTWARE__ 0 #define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 93cf30ff..e16e6498 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -427,6 +427,7 @@ static void gen75_mfc_init(VADriverContextP ctx, int i; int width_in_mbs = 0; int height_in_mbs = 0; + int slice_batchbuffer_size; if (encoder_context->codec == CODEC_H264) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -441,6 +442,9 @@ static void gen75_mfc_init(VADriverContextP ctx, height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; } + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; + /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); mfc_context->post_deblocking_output.bo = NULL; @@ -507,7 +511,8 @@ static void gen75_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, + slice_batchbuffer_size); mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; 
dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; -- cgit v1.2.1 From b06e32d127e2f466e44a52b2b7ca559ab79890ff Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 8 Nov 2013 15:36:32 +0800 Subject: Encoding reuses aux_batchbuffer instead of allocating another new buffer Signed-off-by: Zhao Yakui (cherry picked from commit 052ce2930cd4661b7ce62902e6553eec0e2db9f1) --- src/gen6_mfc.c | 10 ++++++++-- src/gen75_mfc.c | 18 +++++++----------- src/gen7_mfc.c | 9 +++------ 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index d6b66b3b..38a065eb 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -873,10 +873,14 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); - dri_bo *batch_bo = batch->buffer; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch;; + dri_bo *batch_bo; int i; + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); } @@ -889,7 +893,9 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index e16e6498..c2b26d60 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1261,17 +1261,14 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); 
struct intel_batchbuffer *batch; dri_bo *batch_bo; int i; int buffer_size; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); @@ -1285,7 +1282,9 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -2339,18 +2338,14 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -2371,6 +2366,7 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + 
mfc_context->aux_batchbuffer = NULL; return batch_bo; } diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 1412a149..3c3ae211 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -923,18 +923,14 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -955,6 +951,7 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } -- cgit v1.2.1 From 4b8b3e8fc09ab953a40feae6ca0501267e97ee62 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 8 Nov 2013 15:36:36 +0800 Subject: Use GPU to construct MFX command buffer for H264 encoding on Haswell This is to optimize the performance of h264 encoding. The GPU can accelerate the construction of MFX command buffer for H264 encoding.
Signed-off-by: Zhao Yakui (cherry picked from commit 87bc38d4300212dea51b5635f184aa1ae37fa71c) --- src/gen75_mfc.c | 270 +++++++++++--------------- src/shaders/utils/Makefile.am | 25 ++- src/shaders/utils/mfc_batchbuffer_hsw.asm | 296 +++++++++++++++++++++++++++++ src/shaders/utils/mfc_batchbuffer_hsw.g75a | 29 +++ src/shaders/utils/mfc_batchbuffer_hsw.g75b | 105 ++++++++++ src/shaders/utils/mfc_batchbuffer_hsw.inc | 195 +++++++++++++++++++ 6 files changed, 761 insertions(+), 159 deletions(-) create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.asm create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g75a create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g75b create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.inc diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index c2b26d60..784a5e20 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -43,33 +43,27 @@ #include "gen6_vme.h" #include "intel_media.h" -#define MFC_SOFTWARE_HASWELL 1 +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF + +#define MFC_SOFTWARE_HASWELL 0 #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) -static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" -}; - -static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +static const uint32_t gen75_mfc_batchbuffer_avc[][4] = { +#include "shaders/utils/mfc_batchbuffer_hsw.g75b" }; static struct i965_kernel gen75_mfc_kernels[] = { { "MFC AVC INTRA BATCHBUFFER ", MFC_BATCHBUFFER_AVC_INTRA, - gen75_mfc_batchbuffer_avc_intra, - sizeof(gen75_mfc_batchbuffer_avc_intra), - NULL - }, - - { - "MFC AVC INTER BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTER, - gen75_mfc_batchbuffer_avc_inter, - sizeof(gen75_mfc_batchbuffer_avc_inter), + gen75_mfc_batchbuffer_avc, + 
sizeof(gen75_mfc_batchbuffer_avc), NULL }, }; @@ -996,7 +990,7 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL static int gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, @@ -1147,12 +1141,6 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i return len_in_dwords; } -#define AVC_INTRA_RDO_OFFSET 4 -#define AVC_INTER_RDO_OFFSET 10 -#define AVC_INTER_MSG_OFFSET 8 -#define AVC_INTER_MV_OFFSET 48 -#define AVC_RDO_MASK 0xFFFF - static void gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, struct encode_state *encode_state, @@ -1306,12 +1294,6 @@ gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, &vme_context->vme_output, BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); - assert(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &mfc_context->aux_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); } static void @@ -1322,19 +1304,10 @@ gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; - mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ - mfc_context->mfc_batchbuffer_surface.pitch = 16; - mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, - "MFC batchbuffer", - 
mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, - 0x1000); + assert(mfc_context->aux_batchbuffer_surface.bo); mfc_context->buffer_suface_setup(ctx, &mfc_context->gpe_context, - &mfc_context->mfc_batchbuffer_surface, + &mfc_context->aux_batchbuffer_surface, BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); } @@ -1401,48 +1374,47 @@ gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, (void)mfc_context; } +#define AVC_PAK_LEN_IN_BYTE 48 +#define AVC_PAK_LEN_IN_OWORD 3 + static void gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, + uint32_t intra_flag, int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, int number_mb_cmds, - int first_object, - int last_object, - int last_slice, + int slice_end_x, + int slice_end_y, int mb_x, int mb_y, int width_in_mbs, - int qp) + int qp, + uint32_t fwd_ref, + uint32_t bwd_ref) { - BEGIN_BATCH(batch, 12); + uint32_t temp_value; + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); - OUT_BATCH(batch, index); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); + OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); /*inline data */ - OUT_BATCH(batch, head_offset); - OUT_BATCH(batch, batchbuffer_offset); - OUT_BATCH(batch, - head_size << 16 | - tail_size); - OUT_BATCH(batch, - number_mb_cmds << 16 | - first_object << 2 | - last_object << 1 | - last_slice); - OUT_BATCH(batch, - mb_y << 8 | - mb_x); + OUT_BATCH(batch, head_offset / 16); + OUT_BATCH(batch, (intra_flag) | (qp << 16)); + temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16)); + OUT_BATCH(batch, temp_value); + + OUT_BATCH(batch, number_mb_cmds); + OUT_BATCH(batch, - qp << 16 | - width_in_mbs); + ((slice_end_y << 8) | (slice_end_x))); + OUT_BATCH(batch, fwd_ref); + OUT_BATCH(batch, bwd_ref); + + OUT_BATCH(batch, MI_NOOP); 
ADVANCE_BATCH(batch); } @@ -1452,96 +1424,83 @@ gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, struct intel_encoder_context *encoder_context, VAEncSliceParameterBufferH264 *slice_param, int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, int qp, int last_slice) { struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); int number_mb_cmds = 128; - int starting_mb = 0; - int last_object = 0; - int first_object = 1; + int starting_offset = 0; int i; int mb_x, mb_y; - int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; - - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - last_object = (total_mbs - starting_mb) == number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - - starting_mb += number_mb_cmds; - - gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - - if (first_object) { - head_offset += head_size; - batchbuffer_offset += head_size; - } + int last_mb, slice_end_x, slice_end_y; + int remaining_mb = total_mbs; + uint32_t fwd_ref , bwd_ref, mb_flag; - if (last_object) { - head_offset += tail_size; - batchbuffer_offset += tail_size; - } + last_mb = slice_param->macroblock_address + total_mbs - 1; + slice_end_x = last_mb % width_in_mbs; + slice_end_y = last_mb / width_in_mbs; - batchbuffer_offset += 
number_mb_cmds * CMD_LEN_IN_OWORD; + if (slice_type == SLICE_TYPE_I) { + fwd_ref = 0; + bwd_ref = 0; + mb_flag = 1; + } else { + fwd_ref = vme_context->ref_index_in_mb[0]; + bwd_ref = vme_context->ref_index_in_mb[1]; + mb_flag = 0; + } - first_object = 0; + if (width_in_mbs >= 100) { + number_mb_cmds = width_in_mbs / 5; + } else if (width_in_mbs >= 80) { + number_mb_cmds = width_in_mbs / 4; + } else if (width_in_mbs >= 60) { + number_mb_cmds = width_in_mbs / 3; + } else if (width_in_mbs >= 40) { + number_mb_cmds = width_in_mbs / 2; + } else { + number_mb_cmds = width_in_mbs; } - if (!last_object) { - last_object = 1; - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - starting_mb += number_mb_cmds; + do { + if (number_mb_cmds >= remaining_mb) { + number_mb_cmds = remaining_mb; + } + mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; gen75_mfc_batchbuffer_emit_object_command(batch, - index, + mb_flag, head_offset, - batchbuffer_offset, - head_size, - tail_size, number_mb_cmds, - first_object, - last_object, - last_slice, + slice_end_x, + slice_end_y, mb_x, mb_y, width_in_mbs, - qp); - } + qp, + fwd_ref, + bwd_ref); + + head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE); + remaining_mb -= number_mb_cmds; + starting_offset += number_mb_cmds; + } while (remaining_mb > 0); } /* * return size in Owords (16bytes) */ -static int +static void gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) + int slice_index) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; @@ -1557,8 
+1516,6 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; - int old_used = intel_batchbuffer_used_size(slice_batch), used; - unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); if (rate_control_mode == VA_RC_CBR) { @@ -1572,7 +1529,6 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - head_offset = old_used / 16; gen75_mfc_avc_slice_state(ctx, pPicParameter, pSliceParameter, @@ -1601,11 +1557,20 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, free(slice_header); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - head_size = (used - old_used) / 16; - old_used = used; + head_offset = intel_batchbuffer_used_size(slice_batch); + + slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE; + + gen75_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + qp, + last_slice); + - /* tail */ + /* Aligned for tail */ + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ if (last_slice) { mfc_context->insert_object(ctx, encoder_context, @@ -1630,22 +1595,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, slice_batch); } - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - tail_size = (used - old_used) / 16; - - - gen75_mfc_avc_batchbuffer_slice_command(ctx, - encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); - - return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; + return; } static void @@ -1660,10 +1610,16 @@ gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, 
gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); for ( i = 0; i < encode_state->num_slice_params_ext; i++) { - size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); - offset += size; + gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i); + } + { + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + intel_batchbuffer_align(slice_batch, 8); + BEGIN_BCS_BATCH(slice_batch, 2); + OUT_BCS_BATCH(slice_batch, 0); + OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(slice_batch); } - intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } @@ -1687,9 +1643,9 @@ gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); - dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); - return mfc_context->mfc_batchbuffer_surface.bo; + return mfc_context->aux_batchbuffer_surface.bo; } #endif @@ -1708,7 +1664,7 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); #else slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); @@ -2612,7 +2568,7 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context * i965_gpe_load_kernels(ctx, &mfc_context->gpe_context, gen75_mfc_kernels, - NUM_MFC_KERNEL); + 1); mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select; mfc_context->set_surface_state = gen75_mfc_surface_state; diff --git a/src/shaders/utils/Makefile.am b/src/shaders/utils/Makefile.am index b8f31218..dd19d627 100644 --- a/src/shaders/utils/Makefile.am +++ b/src/shaders/utils/Makefile.am @@ -6,6 +6,9 @@ MFC_CORE_AVC = \ mfc_batchbuffer_avc_intra.asm \ 
mfc_batchbuffer_avc_inter.asm +MFC_CORE_HSW = \ + mfc_batchbuffer_hsw.asm + INTEL_G6B = mfc_batchbuffer_avc_intra.g6b mfc_batchbuffer_avc_inter.g6b INTEL_G6A = mfc_batchbuffer_avc_intra.g6a mfc_batchbuffer_avc_inter.g6a INTEL_GEN6_INC = mfc_batchbuffer.inc @@ -16,15 +19,21 @@ INTEL_G7A = mfc_batchbuffer_avc_intra.g7a mfc_batchbuffer_avc_inter.g7a INTEL_GEN7_INC = mfc_batchbuffer.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) +INTEL_G75B = mfc_batchbuffer_hsw.g75b +INTEL_G75A = mfc_batchbuffer_hsw.g75a +INTEL_GEN75_INC = mfc_batchbuffer_hsw.inc +INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) +TARGETS += $(INTEL_G75B) endif all-local: $(TARGETS) -SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm +SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm if HAVE_GEN4ASM $(INTEL_GEN6_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN6_INC) @@ -42,19 +51,31 @@ $(INTEL_GEN7_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN7_INC) rm _mfc0.$@ .gen7.asm.g7b: $(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $< + +$(INTEL_GEN75_ASM): $(MFC_CORE_HSW) $(INTEL_GEN75_INC) +.g75a.gen75.asm: + $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \ + m4 _mfc0.$@ > $@ && \ + rm _mfc0.$@ +.gen75.asm.g75b: + $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< endif -CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) +CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) EXTRA_DIST = \ $(INTEL_G6A) \ $(INTEL_G6B) \ $(INTEL_G7A) \ $(INTEL_G7B) \ + $(INTEL_G75A) \ + $(INTEL_G75B) \ $(INTEL_GEN6_INC) \ $(INTEL_GEN7_INC) \ + $(INTEL_GEN75_INC) \ $(MFC_CORE) \ $(MFC_CORE_AVC) \ + $(MFC_CORE_HSW) \ $(NULL) # Extra clean files so that maintainer-clean removes *everything* diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.asm b/src/shaders/utils/mfc_batchbuffer_hsw.asm new file mode 100644 index 00000000..c34e9347 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.asm @@ -0,0 +1,296 @@ +/* + * Copyright © 2010-2013 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ + +START: + mov (16) pak_object_reg0.0<1>:ud 0x0:ud {align1}; + mov (8) obw_m0.0<1>:ud 0x0:ud {align1}; + mov (8) mb_cur_msg.0<1>:ud 0x0:ud {align1}; + mov (16) mb_temp.0<1>:ud 0x0:ud {align1}; + mov (1) cur_mb_x<1>:uw mb_x<0,1,0>:ub {align1}; + mov (1) cur_mb_y<1>:uw mb_y<0,1,0>:ub {align1}; + mov (1) end_mb_x<1>:uw slice_end_x<0,1,0>:ub {align1}; + mov (1) end_mb_y<1>:uw slice_end_y<0,1,0>:ub {align1}; + mov (1) end_loop_count<1>:uw total_mbs<0,1,0>:uw {align1}; + mov (1) vme_len<1>:ud 2:ud {align1}; + and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; + (f0.0) mov (1) vme_len<1>:ud 24:ud {align1}; + + mov (1) obw_m0.8<1>:UD buffer_offset<0,1,0>:ud {align1}; + mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + + mul (1) mb_cur_msg.8<1>:UD width_in_mbs<0,1,0>:UW cur_mb_y<0,1,0>:UW {align1}; + add (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD cur_mb_x<0,1,0>:uw {align1}; + mul (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD vme_len<0,1,0>:UD {align1}; + mov (1) mb_cur_msg.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_DW0:ud {align1}; + mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_DW5:ud {align1}; + mov (1) pak_object10_ud<1>:ud MFC_AVC_PAK_OBJECT_DW10:ud {align1}; + mov (1) pak_object6_ud<1>:ub qp_flag<0,1,0>:ub {align1}; + +pak_object_loop: + mov (8) mb_msg0.0<1>:ud mb_cur_msg.0<8,8,1>:ud {align1}; + mov (1) pak_object4_ud<1>:ud MFC_AVC_PAK_OBJECT_DW4:ud {align1}; + mov (1) tmp_reg0.0<1>:ub cur_mb_x<0,1,0>:ub {align1}; + mov (1) tmp_reg0.1<1>:ub cur_mb_y<0,1,0>:ub {align1}; + mov (1) pak_object4_ud<1>:uw tmp_reg0.0<0,1,0>:uw {align1}; + /* pak_object6_ud */ + mov (1) pak_object_reg0.26<1>:uw 0x0:uw {align1}; + + cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw end_mb_x<0,1,0>:uw {align1}; + (-f0.0) jmpi (1) start_mb_flag; + cmp.e.f0.0 (1) null:uw cur_mb_y<0,1,0>:uw end_mb_y<0,1,0>:uw {align1}; + (f0.0) mov (1) pak_object_reg0.26<1>:uw 
MFC_AVC_PAK_LAST_MB:uw {align1}; +start_mb_flag: + and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; + (f0.0) jmpi (1) inter_frame_start; + +/* bind index 0, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + jmpi (1) intra_pak_command; + +nop; +nop; +inter_frame_start: +/* bind index 0, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) jmpi (1) intra_pak_command; + +/* MV len and MV mode */ + and (1) pak_object3_ud<1>:ud mb_inter_wb.0<0,1,0>:ud MFC_AVC_INTER_MASK_DW3:ud {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_PAK_CBP:ud {align1}; + and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1}; + mov (1) pak_object1_ud<1>:ud 32:ud {align1}; + cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_8X8MODE:uw {align1}; + (-f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1}; + (-f0.0) jmpi (1) inter_mv_check; + and.nz.f0.0 (1) null:ud mb_inter_wb.4<0,1,0>:uw SUBSHAPE_MASK:uw {align1}; + (f0.0) mov (1) pak_object1_ud<1>:ud 128:ud {align1}; + (f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV32:ud {align1}; + (f0.0) jmpi (1) mv_check_end; + + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1}; + +inter_mv_check: + and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1}; + cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_16X16MODE:uw {align1}; + 
(f0.0) jmpi (1) mv_check_end; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 0, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ + + mov (2) mb_mv0.8<1>:ud mb_mv1.0<2,2,1>:ud {align1}; + mov (2) mb_mv0.16<1>:ud mb_mv2.0<2,2,1>:ud {align1}; + mov (2) mb_mv0.24<1>:ud mb_mv3.0<2,2,1>:ud {align1}; + + mov (8) msg_reg0.0<1>:ud mb_msg0.0<8,8,1>:ud {align1} ; + mov (8) msg_reg1.0<1>:ud mb_mv0.0<8,8,1>:ud {align1} ; +/* Write MV for MB A */ +/* bind index 0, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + MV_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +mv_check_end: + +/* ref list */ + mov (1) pak_object8_ud<1>:ud fwd_ref<0,1,0>:ud {align1}; + mov (1) pak_object9_ud<1>:ud bwd_ref<0,1,0>:ud {align1}; +/* inter_mode. 
pak_object7_ud */ + mov (1) pak_object7_ud<1>:ud 0x0:ud {align1}; + mov (1) pak_object_reg0.28<1>:ub mb_inter_wb.5<0,1,0>:ub {align1}; + mov (1) pak_object_reg0.29<1>:ub mb_inter_wb.6<0,1,0>:ub {align1}; + +/* mv start address */ + add (1) tmp_reg0.4<1>:ud mb_cur_msg.8<0,1,0>:ud 3:ud {align1}; + mul (1) pak_object2_ud<1>:ud tmp_reg0.4<0,1,0>:ud 16:ud {align1}; + + jmpi (1) write_pak_command; + +intra_pak_command: + /* object 1/2 is set to zero */ + mov (2) pak_object1_ud<1>:ud 0x0:ud {align1}; + /* object 7/8 intra mode */ + mov (1) pak_object7_ud<1>:ud mb_intra_wb.4<0,1,0>:ud {align1}; + mov (1) pak_object8_ud<1>:ud mb_intra_wb.8<0,1,0>:ud {align1}; + /* object 9 Intra structure */ + mov (1) pak_object9_ud<1>:ud 0x0:ud {align1}; + mov (1) pak_object9_ud<1>:ub mb_intra_wb.12<0,1,0>:ub {align1}; + + and (1) pak_object3_ud<1>:ud mb_intra_wb.0<0,1,0>:ud MFC_AVC_INTRA_MASK_DW3:ud {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_INTRA_FLAG + MFC_AVC_PAK_CBP:ud {align1}; + + mov (1) tmp_reg0.0<1>:ud 0:ud {align1}; + mov (1) tmp_reg0.1<1>:ub mb_intra_wb.2<0,1,0>:ub {align1}; + and (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw AVC_INTRA_MASK:uw {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud tmp_reg0.0<0,1,0>:ud {align1}; + +/* Write the pak command into the batchbuffer */ +write_pak_command: + mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1} ; + mov (8) msg_reg1.0<1>:ud pak_object_reg0.0<8,8,1>:ud {align1} ; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + MFC_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + add (1) msg_reg0.8<1>:ud msg_reg0.8<0,1,0>:ud 2:ud {align1}; + mov (8) msg_reg1.0<1>:ud pak_object_reg1.0<8,8,1>:ud {align1}; + +/* bind index 3, write 1 oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + 
obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + MFC_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Check the next mb */ +add (1) cur_loop_count<1>:uw cur_loop_count<0,1,0>:uw 1:uw {align1}; +cmp.e.f0.0 (1) null:uw cur_loop_count<0,1,0>:uw end_loop_count<0,1,0>:uw {align1}; +(f0.0) jmpi (1) pak_loop_end; +/* the buffer offset for next block */ +add (1) obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 3:uw {align1}; +add (1) mb_cur_msg.8<1>:ud mb_cur_msg.8<0,1,0>:ud vme_len<0,1,0>:ud {align1}; +add (1) cur_mb_x<1>:uw cur_mb_x<0,1,0>:uw 1:uw {align1}; +/* Check whether it is already equal to width in mbs */ +cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw width_in_mbs<0,1,0>:uw {align1}; +(f0.0) add (1) cur_mb_y<1>:uw cur_mb_y<0,1,0>:uw 1:uw {align1}; +(f0.0) mov (1) cur_mb_x<1>:uw 0:uw {align1}; + +/* continue the pak command for next mb */ +jmpi (1) pak_object_loop; +nop; +nop; +pak_loop_end: +/* Issue message fence so that the previous write message is committed */ +send (16) + msg_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + MFC_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (1) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + +nop; + diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75a b/src/shaders/utils/mfc_batchbuffer_hsw.g75a new file mode 100644 index 00000000..4a967548 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75a @@ -0,0 +1,29 @@ +/* + * Copyright © 2010-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui + */ + +#include "mfc_batchbuffer_hsw.inc" +#include "mfc_batchbuffer_hsw.asm" + diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75b b/src/shaders/utils/mfc_batchbuffer_hsw.g75b new file mode 100644 index 00000000..2f42643f --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75b @@ -0,0 +1,105 @@ + { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2ac00229, 0x000000a8, 0x00000000 }, + { 0x00000001, 0x2ac20229, 0x000000a9, 0x00000000 }, + { 0x00000001, 0x2ae00229, 0x000000b0, 0x00000000 }, + { 0x00000001, 0x2ae20229, 0x000000b1, 0x00000000 }, + { 0x00000001, 0x2ae40129, 0x000000ac, 0x00000000 }, + { 0x00000001, 0x2ae80061, 0x00000000, 0x00000002 }, + { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, + { 0x00010001, 0x2ae80061, 0x00000000, 0x00000018 }, + { 0x00000001, 0x21e80021, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x21f40231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x2b082521, 0x000000aa, 
0x00000ac2 }, + { 0x00000040, 0x2b082421, 0x00000b08, 0x00000ac0 }, + { 0x00000041, 0x2b080421, 0x00000b08, 0x00000ae8 }, + { 0x00000001, 0x2b140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x23400061, 0x00000000, 0x7149000a }, + { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, + { 0x00000001, 0x23680061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23580231, 0x000000a6, 0x00000000 }, + { 0x00600001, 0x2b400021, 0x008d0b00, 0x00000000 }, + { 0x00000001, 0x23500061, 0x00000000, 0xffff0000 }, + { 0x00000001, 0x21000231, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x21010231, 0x00000ac2, 0x00000000 }, + { 0x00000001, 0x23500129, 0x00000100, 0x00000000 }, + { 0x00000001, 0x235a0169, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20002528, 0x00000ac0, 0x00000ae0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x01000010, 0x20002528, 0x00000ac2, 0x00000ae2 }, + { 0x00010001, 0x235a0169, 0x00000000, 0x04000400 }, + { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02180200 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000240 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280300 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000001f0 }, + { 0x00000005, 0x234c0c21, 0x00000b80, 0x1f00ffff }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 }, + { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 }, + { 0x00000001, 0x23440061, 0x00000000, 0x00000020 }, + { 0x01000010, 0x20002d28, 0x00000100, 0x00030003 }, + { 0x00110040, 0x234c0c21, 0x0000034c, 0x00400000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000050 }, + { 0x02000005, 0x20002d20, 0x00000b84, 0xff00ff00 }, + { 0x00010001, 0x23440061, 0x00000000, 0x00000080 }, + { 0x00010040, 0x234c0c21, 0x0000034c, 0x00600000 }, + { 0x00010020, 0x34001c00, 0x00001400, 
0x000000c0 }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x00400000 }, + { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 }, + { 0x01000010, 0x20002d28, 0x00000100, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000080 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480400 }, + { 0x00200001, 0x2ba80021, 0x00450bc0, 0x00000000 }, + { 0x00200001, 0x2bb00021, 0x00450be0, 0x00000000 }, + { 0x00200001, 0x2bb80021, 0x00450c00, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0b40, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0ba0, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0200 }, + { 0x00000001, 0x23600021, 0x000000b4, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x235c0061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0231, 0x00000b85, 0x00000000 }, + { 0x00000001, 0x235d0231, 0x00000b86, 0x00000000 }, + { 0x00000040, 0x21040c21, 0x00000b08, 0x00000003 }, + { 0x00000041, 0x23480c21, 0x00000104, 0x00000010 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 }, + { 0x00200001, 0x23440061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0021, 0x00000b64, 0x00000000 }, + { 0x00000001, 0x23600021, 0x00000b68, 0x00000000 }, + { 0x00000001, 0x23640061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23640231, 0x00000b6c, 0x00000000 }, + { 0x00000005, 0x234c0c21, 0x00000b60, 0x0000c0ff }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e2000 }, + { 0x00000001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21010231, 0x00000b62, 0x00000000 }, + { 0x00000005, 0x21002d29, 0x00000100, 0x1f001f00 }, + { 0x00000040, 0x234c0421, 0x0000034c, 0x00000100 }, + { 0x00600001, 0x28000021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 }, + { 0x00000040, 0x28080c21, 0x00000808, 0x00000002 }, + { 0x00600001, 0x28200021, 0x008d0360, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 
0x040a0002 }, + { 0x00000040, 0x2ac42d29, 0x00000ac4, 0x00010001 }, + { 0x01000010, 0x20002528, 0x00000ac4, 0x00000ae4 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000090 }, + { 0x00000040, 0x21e82c21, 0x000001e8, 0x00030003 }, + { 0x00000040, 0x2b080421, 0x00000b08, 0x00000ae8 }, + { 0x00000040, 0x2ac02d29, 0x00000ac0, 0x00010001 }, + { 0x01000010, 0x20002528, 0x00000ac0, 0x000000aa }, + { 0x00010040, 0x2ac22d29, 0x00000ac2, 0x00010001 }, + { 0x00010001, 0x2ac00169, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffffb30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000800, 0x0219e002 }, + { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x24001ca8, 0x00000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.inc b/src/shaders/utils/mfc_batchbuffer_hsw.inc new file mode 100644 index 00000000..588006e9 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.inc @@ -0,0 +1,195 @@ +/* + * Copyright © 2010-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui + */ + +/* GRF registers + * r0 header + * r1~r4 constant buffer (reserved) + * r5 inline data + * r6~r7 reserved + * r8~r15 temporary registers + * r16 write back of Oword Block Write + */ + +/* + * GRF 0 -- header + */ +define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ + +define(`inline_reg0', `r5') +define(`buffer_offset', `inline_reg0.0') /* :ud, in units of Owords */ +/* :ub, + * bit0 indicates the frame type. 1 is the I-frame. 0 is P-B frame + */ +define(`mb_flag', `inline_reg0.4') +define(`qp_flag', `inline_reg0.6') /* :ub */ + +define(`mb_x', `inline_reg0.8') /* :ub, */ +define(`mb_y', `inline_reg0.9') /* :ub, */ +define(`mb_xy', `inline_reg0.8') /* :uw, */ +/* :uw, the picture width in macroblocks */ +define(`width_in_mbs', `inline_reg0.10') +/* :w, the number of macroblock commands being processed by the kernel */ +define(`total_mbs', `inline_reg0.12') +/* ub, the mb x/y of the last mb in slice */ +define(`slice_end_x', `inline_reg0.16') +define(`slice_end_y', `inline_reg0.17') + +/* :ud the forward reference picture list */ +define(`fwd_ref', `inline_reg0.20') +/* :ud the backward reference picture list */ +define(`bwd_ref', `inline_reg0.24') + +/* + * GRF 8~15 -- temporary registers + */ +define(`tmp_reg0', `r8') +define(`tmp_reg1', `r9') +define(`tmp_reg2', `r10') +define(`tmp_reg3', `r11') +define(`tmp_reg4', `r12') +define(`tmp_reg5', `r13') +define(`tmp_reg6', `r14') +define(`tmp_reg7', `r15') + +define(`obw_m0', 
`tmp_reg7') + +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') + +/* + * GRF 26~27 + */ +define(`pak_object_reg0', `r26') +define(`pak_object0_ud', `r26.0') +define(`pak_object1_ud', `r26.4') +define(`pak_object2_ud', `r26.8') +define(`pak_object3_ud', `r26.12') +define(`pak_object4_ud', `r26.16') +define(`pak_object5_ud', `r26.20') +define(`pak_object6_ud', `r26.24') +define(`pak_object7_ud', `r26.28') + +define(`pak_object_reg1', `r27') +define(`pak_object8_ud', `r27.0') +define(`pak_object9_ud', `r27.4') +define(`pak_object10_ud', `r27.8') +define(`pak_object11_ud', `r27.12') + +/* + * Message Payload registers + */ +define(`msg_ind', `64') +define(`msg_reg0', `g64') +define(`msg_reg1', `g65') +define(`msg_reg2', `g66') +define(`msg_reg3', `g67') +define(`msg_reg4', `g68') +define(`msg_reg5', `g69') +define(`msg_reg6', `g70') +define(`msg_reg7', `g71') +define(`msg_reg8', `g72') + +define(`MV_BIND_IDX', `0') +define(`MFC_BIND_IDX', `2') + +define(`ts_msg_ind', `112') +define(`ts_msg_reg0', `r112') + + +define(`MFC_AVC_PAK_OBJECT_DW0', `0x7149000a') +define(`MFC_AVC_PAK_OBJECT_DW4', `0xFFFF0000') /* CBP for Y */ +define(`MFC_AVC_PAK_OBJECT_DW5', `0x000F000F') +define(`MFC_AVC_PAK_OBJECT_DW10', `0x0000000') + +define(`OBR_MESSAGE_TYPE', `0') +define(`OBR_CACHE_TYPE', `10') + +define(`OBR_MESSAGE_FENCE', `7') +define(`OBR_MF_NOCOMMIT', `0') +define(`OBR_MF_COMMIT', `0x20') + +define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBR_CONTROL_2', `2') /* 2 OWords */ +define(`OBR_CONTROL_4', `3') /* 4 OWords */ +define(`OBR_CONTROL_8', `4') /* 8 OWords */ + +define(`OBR_HEADER_PRESENT', `1') +define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +define(`OBW_CACHE_TYPE', `10') + + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', 
`1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_4', `3') /* 4 OWords */ +define(`OBW_CONTROL_8', `4') /* 8 OWords */ +define(`OBW_HEADER_PRESENT', `1') + +define(`INTER_MASK', `0x03') +define(`INTER_16X16MODE', `0x0') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') +define(`INTER_8X8MODE', `0x03') +define(`SUBSHAPE_MASK', `0xFF00') + +define(`mb_ind', `90') +define(`mb_msg0', `r90') +define(`mb_wb', `r91') +define(`mb_intra_wb', `r91') +define(`mb_inter_wb', `r92') +define(`mb_mv0', `r93') +define(`mb_mv1', `r94') +define(`mb_mv2', `r95') +define(`mb_mv3', `r96') + +define(`mb_temp', `r86') +define(`cur_mb_x', `mb_temp.0') /* :uw, */ +define(`cur_mb_y', `mb_temp.2') /* :uw, */ +define(`cur_loop_count', `mb_temp.4') /* :uw, */ +define(`mb_end', `r87') +define(`end_mb_x', `mb_end.0') /* :uw, */ +define(`end_mb_y', `mb_end.2') /* :uw, */ +define(`end_loop_count', `mb_end.4') /* :uw, */ +/* :ud the length of VME predict result for every mb. 
Units in owords */ +define(`vme_len', `mb_end.8') +define(`mb_cur_msg', `r88') + +define(`INTRA_SLICE', `0x0001') +define(`MFC_AVC_PAK_LAST_MB', `0x0400') + +define(`MFC_AVC_INTER_MASK_DW3', `0x1F00FFFF') +define(`MFC_AVC_INTRA_MASK_DW3', `0x0000C0FF') +define(`INTER_MV8', `0x00400000') +define(`INTER_MV32', `0x00600000') +define(`MFC_AVC_PAK_CBP', `0x000E0000') +define(`MFC_AVC_INTRA_FLAG', `0x00002000') +define(`AVC_INTRA_MASK', `0x1F00') -- cgit v1.2.1 From 8760d1016c6c5f75fbd19eb37f32d6eee7dddf38 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 11 Nov 2013 14:51:50 +0800 Subject: Remove the unused variable to avoid the warning Signed-off-by: Zhao Yakui (cherry picked from commit 0d37a309bd99f6bded4df922d0ece22bf3bb1757) --- src/gen75_mfc.c | 5 +---- src/gen7_mfc.c | 1 - src/gen7_vme.c | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 784a5e20..81cf7525 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1302,7 +1302,6 @@ gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; assert(mfc_context->aux_batchbuffer_surface.bo); mfc_context->buffer_suface_setup(ctx, @@ -1435,7 +1434,6 @@ gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); int number_mb_cmds = 128; int starting_offset = 0; - int i; int mb_x, mb_y; int last_mb, slice_end_x, slice_end_y; int remaining_mb = total_mbs; @@ -1605,7 +1603,7 @@ gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; - int i, size, offset = 0; + int i; intel_batchbuffer_start_atomic(batch, 0x4000); gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); @@ -2295,7 
+2293,6 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 3c3ae211..ddf3ce18 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -924,7 +924,6 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; diff --git a/src/gen7_vme.c b/src/gen7_vme.c index ed2ee5a9..77eb5b8d 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -814,7 +814,6 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; - int number_mb_cmds; int mb_x = 0, mb_y = 0; int i, s, j; unsigned int *command_ptr; -- cgit v1.2.1 From 500d8d174b25f7ea91f134166e5968beb43f3957 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 18 Sep 2013 15:12:55 +0800 Subject: VPP: change the default values for Saturation and Contrast Signed-off-by: Xiang, Haihao (cherry picked from commit ce0c8f0019e8545d0db529b0f28338be4b8adc15) --- src/i965_drv_video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index efe72b45..b1dac63d 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4689,7 +4689,7 @@ VAStatus i965_QueryVideoProcFilterCaps( cap->type = VAProcColorBalanceSaturation; cap->range.min_value = 0.0; cap->range.max_value = 10.0; - cap->range.default_value = 0.0; + 
cap->range.default_value = 1.0; cap->range.step = 0.1; i++; cap++; @@ -4705,7 +4705,7 @@ VAStatus i965_QueryVideoProcFilterCaps( cap->type = VAProcColorBalanceContrast; cap->range.min_value = 0.0; cap->range.max_value = 10.0; - cap->range.default_value = 0.0; + cap->range.default_value = 1.0; cap->range.step = 0.1; i++; cap++; -- cgit v1.2.1 From 04ed735e9056aa1ee74ffdc9a545619cb1eba28c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 26 Sep 2013 11:02:09 +0800 Subject: render: add support for brightness/contrast/hue/saturation Signed-off-by: Xiang, Haihao (cherry picked from commit 04ecb6e79f4382d96eb5d4b51733049d420f592a) --- src/i965_drv_video.c | 35 +++++++++- src/i965_drv_video.h | 9 +++ src/i965_render.c | 33 +++++++++- src/shaders/render/Makefile.am | 17 ++++- src/shaders/render/exa_wm.g4i | 19 ++++++ src/shaders/render/exa_wm_yuv_color_balance.g4a | 38 +++++++++++ src/shaders/render/exa_wm_yuv_color_balance.g4b | 15 +++++ .../render/exa_wm_yuv_color_balance.g4b.gen5 | 15 +++++ src/shaders/render/exa_wm_yuv_color_balance.g6a | 38 +++++++++++ src/shaders/render/exa_wm_yuv_color_balance.g6b | 15 +++++ src/shaders/render/exa_wm_yuv_color_balance.g7a | 38 +++++++++++ src/shaders/render/exa_wm_yuv_color_balance.g7b | 15 +++++ .../render/exa_wm_yuv_color_balance.g7b.haswell | 15 +++++ src/shaders/render/exa_wm_yuv_color_balance.gxa | 75 ++++++++++++++++++++++ 14 files changed, 371 insertions(+), 6 deletions(-) create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g4a create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g4b create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g6a create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g6b create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g7a create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g7b create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell create 
mode 100644 src/shaders/render/exa_wm_yuv_color_balance.gxa diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index b1dac63d..eec77620 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -96,6 +96,30 @@ enum { /* List of supported display attributes */ static const VADisplayAttribute i965_display_attributes[] = { + { + VADisplayAttribBrightness, + -100, 100, DEFAULT_BRIGHTNESS, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribContrast, + 0, 100, DEFAULT_CONTRAST, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribHue, + -180, 180, DEFAULT_HUE, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribSaturation, + 0, 100, DEFAULT_SATURATION, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + { VADisplayAttribRotation, 0, 3, VA_ROTATION_NONE, @@ -2431,7 +2455,16 @@ i965_display_attributes_init(VADriverContextP ctx) ); i965->rotation_attrib = get_display_attribute(ctx, VADisplayAttribRotation); - if (!i965->rotation_attrib) { + i965->brightness_attrib = get_display_attribute(ctx, VADisplayAttribBrightness); + i965->contrast_attrib = get_display_attribute(ctx, VADisplayAttribContrast); + i965->hue_attrib = get_display_attribute(ctx, VADisplayAttribHue); + i965->saturation_attrib = get_display_attribute(ctx, VADisplayAttribSaturation); + + if (!i965->rotation_attrib || + !i965->brightness_attrib || + !i965->contrast_attrib || + !i965->hue_attrib || + !i965->saturation_attrib) { goto error; } return true; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index a0e7790d..3b06ac08 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -59,6 +59,11 @@ #define I965_SURFACE_FLAG_TOP_FIELD_FIRST 0x00000001 #define I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST 0x00000002 +#define DEFAULT_BRIGHTNESS 0 +#define DEFAULT_CONTRAST 10 +#define DEFAULT_HUE 0 +#define DEFAULT_SATURATION 10 + struct i965_surface { struct object_base *base; @@ 
-321,6 +326,10 @@ struct i965_driver_data VADisplayAttribute *display_attributes; unsigned int num_display_attributes; VADisplayAttribute *rotation_attrib; + VADisplayAttribute *brightness_attrib; + VADisplayAttribute *contrast_attrib; + VADisplayAttribute *hue_attrib; + VADisplayAttribute *saturation_attrib; VAContextID current_context_id; /* VA/DRI (X11) specific data */ diff --git a/src/i965_render.c b/src/i965_render.c index 26a7baf1..b4fd29b6 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -64,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] = #include "shaders/render/exa_wm_xy.g4b" #include "shaders/render/exa_wm_src_affine.g4b" #include "shaders/render/exa_wm_src_sample_planar.g4b" +#include "shaders/render/exa_wm_yuv_color_balance.g4b" #include "shaders/render/exa_wm_yuv_rgb.g4b" #include "shaders/render/exa_wm_write.g4b" }; @@ -86,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_xy.g4b.gen5" #include "shaders/render/exa_wm_src_affine.g4b.gen5" #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5" +#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5" #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5" #include "shaders/render/exa_wm_write.g4b.gen5" }; @@ -105,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] = static const uint32_t ps_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_src_affine.g6b" #include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_color_balance.g6b" #include "shaders/render/exa_wm_yuv_rgb.g6b" #include "shaders/render/exa_wm_write.g6b" }; @@ -123,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] = static const uint32_t ps_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_color_balance.g7b" #include 
"shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -137,6 +142,7 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = { static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b.haswell" +#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -1050,6 +1056,8 @@ i965_render_upload_vertex( i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } +#define PI 3.1415926 + static void i965_render_upload_constants(VADriverContextP ctx, struct object_surface *obj_surface) @@ -1057,6 +1065,11 @@ i965_render_upload_constants(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); @@ -1065,14 +1078,28 @@ i965_render_upload_constants(VADriverContextP ctx, if (obj_surface->subsampling == SUBSAMPLE_YUV400) { assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); - *constant_buffer = 2; + constant_buffer[0] = 2; } else { if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; + constant_buffer[0] = 1; else - *constant_buffer = 0; + constant_buffer[0] = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + 
constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + dri_bo_unmap(render_state->curbe.bo); } diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index dac58c76..1653b4ae 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -1,7 +1,8 @@ INTEL_G4I = \ exa_wm.g4i \ - exa_wm_affine.g4i + exa_wm_affine.g4i \ + exa_wm_yuv_color_balance.gxa INTEL_G4A = \ exa_sf.g4a \ @@ -9,6 +10,7 @@ INTEL_G4A = \ exa_wm_src_affine.g4a \ exa_wm_src_sample_argb.g4a \ exa_wm_src_sample_planar.g4a \ + exa_wm_yuv_color_balance.g4a \ exa_wm_yuv_rgb.g4a \ exa_wm_write.g4a @@ -20,6 +22,7 @@ INTEL_G4B = \ exa_wm_src_affine.g4b \ exa_wm_src_sample_argb.g4b \ exa_wm_src_sample_planar.g4b \ + exa_wm_yuv_color_balance.g4b \ exa_wm_yuv_rgb.g4b \ exa_wm_write.g4b @@ -29,14 +32,18 @@ INTEL_G4B_GEN5 = \ exa_wm_src_affine.g4b.gen5 \ exa_wm_src_sample_argb.g4b.gen5 \ exa_wm_src_sample_planar.g4b.gen5 \ + exa_wm_yuv_color_balance.g4b.gen5 \ exa_wm_yuv_rgb.g4b.gen5 \ exa_wm_write.g4b.gen5 +INTEL_G6I = $(INTEL_G4I) + INTEL_G6A = \ exa_wm_src_affine.g6a \ exa_wm_src_sample_argb.g6a \ exa_wm_src_sample_planar.g6a \ exa_wm_write.g6a \ + exa_wm_yuv_color_balance.g6a \ exa_wm_yuv_rgb.g6a INTEL_G6S = $(INTEL_G6A:%.g6a=%.g6s) @@ -46,13 +53,17 @@ INTEL_G6B = \ exa_wm_src_sample_argb.g6b \ exa_wm_src_sample_planar.g6b \ exa_wm_write.g6b \ + exa_wm_yuv_color_balance.g6b \ exa_wm_yuv_rgb.g6b +INTEL_G7I = $(INTEL_G4I) + INTEL_G7A = \ exa_wm_src_affine.g7a \ exa_wm_src_sample_argb.g7a \ exa_wm_src_sample_planar.g7a \ exa_wm_write.g7a \ + exa_wm_yuv_color_balance.g7a \ exa_wm_yuv_rgb.g7a INTEL_G7S = $(INTEL_G7A:%.g7a=%.g7s) @@ -62,11 +73,13 @@ INTEL_G7B = \ 
exa_wm_src_sample_argb.g7b \ exa_wm_src_sample_planar.g7b \ exa_wm_write.g7b \ + exa_wm_yuv_color_balance.g7b \ exa_wm_yuv_rgb.g7b # XXX: only regenerate binary for EU code containing JMPI instructions INTEL_G7B_HASWELL = \ exa_wm_src_sample_planar.g7b.haswell \ + exa_wm_yuv_color_balance.g7b.haswell \ $(NULL) TARGETS = @@ -80,7 +93,7 @@ endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell +SUFFIXES = .g4a .g4s .g4b .g4b.gen5 .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell if HAVE_GEN4ASM $(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I) diff --git a/src/shaders/render/exa_wm.g4i b/src/shaders/render/exa_wm.g4i index 8163de59..dd47d515 100644 --- a/src/shaders/render/exa_wm.g4i +++ b/src/shaders/render/exa_wm.g4i @@ -142,6 +142,25 @@ define(`mask_sample_a', `g28') define(`mask_sample_a_01', `g28') define(`mask_sample_a_23', `g29') +/* Color Balance to these registers */ +define(`color_balance_base', `g32') + +define(`color_balance_r', `g32') +define(`color_balance_r_01', `g32') +define(`color_balance_r_23', `g33') + +define(`color_balance_g', `g34') +define(`color_balance_g_01', `g34') +define(`color_balance_g_23', `g35') + +define(`color_balance_b', `g36') +define(`color_balance_b_01', `g37') +define(`color_balance_b_23', `g37') + +define(`color_balance_a', `g38') +define(`color_balance_a_01', `g39') +define(`color_balance_a_23', `g39') + /* data port SIMD16 send registers */ define(`data_port_msg_0', `m0') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4a b/src/shaders/render/exa_wm_yuv_color_balance.g4a new file mode 100644 index 00000000..33ba67a9 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights 
to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Haihao Xiang + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g2.2<0,1,0>uw') +define(`contrast', `g2.16<0,1,0>f') +define(`brightness', `g2.20<0,1,0>f') +define(`cos_c_s', `g2.24<0,1,0>f') +define(`sin_c_s', `g2.28<0,1,0>f') +define(`sin_c_s_t', `g2.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b b/src/shaders/render/exa_wm_yuv_color_balance.g4b new file mode 100644 index 00000000..cba9aca8 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000d }, + { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 }, + { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 }, + { 0x00802040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 
0x240077bc, 0x008d0440, 0x0000005c }, + { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 }, + { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c }, + { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 new file mode 100644 index 00000000..5a24a0e8 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 }, + { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 }, + { 0x00802040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0440, 0x0000005c }, + { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 }, + { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c }, + { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6a b/src/shaders/render/exa_wm_yuv_color_balance.g6a new file mode 100644 index 00000000..69063575 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g6a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of 
the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Haihao Xiang + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6b b/src/shaders/render/exa_wm_yuv_color_balance.g6b new file mode 100644 index 00000000..0a9e6b92 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g6b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 
0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7a b/src/shaders/render/exa_wm_yuv_color_balance.g7a new file mode 100644 index 00000000..69063575 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b b/src/shaders/render/exa_wm_yuv_color_balance.g7b new file mode 100644 index 00000000..0a9e6b92 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell new file mode 100644 index 00000000..2780c08a --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000d0 }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 
0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.gxa b/src/shaders/render/exa_wm_yuv_color_balance.gxa new file mode 100644 index 00000000..948067cf --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.gxa @@ -0,0 +1,75 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang + * + */ + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `color_balance_g') +define(`Crn_01', `color_balance_g_01') +define(`Crn_23', `color_balance_g_23') + +define(`Yn', `color_balance_r') +define(`Yn_01', `color_balance_r_01') +define(`Yn_23', `color_balance_r_23') + +define(`Cbn', `color_balance_b') +define(`Cbn_01', `color_balance_b_01') +define(`Cbn_23', `color_balance_b_23') + +cmp.e.f0.0 (1) null skip_color_balance 0x1uw {align1}; +(f0.0) jmpi _DONE_COLOR_BALANCE; + +/* Yout = (Yin - 16 / 255) * contrast + brightness + 16 / 255 */ +add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Yn<1>F Yn<8,8,1>F contrast { compr align1 }; +add (16) Yn<1>F Yn<8,8,1>F brightness { compr align1 }; +add (16) Y<1>F Yn<8,8,1>F 0.0627451F { compr align1 }; + +/* Uout = (Uin - 128 / 255) * cos_c_s + (Vin - 128 / 255) * sin_c_s + 128 / 255 */ +/* Vout = (Vin - 128 / 255) * cos_c_s - (Uin - 128 / 255) * sin_c_s + 128 / 255 */ +add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; +add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +mov (16) acc0<1>F 0.501961F { compr align1 }; +mac (16) acc0<1>F Crn<8,8,1>F sin_c_s { compr align1 }; +mac (16) Cb<1>F Cbn<8,8,1>F cos_c_s { compr align1 }; + +mul (1) sin_c_s_t<1>F sin_c_s -1.0F { align1}; +mov (16) acc0<1>F 0.501961F { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F sin_c_s { compr align1 }; +mac (16) Cr<1>F Crn<8,8,1>F cos_c_s { compr align1 }; + +_DONE_COLOR_BALANCE: -- cgit v1.2.1 From 852172b78093baea101c2aaef9e8b488b95940b6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 13 Nov 2013 10:27:59 +0800 Subject: VPP: use the target width/height to calculate the horizontal/vertical step on IVB 
Signed-off-by: Xiang, Haihao (cherry picked from commit ba020ed2c7730980649d9131e41e9677f603c52a) --- src/i965_post_processing.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 536eb64a..d5847eaa 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -2935,7 +2935,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_avs_context->horiz_range = (float)src_rect->width / src_width; int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, pp_avs_context->dest_w); + dw = MAX(dw, dst_rect->width); pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ @@ -2947,7 +2947,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - @@ -3149,7 +3149,7 @@ gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_co pp_avs_context->horiz_range = (float)src_rect->width / src_width; int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, pp_avs_context->dest_w); + dw = MAX(dw, dst_rect->width); pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable 
= 0; /* It is unnecessary to use WA for RGBX surface */ @@ -3158,7 +3158,7 @@ gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_co pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - -- cgit v1.2.1 From fdde376c8c59a076f09e08cf5dbc62fd5f031f6d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 13 Nov 2013 13:19:16 +0800 Subject: Correct the usage of width/height in struct object_surface Add comments for width/height, orig_width/orig_height as well Signed-off-by: Xiang, Haihao (cherry picked from commit f886f24eaaacba9544fa5f6405b7382c686f3a1f) --- src/i965_drv_video.c | 24 +++++++++++++----------- src/i965_drv_video.h | 8 ++++---- src/i965_post_processing.c | 5 ----- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index eec77620..b6522db8 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2874,12 +2874,13 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): assert(subsampling == SUBSAMPLE_YUV422H); - obj_surface->cb_cr_pitch = obj_surface->width * 2; + obj_surface->width = ALIGN(obj_surface->orig_width * 2, 128); + obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; 
obj_surface->cb_cr_width = obj_surface->orig_width / 2; obj_surface->cb_cr_height = obj_surface->orig_height / 2; - region_width = obj_surface->width * 2; + region_width = obj_surface->width; region_height = obj_surface->height; break; @@ -2890,7 +2891,8 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC('B', 'G', 'R', 'X'): assert(subsampling == SUBSAMPLE_RGBX); - region_width = obj_surface->width * 4; + obj_surface->width = ALIGN(obj_surface->orig_width * 4, 128); + region_width = obj_surface->width; region_height = obj_surface->height; break; @@ -2936,19 +2938,21 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): + obj_surface->width = ALIGN(obj_surface->orig_width * 2, 16); obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; obj_surface->cb_cr_width = obj_surface->orig_width / 2; obj_surface->cb_cr_height = obj_surface->orig_height; - obj_surface->cb_cr_pitch = obj_surface->width * 2; - region_width = obj_surface->width * 2; + obj_surface->cb_cr_pitch = obj_surface->width; + region_width = obj_surface->width; region_height = obj_surface->height; break; case VA_FOURCC('R', 'G', 'B', 'A'): case VA_FOURCC('R', 'G', 'B', 'X'): case VA_FOURCC('B', 'G', 'R', 'A'): case VA_FOURCC('B', 'G', 'R', 'X'): - region_width = obj_surface->width * 4; + obj_surface->width = ALIGN(obj_surface->orig_width * 4, 16); + region_width = obj_surface->width; region_height = obj_surface->height; break; @@ -2974,9 +2978,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, &pitch, 0); assert(tiling_mode == I915_TILING_Y); - assert(pitch == obj_surface->width || - pitch == obj_surface->width * 2 || - pitch == obj_surface->width * 4) ; + assert(pitch == obj_surface->width); } else { obj_surface->bo = dri_bo_alloc(i965->intel.bufmgr, "vaapi surface", @@ -3078,7 +3080,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): 
image->num_planes = 1; - image->pitches[0] = obj_surface->width * 2; /* Y, width is aligned already */ + image->pitches[0] = obj_surface->width; /* Y, width is aligned already */ image->offsets[0] = 0; break; case VA_FOURCC('R', 'G', 'B', 'A'): @@ -3086,7 +3088,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, case VA_FOURCC('B', 'G', 'R', 'A'): case VA_FOURCC('B', 'G', 'R', 'X'): image->num_planes = 1; - image->pitches[0] = obj_surface->width * 4; + image->pitches[0] = obj_surface->width; break; default: goto error; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 3b06ac08..f51f39fe 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -207,11 +207,11 @@ struct object_surface struct object_subpic *obj_subpic[I965_MAX_SUBPIC_SUM]; unsigned int subpic_render_idx; - int width; - int height; + int width; /* the pitch of plane 0 in bytes in horizontal direction */ + int height; /* the pitch of plane 0 in bytes in vertical direction */ int size; - int orig_width; - int orig_height; + int orig_width; /* the width of plane 0 in pixels */ + int orig_height; /* the height of plane 0 in pixels */ int flags; unsigned int fourcc; dri_bo *bo; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index d5847eaa..eeff289f 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1738,11 +1738,9 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin if (full_packed_format) { scale_factor_of_1st_plane_width_in_byte = 4; - pitch[0] = obj_surface->width * 4; } else if (packed_yuv ) { scale_factor_of_1st_plane_width_in_byte = 2; - pitch[0] = obj_surface->width * 2; } else if (interleaved_uv) { width[1] = obj_surface->orig_width; @@ -1853,12 +1851,9 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ else width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is 
specified in units of pixels */ - - pitch[0] = obj_surface->width * 2; } else if (rgbx_format) { if (is_target) width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - pitch[0] = obj_surface->width * 4; } width[1] = obj_surface->cb_cr_width; -- cgit v1.2.1 From b45f566afed8470fc505132494b4c9985cf755a9 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 13 Nov 2013 14:18:20 +0800 Subject: VPP: remove some assert() Instead check the input parameters and return corresponding error status if failed Signed-off-by: Xiang, Haihao (cherry picked from commit 40fa7d9ede00e804f15df4b7b805c7345a925e17) --- src/gen75_picture_process.c | 43 +++++++++++++++++++++++++++++-------------- src/i965_post_processing.c | 37 +++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index e2344aa3..fee378f6 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -123,25 +123,39 @@ gen75_proc_picture(VADriverContextP ctx, (VAProcPipelineParameterBuffer *)proc_st->pipeline_param->buffer; struct object_surface *obj_dst_surf = NULL; struct object_surface *obj_src_surf = NULL; + VAStatus status; + proc_ctx->pipeline_param = pipeline_param; - assert(proc_st->current_render_target != VA_INVALID_SURFACE); if (proc_st->current_render_target == VA_INVALID_SURFACE || - pipeline_param->surface == VA_INVALID_SURFACE) + pipeline_param->surface == VA_INVALID_SURFACE) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } obj_dst_surf = SURFACE(proc_st->current_render_target); - if (!obj_dst_surf) + if (!obj_dst_surf) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } obj_src_surf = SURFACE(proc_ctx->pipeline_param->surface); - if (!obj_src_surf) + if (!obj_src_surf) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } - if (pipeline_param->num_filters && !pipeline_param->filters) + if (!obj_src_surf->bo) { + status = 
VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */ goto error; + } + + if (pipeline_param->num_filters && !pipeline_param->filters) { + status = VA_STATUS_ERROR_INVALID_PARAMETER; + goto error; + } if (!obj_dst_surf->bo) { unsigned int is_tiled = 0; @@ -166,8 +180,10 @@ gen75_proc_picture(VADriverContextP ctx, if (!obj_buf || !obj_buf->buffer_store || - !obj_buf->buffer_store->buffer) + !obj_buf->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; @@ -177,12 +193,11 @@ gen75_proc_picture(VADriverContextP ctx, filter->type == VAProcFilterColorBalance){ gen75_vpp_vebox(ctx, proc_ctx); }else if(filter->type == VAProcFilterSharpening){ - assert(obj_src_surf->fourcc == VA_FOURCC('N','V','1','2') && - obj_dst_surf->fourcc == VA_FOURCC('N','V','1','2')); - if (obj_src_surf->fourcc != VA_FOURCC('N', 'V', '1', '2') || - obj_dst_surf->fourcc != VA_FOURCC('N', 'V', '1', '2')) + obj_dst_surf->fourcc != VA_FOURCC('N', 'V', '1', '2')) { + status = VA_STATUS_ERROR_UNIMPLEMENTED; goto error; + } gen75_vpp_gpe(ctx, proc_ctx); } @@ -191,12 +206,12 @@ gen75_proc_picture(VADriverContextP ctx, for (i = 0; i < pipeline_param->num_filters; i++){ struct object_buffer * obj_buf = BUFFER(pipeline_param->filters[i]); - assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer); - if (!obj_buf || !obj_buf->buffer_store || - !obj_buf->buffer_store->buffer) + !obj_buf->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; @@ -214,7 +229,7 @@ gen75_proc_picture(VADriverContextP ctx, return VA_STATUS_SUCCESS; error: - return VA_STATUS_ERROR_INVALID_PARAMETER; + return status; } static void diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 
eeff289f..9ab6fde3 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5483,14 +5483,28 @@ i965_proc_picture(VADriverContextP ctx, unsigned int tiling = 0, swizzle = 0; int in_width, in_height; - assert(pipeline_param->surface != VA_INVALID_ID); - assert(proc_state->current_render_target != VA_INVALID_ID); + if (pipeline_param->surface == VA_INVALID_ID || + proc_state->current_render_target == VA_INVALID_ID) { + status = VA_STATUS_ERROR_INVALID_SURFACE; + goto error; + } obj_surface = SURFACE(pipeline_param->surface); - assert(obj_surface && obj_surface->bo); - if (!obj_surface || !obj_surface->bo) + if (!obj_surface) { + status = VA_STATUS_ERROR_INVALID_SURFACE; + goto error; + } + + if (!obj_surface->bo) { + status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */ + goto error; + } + + if (pipeline_param->num_filters && !pipeline_param->filters) { + status = VA_STATUS_ERROR_INVALID_PARAMETER; goto error; + } in_width = obj_surface->orig_width; in_height = obj_surface->orig_height; @@ -5574,10 +5588,12 @@ i965_proc_picture(VADriverContextP ctx, VAProcFilterType filter_type; int kernel_index; - assert(obj_buffer && obj_buffer->buffer_store); - - if (!obj_buffer || !obj_buffer->buffer_store) + if (!obj_buffer || + !obj_buffer->buffer_store || + !obj_buffer->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } out_surface_id = VA_INVALID_ID; filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer; @@ -5617,10 +5633,11 @@ i965_proc_picture(VADriverContextP ctx, proc_context->pp_context.pipeline_param = NULL; obj_surface = SURFACE(proc_state->current_render_target); - assert(obj_surface); - if (!obj_surface) + if (!obj_surface) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } int csc_needed = 0; if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC('N','V','1','2')){ @@ -5694,7 +5711,7 @@ error: tmp_surfaces, 
num_tmp_surfaces); - return VA_STATUS_ERROR_INVALID_PARAMETER; + return status; } static void -- cgit v1.2.1 From 46c490188fee9d717db1ad526265e0dc1b09d91d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 6 Sep 2013 16:43:17 +0800 Subject: Workaround for SNB Backporting from d0184b5 in xf86-video-intel Signed-off-by: Xiang, Haihao (cherry picked from commit e622ecedf169bccddc8910b45d92dbec7675441e) --- src/intel_batchbuffer.c | 50 +++++++++++++++++++++++++++++++++++++++++-------- src/intel_batchbuffer.h | 3 +++ src/intel_driver.h | 2 ++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 94d968c8..8b357448 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -86,6 +86,16 @@ intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size batch->intel = intel; batch->flag = flag; batch->run = drm_intel_bo_mrb_exec; + + if (IS_GEN6(intel->device_id) && + flag == I915_EXEC_RENDER) + batch->wa_render_bo = dri_bo_alloc(intel->bufmgr, + "wa scratch", + 4096, + 4096); + else + batch->wa_render_bo = NULL; + intel_batchbuffer_reset(batch, buffer_size); return batch; @@ -99,6 +109,7 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch) } dri_bo_unreference(batch->buffer); + dri_bo_unreference(batch->wa_render_bo); free(batch); } @@ -175,23 +186,46 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) if (IS_GEN6(intel->device_id) || IS_GEN7(intel->device_id)) { if (batch->flag == I915_EXEC_RENDER) { - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); - - if (IS_GEN6(intel->device_id)) - OUT_BATCH(batch, + if (IS_GEN6(intel->device_id)) { + assert(batch->wa_render_bo); + + BEGIN_BATCH(batch, 4 * 3); + + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, + CMD_PIPE_CONTROL_CS_STALL | + CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(batch, 0); /* address */ + OUT_BATCH(batch, 0); /* write data */ + + 
OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD); + OUT_RELOC(batch, + batch->wa_render_bo, + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BATCH(batch, 0); /* write data */ + + /* now finally the _real flush */ + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); - else + } else { + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_DC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); + } - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); /* write data */ ADVANCE_BATCH(batch); } else { if (batch->flag == I915_EXEC_BLT) { diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h index 70ceddb9..34ff66d2 100644 --- a/src/intel_batchbuffer.h +++ b/src/intel_batchbuffer.h @@ -24,6 +24,9 @@ struct intel_batchbuffer int (*run)(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4, unsigned int ring_flag); + + /* Scratch buffer targeted by the Sandybridge (GEN6) PIPE_CONTROL workaround; NULL for other batches */ + dri_bo *wa_render_bo; }; struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size); diff --git a/src/intel_driver.h b/src/intel_driver.h index c36dbbe2..8f442744 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -45,6 +45,7 @@ #define BR13_8888 (0x3 << 24) #define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16)) +#define CMD_PIPE_CONTROL_CS_STALL (1 << 20) #define CMD_PIPE_CONTROL_NOWRITE (0 << 14) #define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14) #define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14) @@ -57,6 +58,7 @@ #define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5) #define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define
CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) -- cgit v1.2.1 From 4a0f76c5b706fccbc85fadaeee9d785cd7b57d5a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 26 Nov 2013 10:43:45 +0800 Subject: SNB doesn't support MPEG-2 encoding Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72016 Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index b6522db8..178a0f27 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -245,7 +245,6 @@ static struct hw_codec_info gen6_hw_codec_info = { .max_height = 2048, .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, .has_h264_decoding = 1, .has_h264_encoding = 1, .has_vc1_decoding = 1, -- cgit v1.2.1 From c4063375dd864cc90739603a3547b8b37b78e461 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 26 Nov 2013 09:21:11 +0800 Subject: dec/mpeg2: ignore slices which aren't in raster scan order on SNB Sometimes codec layer incorrectly fills slice parameters due to the corrupted video Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71276 Signed-off-by: Xiang, Haihao --- src/gen6_mfd.c | 34 ++++++++++++---------------------- src/i965_decoder_utils.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/i965_decoder_utils.h | 8 ++++++++ 3 files changed, 61 insertions(+), 22 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index afbfc4c2..f2b0fdff 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -1167,9 +1167,9 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; VAPictureParameterBufferMPEG2 *pic_param; - VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param; + VASliceParameterBufferMPEG2 *slice_param, *next_slice_param; dri_bo *slice_data_bo; - int i, j; + int group_idx = 0, pre_group_idx = -1, element_idx = 0; assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = 
(VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; @@ -1188,28 +1188,18 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, gen6_mfd_context->wa_mpeg2_slice_vertical_position = mpeg2_wa_slice_vertical_position(decode_state, pic_param); - for (j = 0; j < decode_state->num_slice_params; j++) { - assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); - slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer; - slice_data_bo = decode_state->slice_datas[j]->bo; - gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context); - - if (j == decode_state->num_slice_params - 1) - next_slice_group_param = NULL; - else - next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer; - - for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { - assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group_idx]->buffer; - if (i < decode_state->slice_params[j]->num_elements - 1) - next_slice_param = slice_param + 1; - else - next_slice_param = next_slice_group_param; - - gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context); - slice_param++; + for (; slice_param;) { + if (pre_group_idx != group_idx) { + slice_data_bo = decode_state->slice_datas[group_idx]->bo; + gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context); + pre_group_idx = group_idx; } + + next_slice_param = intel_mpeg2_find_next_slice(decode_state, pic_param, slice_param, &group_idx, &element_idx); + gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context); + slice_param = next_slice_param; } intel_batchbuffer_end_atomic(batch); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 4ef09b55..41102ba9 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -703,3 
+703,44 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, out: return vaStatus; } + +/* + * Return the next slice parameter, i.e. the first following slice whose macroblock position is not before the current slice + * + * Input: + * slice_param: the current slice + * *group_idx & *element_idx the current slice position in slice groups + * Output: + * Return the next slice parameter, or NULL if no such slice exists + * *group_idx & *element_idx the next slice position in slice groups, + * if the next slice is NULL, *group_idx & *element_idx are left unmodified + */ +VASliceParameterBufferMPEG2 * +intel_mpeg2_find_next_slice(struct decode_state *decode_state, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + int *group_idx, + int *element_idx) +{ + VASliceParameterBufferMPEG2 *next_slice_param; + unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + int j = *group_idx, i = *element_idx + 1; + + for (; j < decode_state->num_slice_params; j++) { + for (; i < decode_state->slice_params[j]->num_elements; i++) { + next_slice_param = ((VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer) + i; + + if ((next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) >= + (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position)) { + *group_idx = j; + *element_idx = i; + + return next_slice_param; + } + } + + i = 0; + } + + return NULL; +} diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 2a71f3e6..8a9fbe2f 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -91,4 +91,12 @@ intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, VAPictureParameterBufferVC1 *pic_param, GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); + +VASliceParameterBufferMPEG2 * +intel_mpeg2_find_next_slice(struct decode_state *decode_state, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + int *group_idx, + int *element_idx); + #endif /*
I965_DECODER_UTILS_H */ -- cgit v1.2.1 From 92422a1edb5c29da025e357830a41b957c69f6e2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 14 Nov 2013 15:48:59 +0800 Subject: Enlarge the size of array misc_param And check the type before storing misc parameters Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 4 ++++ src/i965_drv_video.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 178a0f27..1703dfbd 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2148,6 +2148,10 @@ i965_encoder_render_misc_parameter_buffer(VADriverContextP ctx, assert(obj_buffer->buffer_store->buffer); param = (VAEncMiscParameterBuffer *)obj_buffer->buffer_store->buffer; + + if (param->type >= ARRAY_ELEMS(encode->misc_param)) /* type indexes misc_param[]; valid indices are 0 .. ARRAY_ELEMS - 1 */ + return VA_STATUS_ERROR_INVALID_PARAMETER; + i965_release_buffer_store(&encode->misc_param[param->type]); i965_reference_buffer_store(&encode->misc_param[param->type], obj_buffer->buffer_store); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index f51f39fe..ccf9a593 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -138,7 +138,7 @@ struct encode_state int num_slice_params_ext; int last_packed_header_type; - struct buffer_store *misc_param[8]; + struct buffer_store *misc_param[16]; VASurfaceID current_render_target; struct object_surface *input_yuv_object; -- cgit v1.2.1 From 8d800b88c8bc924f8e2a7fb58c09ff82a772cbc3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 22 Nov 2013 13:39:34 +0800 Subject: Fix the error in render shader on Ivy/Haswell Signed-off-by: Zhao Yakui (cherry picked from commit 7af528d17924cf2ec855c0d0b1550b6c3d095682) --- src/shaders/render/exa_wm_write.g7a | 16 ++++++++-------- src/shaders/render/exa_wm_write.g7b | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/shaders/render/exa_wm_write.g7a b/src/shaders/render/exa_wm_write.g7a index a2fb4478..4b17929c 100644 ---
a/src/shaders/render/exa_wm_write.g7a +++ b/src/shaders/render/exa_wm_write.g7a @@ -45,17 +45,17 @@ define(`slot_b_01', `g71') define(`slot_a_00', `g72') define(`slot_a_01', `g73') -mov (8) slot_r_00<1>F src_sample_r_01<1>F { align1 mask_disable }; -mov (8) slot_r_01<1>F src_sample_r_23<1>F { align1 mask_disable }; +mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_g_00<1>F src_sample_g_01<1>F { align1 mask_disable }; -mov (8) slot_g_01<1>F src_sample_g_23<1>F { align1 mask_disable }; +mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_b_00<1>F src_sample_b_01<1>F { align1 mask_disable }; -mov (8) slot_b_01<1>F src_sample_b_23<1>F { align1 mask_disable }; +mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_a_00<1>F src_sample_a_01<1>F { align1 mask_disable }; -mov (8) slot_a_01<1>F src_sample_a_23<1>F { align1 mask_disable }; +mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable }; send (16) data_port_msg_2_ind diff --git a/src/shaders/render/exa_wm_write.g7b b/src/shaders/render/exa_wm_write.g7b index 05e18014..4f347cbc 100644 --- a/src/shaders/render/exa_wm_write.g7b +++ b/src/shaders/render/exa_wm_write.g7b @@ -1,13 +1,13 @@ { 0x00600201, 0x28000021, 0x008d0000, 0x00000000 }, { 0x00600201, 0x28200021, 0x008d0020, 0x00000000 }, - { 0x00600201, 0x284003bd, 0x002001c0, 0x00000000 }, - { 0x00600201, 0x286003bd, 0x002001e0, 0x00000000 }, - { 0x00600201, 0x288003bd, 0x00200200, 0x00000000 }, - { 0x00600201, 0x28a003bd, 0x00200220, 0x00000000 }, - { 0x00600201, 0x28c003bd, 0x00200240, 0x00000000 }, - { 0x00600201, 0x28e003bd, 0x00200260, 0x00000000 }, - { 0x00600201, 
0x290003bd, 0x00200280, 0x00000000 }, - { 0x00600201, 0x292003bd, 0x002002a0, 0x00000000 }, + { 0x00600201, 0x284003bd, 0x008d01c0, 0x00000000 }, + { 0x00600201, 0x286003bd, 0x008d01e0, 0x00000000 }, + { 0x00600201, 0x288003bd, 0x008d0200, 0x00000000 }, + { 0x00600201, 0x28a003bd, 0x008d0220, 0x00000000 }, + { 0x00600201, 0x28c003bd, 0x008d0240, 0x00000000 }, + { 0x00600201, 0x28e003bd, 0x008d0260, 0x00000000 }, + { 0x00600201, 0x290003bd, 0x008d0280, 0x00000000 }, + { 0x00600201, 0x292003bd, 0x008d02a0, 0x00000000 }, { 0x05800031, 0x20001ca8, 0x00000800, 0x940b1000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, -- cgit v1.2.1 From bdcf33793ce22e005007429b59d66c7b7557a051 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 22 Nov 2013 13:39:34 +0800 Subject: Increase the size of constant buffer for PS thread to pass more info Signed-off-by: Zhao Yakui (cherry picked from commit a6372850042e2a9201f4893193f77c9a55a6598e) --- src/i965_render.c | 16 ++++----- src/shaders/render/exa_wm.g4i | 52 ++++++++++++++------------- src/shaders/render/exa_wm_src_affine.g4b | 12 +++---- src/shaders/render/exa_wm_src_affine.g4b.gen5 | 12 +++---- src/shaders/render/exa_wm_src_affine.g6a | 3 -- src/shaders/render/exa_wm_src_affine.g6b | 8 ++--- src/shaders/render/exa_wm_src_affine.g7a | 2 -- src/shaders/render/exa_wm_src_affine.g7b | 8 ++--- src/shaders/render/exa_wm_xy.g4b | 4 +-- src/shaders/render/exa_wm_xy.g4b.gen5 | 4 +-- 10 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index b4fd29b6..0777ce01 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -55,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] = #include "shaders/render/exa_sf.g4b" }; -#define PS_KERNEL_NUM_GRF 32 +#define PS_KERNEL_NUM_GRF 48 #define PS_MAX_THREADS 32 #define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -308,8 +308,8 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { 
#define URB_SF_ENTRIES 1 #define URB_SF_ENTRY_SIZE 2 -#define URB_CS_ENTRIES 1 -#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 static void i965_render_vs_unit(VADriverContextP ctx) @@ -445,8 +445,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread2.scratch_space_base_pointer = 0; wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -510,7 +510,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 1; + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -2071,7 +2071,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + (URB_CS_ENTRY_SIZE-1)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -2837,7 +2837,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 7); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); OUT_RELOC(batch, render_state->curbe.bo, diff --git a/src/shaders/render/exa_wm.g4i b/src/shaders/render/exa_wm.g4i index dd47d515..e186d3a9 100644 --- a/src/shaders/render/exa_wm.g4i +++ b/src/shaders/render/exa_wm.g4i @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * 
Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,39 +39,43 @@ define(`screen_y0', `g1.4<0,1,0>F') define(`interleaved_uv', `g2.0<0,1,0>UW') /* Source transformation parameters */ -define(`src_du_dx', `g3.0<0,1,0>F') -define(`src_du_dy', `g3.4<0,1,0>F') -define(`src_uo', `g3.12<0,1,0>F') -define(`src_dv_dx', `g3.16<0,1,0>F') -define(`src_dv_dy', `g3.20<0,1,0>F') -define(`src_vo', `g3.28<0,1,0>F') -define(`src_dw_dx', `g4.0<0,1,0>F') -define(`src_dw_dy', `g4.4<0,1,0>F') -define(`src_wo', `g4.12<0,1,0>F') - -define(`mask_du_dx', `g5.0<0,1,0>F') -define(`mask_du_dy', `g5.4<0,1,0>F') -define(`mask_uo', `g5.12<0,1,0>F') -define(`mask_dv_dx', `g5.16<0,1,0>F') -define(`mask_dv_dy', `g5.20<0,1,0>F') -define(`mask_vo', `g5.28<0,1,0>F') -define(`mask_dw_dx', `g6.0<0,1,0>F') -define(`mask_dw_dy', `g6.4<0,1,0>F') -define(`mask_wo', `g6.12<0,1,0>F') +define(`src_du_dx', `g6.0<0,1,0>F') +define(`src_du_dy', `g6.4<0,1,0>F') +define(`src_uo', `g6.12<0,1,0>F') +define(`src_dv_dx', `g6.16<0,1,0>F') +define(`src_dv_dy', `g6.20<0,1,0>F') +define(`src_vo', `g6.28<0,1,0>F') +define(`src_dw_dx', `g7.0<0,1,0>F') +define(`src_dw_dy', `g7.4<0,1,0>F') +define(`src_wo', `g7.12<0,1,0>F') + +define(`mask_du_dx', `g8.0<0,1,0>F') +define(`mask_du_dy', `g8.4<0,1,0>F') +define(`mask_uo', `g8.12<0,1,0>F') +define(`mask_dv_dx', `g8.16<0,1,0>F') +define(`mask_dv_dy', `g8.20<0,1,0>F') +define(`mask_vo', `g8.28<0,1,0>F') +define(`mask_dw_dx', `g9.0<0,1,0>F') +define(`mask_dw_dy', `g9.4<0,1,0>F') +define(`mask_wo', `g9.12<0,1,0>F') + +/* Attribute for snb+ */ +define(`a0_a_x',`g10.0<0,1,0>F') +define(`a0_a_y',`g10.16<0,1,0>F') /* * Local variables. 
Pairs must be aligned on even reg boundry */ /* this holds the X dest coordinates */ -define(`dst_x', `g8') +define(`dst_x', `g42') define(`dst_x_0', `dst_x') -define(`dst_x_1', `g9') +define(`dst_x_1', `g43') /* this holds the Y dest coordinates */ -define(`dst_y', `g10') +define(`dst_y', `g44') define(`dst_y_0', `dst_y') -define(`dst_y_1', `g11') +define(`dst_y_1', `g45') /* When computing x * dn/dx, use this */ define(`temp_x', `g30') diff --git a/src/shaders/render/exa_wm_src_affine.g4b b/src/shaders/render/exa_wm_src_affine.g4b index d30da873..7507b722 100644 --- a/src/shaders/render/exa_wm_src_affine.g4b +++ b/src/shaders/render/exa_wm_src_affine.g4b @@ -1,8 +1,8 @@ - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc }, diff --git a/src/shaders/render/exa_wm_src_affine.g4b.gen5 b/src/shaders/render/exa_wm_src_affine.g4b.gen5 index d30da873..7507b722 100644 --- a/src/shaders/render/exa_wm_src_affine.g4b.gen5 +++ b/src/shaders/render/exa_wm_src_affine.g4b.gen5 @@ -1,8 +1,8 @@ - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x204077be, 0x008d03c0, 
0x0000006c }, - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc }, diff --git a/src/shaders/render/exa_wm_src_affine.g6a b/src/shaders/render/exa_wm_src_affine.g6a index 568aef3e..04358cb6 100644 --- a/src/shaders/render/exa_wm_src_affine.g6a +++ b/src/shaders/render/exa_wm_src_affine.g6a @@ -35,9 +35,6 @@ define(`vh', `m5') define(`bl', `g2.0<8,8,1>F') define(`bh', `g4.0<8,8,1>F') -define(`a0_a_x',`g7.0<0,1,0>F') -define(`a0_a_y',`g7.16<0,1,0>F') - /* U */ pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */ diff --git a/src/shaders/render/exa_wm_src_affine.g6b b/src/shaders/render/exa_wm_src_affine.g6b index 5d0ffccb..22c1d221 100644 --- a/src/shaders/render/exa_wm_src_affine.g6b +++ b/src/shaders/render/exa_wm_src_affine.g6b @@ -1,4 +1,4 @@ - { 0x0060005a, 0x204077be, 0x000000e0, 0x008d0040 }, - { 0x0060005a, 0x206077be, 0x000000e0, 0x008d0080 }, - { 0x0060005a, 0x208077be, 0x000000f0, 0x008d0040 }, - { 0x0060005a, 0x20a077be, 0x000000f0, 0x008d0080 }, + { 0x0060005a, 0x204077be, 0x00000140, 0x008d0040 }, + { 0x0060005a, 0x206077be, 0x00000140, 0x008d0080 }, + { 0x0060005a, 0x208077be, 0x00000150, 0x008d0040 }, + { 0x0060005a, 0x20a077be, 0x00000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_src_affine.g7a b/src/shaders/render/exa_wm_src_affine.g7a index a786bc07..88e5ed5b 100644 --- a/src/shaders/render/exa_wm_src_affine.g7a +++ b/src/shaders/render/exa_wm_src_affine.g7a @@ -35,8 +35,6 @@ define(`vh', `g69') define(`bl', `g2.0<8,8,1>F') define(`bh', `g4.0<8,8,1>F') -define(`a0_a_x',`g7.0<0,1,0>F') 
-define(`a0_a_y',`g7.16<0,1,0>F') /* U */ pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ diff --git a/src/shaders/render/exa_wm_src_affine.g7b b/src/shaders/render/exa_wm_src_affine.g7b index 5dbbf1b8..a15b7b69 100644 --- a/src/shaders/render/exa_wm_src_affine.g7b +++ b/src/shaders/render/exa_wm_src_affine.g7b @@ -1,4 +1,4 @@ - { 0x0060005a, 0x284077bd, 0x000000e0, 0x008d0040 }, - { 0x0060005a, 0x286077bd, 0x000000e0, 0x008d0080 }, - { 0x0060005a, 0x288077bd, 0x000000f0, 0x008d0040 }, - { 0x0060005a, 0x28a077bd, 0x000000f0, 0x008d0080 }, + { 0x0060005a, 0x284077bd, 0x00000140, 0x008d0040 }, + { 0x0060005a, 0x286077bd, 0x00000140, 0x008d0080 }, + { 0x0060005a, 0x288077bd, 0x00000150, 0x008d0040 }, + { 0x0060005a, 0x28a077bd, 0x00000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_xy.g4b b/src/shaders/render/exa_wm_xy.g4b index 327fc29c..2b3b235f 100644 --- a/src/shaders/render/exa_wm_xy.g4b +++ b/src/shaders/render/exa_wm_xy.g4b @@ -1,4 +1,4 @@ { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, - { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, - { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, + { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 }, diff --git a/src/shaders/render/exa_wm_xy.g4b.gen5 b/src/shaders/render/exa_wm_xy.g4b.gen5 index 327fc29c..2b3b235f 100644 --- a/src/shaders/render/exa_wm_xy.g4b.gen5 +++ b/src/shaders/render/exa_wm_xy.g4b.gen5 @@ -1,4 +1,4 @@ { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, - { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, - { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, + { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 }, -- cgit v1.2.1 From b4c9ca6eb7f2858cf8bd0d4799c2eb164f434781 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 22 Nov 2013 13:39:34 +0800 Subject: Constant 
buffer passes YUV2RGB CSC matrix instead of hardcoded matrix Signed-off-by: Zhao Yakui (cherry picked from commit 0aa6ccc405726e5521452ee437be6b3cc6fabdee) --- src/i965_render.c | 10 ++++ src/shaders/render/Makefile.am | 5 +- src/shaders/render/exa_wm_yuv_rgb.g4a | 72 ++--------------------------- src/shaders/render/exa_wm_yuv_rgb.g4b | 23 +++++----- src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 | 23 +++++----- src/shaders/render/exa_wm_yuv_rgb.g6a | 73 ++--------------------------- src/shaders/render/exa_wm_yuv_rgb.g6b | 23 +++++----- src/shaders/render/exa_wm_yuv_rgb.g7a | 73 ++--------------------------- src/shaders/render/exa_wm_yuv_rgb.g7b | 23 +++++----- src/shaders/render/exa_yuv_gen4.g4i | 42 +++++++++++++++++ src/shaders/render/exa_yuv_gen6.g4i | 42 +++++++++++++++++ src/shaders/render/exa_yuv_rgb.gxa | 74 ++++++++++++++++++++++++++++++ 12 files changed, 229 insertions(+), 254 deletions(-) create mode 100644 src/shaders/render/exa_yuv_gen4.g4i create mode 100644 src/shaders/render/exa_yuv_gen6.g4i create mode 100644 src/shaders/render/exa_yuv_rgb.gxa diff --git a/src/i965_render.c b/src/i965_render.c index 0777ce01..5b1a1a59 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -311,6 +311,12 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { #define URB_CS_ENTRIES 4 #define URB_CS_ENTRY_SIZE 4 +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + static void i965_render_vs_unit(VADriverContextP ctx) { @@ -1070,6 +1076,7 @@ i965_render_upload_constants(VADriverContextP ctx, float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ float hue = (float)i965->hue_attrib->value / 180 * PI; float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); @@ -1100,6 +1107,9 @@ 
i965_render_upload_constants(VADriverContextP ctx, *color_balance_base++ = cos(hue) * contrast * saturation; *color_balance_base++ = sin(hue) * contrast * saturation; + yuv_to_rgb = (float *)constant_buffer + 8; + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->curbe.bo); } diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index 1653b4ae..bed683b0 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -2,7 +2,10 @@ INTEL_G4I = \ exa_wm.g4i \ exa_wm_affine.g4i \ - exa_wm_yuv_color_balance.gxa + exa_wm_yuv_color_balance.gxa \ + exa_yuv_rgb.gxa \ + exa_yuv_gen4.g4i \ + exa_yuv_gen6.g4i INTEL_G4A = \ exa_sf.g4a \ diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4a b/src/shaders/render/exa_wm_yuv_rgb.g4a index b3abe4bf..e3d24640 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4a +++ b/src/shaders/render/exa_wm_yuv_rgb.g4a @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,6 @@ */ include(`exa_wm.g4i') +include(`exa_yuv_gen4.g4i') +include(`exa_yuv_rgb.gxa') -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion 
function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b b/src/shaders/render/exa_wm_yuv_rgb.g4b index 6b99838e..b116ece6 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4b +++ b/src/shaders/render/exa_wm_yuv_rgb.g4b @@ -1,12 +1,13 @@ - { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 
0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c }, + { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c }, + { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 }, + { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 }, + { 0x80802048, 0x220077bd, 0x008d0340, 0x00000078 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 }, + { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 }, { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 index 6b99838e..b116ece6 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 +++ b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 @@ -1,12 +1,13 @@ - { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c }, + { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c }, + { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 }, + { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 }, + { 0x80802048, 0x220077bd, 
0x008d0340, 0x00000078 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 }, + { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 }, { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6a b/src/shaders/render/exa_wm_yuv_rgb.g6a index b3abe4bf..ede0298a 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g6a +++ b/src/shaders/render/exa_wm_yuv_rgb.g6a @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,5 @@ */ include(`exa_wm.g4i') - -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F 
-0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6b b/src/shaders/render/exa_wm_yuv_rgb.g6b index 6c8c7248..d09ae00e 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g6b +++ b/src/shaders/render/exa_wm_yuv_rgb.g6b @@ -1,12 +1,13 @@ - { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec }, + { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc }, + { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 }, + { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 }, + { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 }, + { 
0x00800041, 0x240077bc, 0x008d02c0, 0x00000100 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 }, + { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 }, { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7a b/src/shaders/render/exa_wm_yuv_rgb.g7a index 5cd33e2b..ede0298a 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g7a +++ b/src/shaders/render/exa_wm_yuv_rgb.g7a @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,5 @@ */ include(`exa_wm.g4i') - -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - - /* - 
* R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7b b/src/shaders/render/exa_wm_yuv_rgb.g7b index 6c8c7248..d09ae00e 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g7b +++ b/src/shaders/render/exa_wm_yuv_rgb.g7b @@ -1,12 +1,13 @@ - { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec }, + { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc }, + { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 }, + { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 }, + { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 
0x00000100 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 }, + { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 }, { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_yuv_gen4.g4i b/src/shaders/render/exa_yuv_gen4.g4i new file mode 100644 index 00000000..5a66616f --- /dev/null +++ b/src/shaders/render/exa_yuv_gen4.g4i @@ -0,0 +1,42 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ + +/* YUV to RGB matrix coeff */ + +define(`coef_ry', `g3.0<0,1,0>F') +define(`coef_ru', `g3.4<0,1,0>F') +define(`coef_rv', `g3.8<0,1,0>F') +define(`coef_yd', `g3.12<0,1,0>F') + +define(`coef_gy', `g3.16<0,1,0>F') +define(`coef_gu', `g3.20<0,1,0>F') +define(`coef_gv', `g3.24<0,1,0>F') +define(`coef_ud', `g3.28<0,1,0>F') + +define(`coef_by', `g4.0<0,1,0>F') +define(`coef_bu', `g4.4<0,1,0>F') +define(`coef_bv', `g4.8<0,1,0>F') +define(`coef_vd', `g4.12<0,1,0>F') diff --git a/src/shaders/render/exa_yuv_gen6.g4i b/src/shaders/render/exa_yuv_gen6.g4i new file mode 100644 index 00000000..a8d69ee1 --- /dev/null +++ b/src/shaders/render/exa_yuv_gen6.g4i @@ -0,0 +1,42 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ +/* YUV to RGB matrix coeff */ + + +define(`coef_ry', `g7.0<0,1,0>F') +define(`coef_ru', `g7.4<0,1,0>F') +define(`coef_rv', `g7.8<0,1,0>F') +define(`coef_yd', `g7.12<0,1,0>F') + +define(`coef_gy', `g7.16<0,1,0>F') +define(`coef_gu', `g7.20<0,1,0>F') +define(`coef_gv', `g7.24<0,1,0>F') +define(`coef_ud', `g7.28<0,1,0>F') + +define(`coef_by', `g8.0<0,1,0>F') +define(`coef_bu', `g8.4<0,1,0>F') +define(`coef_bv', `g8.8<0,1,0>F') +define(`coef_vd', `g8.12<0,1,0>F') diff --git a/src/shaders/render/exa_yuv_rgb.gxa b/src/shaders/render/exa_yuv_rgb.gxa new file mode 100644 index 00000000..656ae73b --- /dev/null +++ b/src/shaders/render/exa_yuv_rgb.gxa @@ -0,0 +1,74 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ + +define(`YCbCr_base', `src_sample_base') + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `mask_sample_b') +define(`Crn_01', `mask_sample_b_01') +define(`Crn_23', `mask_sample_b_23') + +define(`Yn', `mask_sample_r') +define(`Yn_01', `mask_sample_r_01') +define(`Yn_23', `mask_sample_r_23') + +define(`Cbn', `mask_sample_g') +define(`Cbn_01', `mask_sample_g_01') +define(`Cbn_23', `mask_sample_g_23') + +add (16) Yn<1>F Y<8,8,1>F coef_yd { compr align1 }; + +add (16) Cbn<1>F Cb<8,8,1>F coef_ud { compr align1 }; + +add (16) Crn<1>F Cr<8,8,1>F coef_vd { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_ry { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_ru { compr align1 }; +mac.sat (16) src_sample_r<1>F Crn<8,8,1>F coef_rv { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_gy { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_gu { compr align1 }; +mac.sat(16) src_sample_g<1>F Crn<8,8,1>F coef_gv { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_by { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_bu { compr align1 }; +mac.sat(16) src_sample_b<1>F Crn<8,8,1>F coef_bv { compr align1 }; + + /* + * A = 1.0 + */ +mov (16) src_sample_a<1>F 1.0F { compr align1 }; -- cgit v1.2.1 From ef157465d77b8cb56fb8b1388e7bcf065b05b2ae Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 22 Nov 2013 13:39:34 +0800 Subject: Support the BT709 color standard for conversion from YUV to RGB Signed-off-by: Zhao Yakui (cherry picked from commit 4c43ff9234a0a6f18744078d2e743cfa0cf8f34c) --- src/i965_output_dri.c | 7 +++++++ src/i965_render.c | 22 +++++++++++++++++----- src/i965_render.h | 2 ++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git 
a/src/i965_output_dri.c b/src/i965_output_dri.c index de7be92f..14673679 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -127,6 +127,7 @@ i965_put_surface_dri( bool new_region = false; uint32_t name; int i, ret; + unsigned int color_flag = 0; /* Currently don't support DRI1 */ if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2)) @@ -179,6 +180,12 @@ i965_put_surface_dri( assert(ret == 0); } + color_flag = flags & VA_SRC_COLOR_MASK; + if (color_flag == 0) + color_flag = VA_SRC_BT601; + + pp_flag = color_flag; + if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC) pp_flag |= I965_PP_FLAG_AVS; diff --git a/src/i965_render.c b/src/i965_render.c index 5b1a1a59..5be8a96f 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -317,6 +317,12 @@ static float yuv_to_rgb_bt601[3][4] = { {1.164, 2.017, 0, -0.50196,}, }; +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + static void i965_render_vs_unit(VADriverContextP ctx) { @@ -1066,7 +1072,8 @@ i965_render_upload_vertex( static void i965_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -1077,6 +1084,7 @@ i965_render_upload_constants(VADriverContextP ctx, float hue = (float)i965->hue_attrib->value / 180 * PI; float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); @@ -1107,8 +1115,12 @@ i965_render_upload_constants(VADriverContextP ctx, *color_balance_base++ = cos(hue) * contrast * saturation; *color_balance_base++ = sin(hue) * contrast * saturation; + color_flag = flags & VA_SRC_COLOR_MASK; yuv_to_rgb = (float *)constant_buffer + 8; 
- memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); dri_bo_unmap(render_state->curbe.bo); } @@ -1155,7 +1167,7 @@ i965_surface_render_state_setup( i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); } static void @@ -1842,7 +1854,7 @@ gen6_render_setup_states( gen6_render_color_calc_state(ctx); gen6_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -2436,7 +2448,7 @@ gen7_render_setup_states( gen7_render_color_calc_state(ctx); gen7_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } diff --git a/src/i965_render.h b/src/i965_render.h index f09b5354..1960aced 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -33,6 +33,8 @@ #define NUM_RENDER_KERNEL 3 +#define VA_SRC_COLOR_MASK 0x000000f0 + #include "i965_post_processing.h" struct i965_kernel; -- cgit v1.2.1 From 1a6e3ece55509df544b7a5e4bb9ef05ecb4afed4 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 22 Nov 2013 13:39:34 +0800 Subject: Support the smpte240m color standard for conversion from YUV to RGB Signed-off-by: Zhao Yakui (cherry picked from commit 34627c96f331f7a344270c3d51b634f5f166073e) --- src/i965_render.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/i965_render.c b/src/i965_render.c index 5be8a96f..92270cbc 100644 --- a/src/i965_render.c +++ 
b/src/i965_render.c @@ -323,6 +323,12 @@ static float yuv_to_rgb_bt709[3][4] = { {1.164, 2.112, 0, -0.50196,}, }; +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; + static void i965_render_vs_unit(VADriverContextP ctx) { @@ -1119,6 +1125,8 @@ i965_render_upload_constants(VADriverContextP ctx, yuv_to_rgb = (float *)constant_buffer + 8; if (color_flag == VA_SRC_BT709) memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); else memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); -- cgit v1.2.1 From 323e03e5efa0f30a364c2d6e6d11283625a575bb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 2 Dec 2013 15:56:20 +0800 Subject: Upload the constant buffer on ILK/CTG so that subpicture can work Signed-off-by: Zhao Yakui (cherry picked from commit 2a61b0d17072bdc5b58608e3dfa2c4f9f80dcec4) --- src/i965_render.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/i965_render.c b/src/i965_render.c index 92270cbc..a902fcb3 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -1597,6 +1597,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_pipelined_pointers(ctx); i965_render_urb_layout(ctx); i965_render_cs_urb_layout(ctx); + i965_render_constant_buffer(ctx); i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); -- cgit v1.2.1 From 9d0bd942552a68411f2c223e362591ac9520a7f8 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 9 Sep 2013 09:10:51 +0800 Subject: H.264: Support Constrained Baseline profile instead of Baseline profile GENx doesn't support FMO/ASO, so remove the support of Baseline profile for conformance testing. In addition, add the support for Constrained Baseline profile. 
Signed-off-by: Xiang, Haihao --- src/gen6_mfc.c | 2 +- src/gen6_mfd.c | 2 +- src/gen75_mfc.c | 2 +- src/gen75_mfd.c | 4 ++-- src/gen7_mfc.c | 2 +- src/gen7_mfd.c | 4 ++-- src/i965_decoder_utils.c | 2 +- src/i965_drv_video.c | 16 ++++++++-------- src/i965_encoder.c | 4 ++-- src/i965_media.c | 6 +++--- 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 38a065eb..8ef20463 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -1400,7 +1400,7 @@ gen6_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context); diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index f2b0fdff..17f0be2d 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -1873,7 +1873,7 @@ gen6_mfd_decode_picture(VADriverContextP ctx, gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context); diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 81cf7525..42371616 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -2525,7 +2525,7 @@ static VAStatus gen75_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 11644d67..dc7c9403 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -3191,7 +3191,7 @@ gen75_mfd_decode_picture(VADriverContextP ctx, gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case 
VAProfileH264Main: case VAProfileH264High: gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); @@ -3285,7 +3285,7 @@ gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: gen75_mfd_avc_context_init(ctx, gen7_mfd_context); diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index ddf3ce18..375c354f 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -1101,7 +1101,7 @@ gen7_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 8e0d503b..51a1850a 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -2614,7 +2614,7 @@ gen7_mfd_decode_picture(VADriverContextP ctx, gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); @@ -2708,7 +2708,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: gen7_mfd_avc_context_init(ctx, gen7_mfd_context); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 41102ba9..b3aba3d1 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -679,7 +679,7 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, vaStatus = intel_decoder_check_mpeg2_parameter(ctx, decode_state); break; - case VAProfileH264Baseline: + case 
VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = intel_decoder_check_avc_parameter(ctx, decode_state); diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 1703dfbd..fb7dc168 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -375,7 +375,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx, if (HAS_H264_DECODING(i965) || HAS_H264_ENCODING(i965)) { - profile_list[i++] = VAProfileH264Baseline; + profile_list[i++] = VAProfileH264ConstrainedBaseline; profile_list[i++] = VAProfileH264Main; profile_list[i++] = VAProfileH264High; } @@ -421,7 +421,7 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: if (HAS_H264_DECODING(i965)) @@ -561,7 +561,7 @@ i965_CreateConfig(VADriverContextP ctx, } break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: if ((HAS_H264_DECODING(i965) && VAEntrypointVLD == entrypoint) || @@ -1488,7 +1488,7 @@ i965_CreateContext(VADriverContextP ctx, render_state->inited = 1; switch (obj_config->profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: if (!HAS_H264_DECODING(i965) && @@ -1915,7 +1915,7 @@ i965_BeginPicture(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = VA_STATUS_SUCCESS; @@ -4180,7 +4180,7 @@ i965_GetSurfaceAttributes( if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); - } else if (obj_config->profile == VAProfileH264Baseline || + } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || obj_config->profile 
== VAProfileH264High) { attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); @@ -4217,7 +4217,7 @@ i965_GetSurfaceAttributes( attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } - } else if (obj_config->profile == VAProfileH264Baseline || + } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || obj_config->profile == VAProfileH264High) { if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { @@ -4372,7 +4372,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: attribs[i].type = VASurfaceAttribPixelFormat; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 73cd3e30..1e46a1a4 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -267,7 +267,7 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context); @@ -352,7 +352,7 @@ intel_enc_hw_context_init(VADriverContextP ctx, encoder_context->codec = CODEC_MPEG2; break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: encoder_context->codec = CODEC_H264; diff --git a/src/i965_media.c b/src/i965_media.c index 32cdf56a..e6f1c16c 100644 --- a/src/i965_media.c +++ b/src/i965_media.c @@ -257,7 +257,7 @@ i965_media_decode_init(VADriverContextP ctx, i965_media_mpeg2_decode_init(ctx, decode_state, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: i965_media_h264_decode_init(ctx, decode_state, media_context); @@ -348,7 +348,7 @@ g4x_dec_hw_context_init(VADriverContextP ctx, struct 
object_config *obj_config) i965_media_mpeg2_dec_context_init(ctx, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: case VAProfileVC1Simple: @@ -378,7 +378,7 @@ ironlake_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_con i965_media_mpeg2_dec_context_init(ctx, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: i965_media_h264_dec_context_init(ctx, media_context); -- cgit v1.2.1 From f03aca41c77f04e93f06ad91547d46ab3ab5b34f Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 10 Dec 2013 10:19:09 +0800 Subject: VPP: Use the right top/bottom field flag used for DI Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72518 Signed-off-by: Xiang, Haihao --- src/gen75_vpp_vebox.c | 2 +- src/i965_post_processing.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 75d922d7..b5a88b63 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -141,7 +141,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c assert(di_param); progressive_dn = 0; - dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST); + dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD); motion_compensated_enable = (di_param->algorithm == VAProcDeinterlacingMotionCompensated); } diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 9ab6fde3..015e11cc 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3210,7 +3210,7 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex int dndi_top_first = 1; VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; - if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST) + if 
(di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD) dndi_top_first = 0; else dndi_top_first = 1; @@ -3612,7 +3612,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; int is_first_frame = (pp_dndi_context->frame_order == -1); - if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST) + if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD) dndi_top_first = 0; else dndi_top_first = 1; -- cgit v1.2.1 From 355ce947511b9c249c4e39b006a1aeb7aff5fe17 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 11 Dec 2013 10:03:54 +0800 Subject: 1.2.2.pre2 Signed-off-by: Xiang, Haihao --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f531c950..ea8db7d0 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [2]) m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [2]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 121e70d34028c5caa24b587988dda4b6b1335bf8 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 16 Dec 2013 16:05:43 +0800 Subject: Intel driver 1.2.2 Signed-off-by: Xiang, Haihao --- NEWS | 10 +++++++++- configure.ac | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 267af5ff..b2d00ab9 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,14 @@ -libva-intel-driver NEWS -- summary of changes. 2013-09-23 +libva-intel-driver NEWS -- summary of changes. 
2013-12-16 Copyright (C) 2009-2013 Intel Corporation +Version 1.2.2 - 16.Dec.2013 +* Motion compensation DI on HSW +* Optimization of FPS for H.264 encoding on HSW +* Add brightness/contrast/hue/saturation support for rendering. +* Support BT601/BT709/SMPTE240 in vaPutSurface() +* Expose Constrained Baseline Profile instead of Baseline Profile for H.264 +* Bug fixes + Version 1.2.1 - 23.Sep.2013 * Add PCI IDs for Bay Trail * Performance improvement for MPEG-2 Encoding on IVB/HSW diff --git a/configure.ac b/configure.ac index ea8db7d0..2de7cae3 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [2]) m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [2]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From dd3a67b39e551842983dc4a0da0a96f8fd9cf85b Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Dec 2013 13:13:02 +0800 Subject: Bump version for development Signed-off-by: Xiang, Haihao --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 2de7cae3..7bd39edb 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [2]) -m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [3]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 54cb60f3d973ce6a908ea57f5aa7481803fe653a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 
Dec 2013 13:15:40 +0800 Subject: Render: Adjust the default value for contrast/saturation Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73016 Signed-off-by: Xiang, Haihao Tested-by: Mark Lee --- src/i965_drv_video.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index ccf9a593..773c8ca7 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -60,9 +60,9 @@ #define I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST 0x00000002 #define DEFAULT_BRIGHTNESS 0 -#define DEFAULT_CONTRAST 10 +#define DEFAULT_CONTRAST 50 #define DEFAULT_HUE 0 -#define DEFAULT_SATURATION 10 +#define DEFAULT_SATURATION 50 struct i965_surface { -- cgit v1.2.1 From 696606c72768196f13fb8aaa7b821a33f8867689 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 26 Dec 2012 15:07:19 +0800 Subject: Add the PCI ids for BDW This is from the kernel driver. Signed-off-by: Zhao Yakui --- src/intel_driver.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/intel_driver.h b/src/intel_driver.h index 8f442744..77ac815b 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -246,6 +246,26 @@ struct intel_region #define PCI_CHIP_BAYTRAIL_M_4 0x0157 #define PCI_CHIP_BAYTRAIL_D 0x0155 +#define PCI_CHIP_BROADWELL_M_GT1_1 0x1606 +#define PCI_CHIP_BROADWELL_M_GT2_1 0x1616 +#define PCI_CHIP_BROADWELL_M_GT2PLUS_1 0x1626 + +#define PCI_CHIP_BROADWELL_M_GT1_2 0x160B +#define PCI_CHIP_BROADWELL_M_GT2_2 0x161B +#define PCI_CHIP_BROADWELL_M_GT2PLUS_2 0x162B + +#define PCI_CHIP_BROADWELL_M_GT1_3 0x160E +#define PCI_CHIP_BROADWELL_M_GT2_3 0x161E +#define PCI_CHIP_BROADWELL_M_GT2PLUS_3 0x162E + +#define PCI_CHIP_BROADWELL_D_GT1_1 0x160A +#define PCI_CHIP_BROADWELL_D_GT2_1 0x161A +#define PCI_CHIP_BROADWELL_D_GT2PLUS_1 0x162A + +#define PCI_CHIP_BROADWELL_D_GT1_2 0x160D +#define PCI_CHIP_BROADWELL_D_GT2_2 0x161D +#define PCI_CHIP_BROADWELL_D_GT2PLUS_2 0x162D + #define IS_G45(devid) (devid == 
PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ @@ -365,4 +385,27 @@ struct intel_region #define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ IS_HASWELL(devid)) + +#define IS_BDW_GT1(devid) (devid == PCI_CHIP_BROADWELL_M_GT1_1 || \ + devid == PCI_CHIP_BROADWELL_M_GT1_2 || \ + devid == PCI_CHIP_BROADWELL_M_GT1_3 || \ + devid == PCI_CHIP_BROADWELL_D_GT1_1 || \ + devid == PCI_CHIP_BROADWELL_D_GT1_2) + +#define IS_BDW_GT2(devid) (devid == PCI_CHIP_BROADWELL_M_GT2_1 || \ + devid == PCI_CHIP_BROADWELL_M_GT2_2 || \ + devid == PCI_CHIP_BROADWELL_M_GT2_3 || \ + devid == PCI_CHIP_BROADWELL_D_GT2_1 || \ + devid == PCI_CHIP_BROADWELL_D_GT2_2) + +#define IS_BDW_GT2PLUS(devid) (devid == PCI_CHIP_BROADWELL_M_GT2PLUS_1 || \ + devid == PCI_CHIP_BROADWELL_M_GT2PLUS_2 || \ + devid == PCI_CHIP_BROADWELL_M_GT2PLUS_3 || \ + devid == PCI_CHIP_BROADWELL_D_GT2PLUS_1 || \ + devid == PCI_CHIP_BROADWELL_D_GT2PLUS_2) + +#define IS_GEN8(devid) (IS_BDW_GT1(devid) || \ + IS_BDW_GT2(devid) || \ + IS_BDW_GT2PLUS(devid)) + #endif /* _INTEL_DRIVER_H_ */ -- cgit v1.2.1 From 20bfe6784c2830ad4cc711b6035ed8a70ca2ad45 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 7 Jan 2013 13:18:47 +0800 Subject: Add the initial support for GEN8 Signed-off-by: Zhao Yakui [Haihao: fix conflict when rebasing] Signed-off-by: Xiang, Haihao --- src/i965_defines.h | 1 + src/i965_drv_video.c | 27 ++++++++++++++++++++++++--- src/i965_post_processing.c | 28 +++++++++++++++++++--------- src/i965_render.c | 18 ++++++++++++------ src/intel_batchbuffer.c | 3 ++- 5 files changed, 58 insertions(+), 19 deletions(-) diff --git a/src/i965_defines.h b/src/i965_defines.h index c7163ae0..58a73d74 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -752,6 +752,7 @@ #define SUBSAMPLE_RGBX 6 #define URB_SIZE(intel) (IS_GEN7(intel->device_id) ? 4096 : \ + IS_GEN8(intel->device_id) ? 4096 : \ IS_GEN6(intel->device_id) ? 1024 : \ IS_IRONLAKE(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 
384 : 256) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index fb7dc168..c9ed624a 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -318,6 +318,26 @@ static struct hw_codec_info gen75_hw_codec_info = { }, }; +/* TODO: Add the separate call back function for Gen8 */ +static struct hw_codec_info gen8_hw_codec_info = { + .dec_hw_context_init = gen75_dec_hw_context_init, + .enc_hw_context_init = gen75_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .max_width = 4096, + .max_height = 4096, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, +}; + #define I965_PACKED_HEADER_BASE 0 #define I965_PACKED_MISC_HEADER_BASE 3 @@ -358,7 +378,6 @@ va_enc_packed_type_to_idx(int packed_type) return idx; } - VAStatus i965_QueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, /* out */ @@ -1497,7 +1516,7 @@ i965_CreateContext(VADriverContextP ctx, render_state->interleaved_uv = 1; break; default: - render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id)); + render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id) || IS_GEN8(i965->intel.device_id)); break; } @@ -4823,7 +4842,9 @@ i965_driver_data_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) + i965->codec_info = &gen8_hw_codec_info; + else if (IS_HASWELL(i965->intel.device_id)) i965->codec_info = &gen75_hw_codec_info; else if (IS_G4X(i965->intel.device_id)) i965->codec_info = &g4x_hw_codec_info; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 015e11cc..03530443 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ 
-42,7 +42,8 @@ #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \ IS_GEN6((ctx)->intel.device_id) || \ - IS_GEN7((ctx)->intel.device_id)) + IS_GEN7((ctx)->intel.device_id) || \ + IS_GEN8((ctx)->intel.device_id)) #define SURFACE_STATE_PADDED_SIZE_0_I965 ALIGN(sizeof(struct i965_surface_state), 32) #define SURFACE_STATE_PADDED_SIZE_1_I965 ALIGN(sizeof(struct i965_surface_state2), 32) @@ -4306,7 +4307,8 @@ gen6_pp_initialize( assert(bo); pp_context->vfe_state.bo = bo; - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { static_param_size = sizeof(struct gen7_pp_static_parameter); inline_param_size = sizeof(struct gen7_pp_inline_parameter); } else { @@ -4361,7 +4363,8 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); desc->desc4.constant_urb_entry_read_offset = 0; - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */ else desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */ @@ -4393,7 +4396,8 @@ gen6_pp_upload_constants(VADriverContextP ctx, assert(sizeof(struct pp_static_parameter) == 128); assert(sizeof(struct gen7_pp_static_parameter) == 192); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ -4555,7 +4559,8 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo *command_buffer; unsigned int *command_ptr; - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) param_size = sizeof(struct gen7_pp_inline_parameter); else param_size = sizeof(struct pp_inline_parameter); @@ -4680,7 +4685,8 @@ i965_post_processing_internal( struct i965_driver_data *i965 = 
i965_driver_data(ctx); if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); else va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); @@ -4743,7 +4749,8 @@ i965_vpp_clear_surface(VADriverContextP ctx, br13 |= pitch; if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { intel_batchbuffer_start_atomic_blt(batch, 48); BEGIN_BLT_BATCH(batch, 12); } else { @@ -5391,7 +5398,9 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) + memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); + else if (IS_HASWELL(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules)); else if (IS_GEN7(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); @@ -5416,7 +5425,8 @@ i965_post_processing_context_init(VADriverContextP ctx, } /* static & inline parameters */ - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); } else { diff --git a/src/i965_render.c b/src/i965_render.c index a902fcb3..1161fb61 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -827,7 +827,8 @@ i965_render_src_surface_state( assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + 
SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { gen7_render_set_surface_state(ss, region, offset, w, h, @@ -942,7 +943,8 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, @@ -1536,7 +1538,8 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -3107,7 +3110,8 @@ intel_render_put_surface( src_rect = dst_rect; } - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); else if (IS_GEN6(i965->intel.device_id)) gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); @@ -3128,7 +3132,8 @@ intel_render_put_subpicture( { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); else if (IS_GEN6(i965->intel.device_id)) gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); @@ -3149,7 +3154,8 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_id) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 8b357448..e1f5a5ed 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -184,7 +184,8 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) struct intel_driver_data *intel = batch->intel; if (IS_GEN6(intel->device_id) || - IS_GEN7(intel->device_id)) { + IS_GEN7(intel->device_id) || + IS_GEN8(intel->device_id)) { if (batch->flag == I915_EXEC_RENDER) { if (IS_GEN6(intel->device_id)) { assert(batch->wa_render_bo); -- cgit v1.2.1 From 1e24b10d57f9523cf133332501af9285ab75ed5e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 7 Jan 2013 13:18:47 +0800 Subject: Add the separated media encoding/decoding files for BDW As a lot of changes about the media are added between Haswell and BDW, the separated media encoding/decoding files are added for BDW. This is to avoid complex backward logic for Haswell. Signed-off-by: Zhao Yakui [Haihao: directly use object instead of id] Signed-off-by: Xiang, Haihao --- src/Makefile.am | 3 + src/gen6_mfc.h | 4 +- src/gen6_vme.h | 1 + src/gen8_mfc.c | 2469 +++++++++++++++++++++++++++++++++++++++++++ src/gen8_mfd.c | 2834 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/gen8_vme.c | 1035 ++++++++++++++++++ src/i965_decoder.h | 2 + src/i965_drv_video.c | 4 +- src/i965_encoder.c | 7 + src/i965_encoder.h | 2 + 10 files changed, 6358 insertions(+), 3 deletions(-) create mode 100644 src/gen8_mfc.c create mode 100644 src/gen8_mfd.c create mode 100644 src/gen8_vme.c diff --git a/src/Makefile.am b/src/Makefile.am index edf8f4e7..5b2ac592 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -56,6 +56,9 @@ source_c = \ gen7_mfd.c \ gen75_mfd.c \ gen75_mfc.c \ + gen8_mfc.c \ + gen8_mfd.c \ + gen8_vme.c \ gen75_picture_process.c \ gen75_vme.c \ gen75_vpp_gpe.c \ diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 6a5777f2..d55cff6b 100644 --- a/src/gen6_mfc.h 
+++ b/src/gen6_mfc.h @@ -269,10 +269,12 @@ extern VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, extern int intel_avc_enc_slice_type_fixup(int type); - extern void intel_mfc_avc_ref_idx_state(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context); +extern +Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); + #endif /* _GEN6_MFC_BCS_H_ */ diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 939a4a37..d4619825 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -174,4 +174,5 @@ intel_avc_vme_reference_state(VADriverContextP ctx, struct object_surface *obj_surface, struct intel_encoder_context *encoder_context)); +extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); #endif /* _GEN6_VME_H_ */ diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c new file mode 100644 index 00000000..5f50e0a5 --- /dev/null +++ b/src/gen8_mfc.c @@ -0,0 +1,2469 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui + * Xiang Haihao + * + */ + +#include +#include +#include +#include +#include + +#include "intel_batchbuffer.h" +#include "i965_defines.h" +#include "i965_structs.h" +#include "i965_drv_video.h" +#include "i965_encoder.h" +#include "i965_encoder_utils.h" +#include "gen6_mfc.h" +#include "gen6_vme.h" +#include "intel_media.h" + +#define MFC_SOFTWARE_HASWELL 1 + +#define B0_STEP_REV 2 +#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) + +static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = { +#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" +}; + +static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = { +#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +}; + +static struct i965_kernel gen8_mfc_kernels[] = { + { + "MFC AVC INTRA BATCHBUFFER ", + MFC_BATCHBUFFER_AVC_INTRA, + gen8_mfc_batchbuffer_avc_intra, + sizeof(gen8_mfc_batchbuffer_avc_intra), + NULL + }, + + { + "MFC AVC INTER BATCHBUFFER ", + MFC_BATCHBUFFER_AVC_INTER, + gen8_mfc_batchbuffer_avc_inter, + sizeof(gen8_mfc_batchbuffer_avc_inter), + NULL + }, +}; + +#define INTER_MODE_MASK 0x03 +#define INTER_8X8 0x03 +#define INTER_16X8 0x01 +#define INTER_8X16 0x02 +#define SUBMB_SHAPE_MASK 0x00FF00 + +#define INTER_MV8 (4 << 20) +#define INTER_MV32 (6 << 20) + + +static void +gen8_mfc_pipe_mode_select(VADriverContextP ctx, + int standard_select, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC); + + BEGIN_BCS_BATCH(batch, 5); + + 
OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* Stream-Out Enable */ + ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ + ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (1 << 4) | /* encoding mode */ + (standard_select << 0)); /* standard select: avc or mpeg2 */ + OUT_BCS_BATCH(batch, + (0 << 7) | /* expand NOA bus flag */ + (0 << 6) | /* disable slice-level clock gating */ + (0 << 5) | /* disable clock gating for NOA */ + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 6); + + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((mfc_context->surface_state.height - 1) << 18) | + ((mfc_context->surface_state.width - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ + (0 << 22) | /* surface object control state, FIXME??? 
*/ + ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 for interleave U/V */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* must be 0 for interleave U/V */ + (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + BEGIN_BCS_BATCH(batch, 26); + + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); + /* the DW1-3 is for the MFX indirect bistream offset */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-5 is the MFX upper bound */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW6-10 is for MFX Indirect MV Object Base Address */ + OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); + + /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW16-20 is for MFX indirect DBLK. 
Not used on encoder */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/ + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + mfc_context->mfc_indirect_pak_bse_object.end_offset); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + + BEGIN_BCS_BATCH(batch, 16); + + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); + /*DW1. 
MB setting of frame */ + OUT_BCS_BATCH(batch, + ((width_in_mbs * height_in_mbs) & 0xFFFF)); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + /* DW3 QP setting */ + OUT_BCS_BATCH(batch, + (0 << 24) | /* Second Chroma QP Offset */ + (0 << 16) | /* Chroma QP Offset */ + (0 << 14) | /* Max-bit conformance Intra flag */ + (0 << 13) | /* Max Macroblock size conformance Inter flag */ + (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */ + (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */ + (0 << 8) | /* FIXME: Image Structure */ + (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* Mininum Frame size */ + (0 << 15) | /* Disable reading of Macroblock Status Buffer */ + (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */ + (0 << 13) | /* CABAC 0 word insertion test enable */ + (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */ + (1 << 10) | /* Chroma Format IDC, 4:2:0 */ + (0 << 8) | /* FIXME: MbMvFormatFlag */ + (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/ + (0 << 6) | /* Only valid for VLD decoding mode */ + (0 << 5) | /* Constrained Intra Predition Flag, from PPS */ + (0 << 4) | /* Direct 8x8 inference flag */ + (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/ + (1 << 2) | /* Frame MB only flag */ + (0 << 1) | /* MBAFF mode is in active */ + (0 << 0)); /* Field picture flag */ + /* DW5 Trellis quantization */ + OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ + OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ + (0xBB8 << 16) | /* InterMbMaxSz */ + (0xEE8) ); /* IntraMbMaxSz */ + OUT_BCS_BATCH(batch, 0); /* Reserved */ + /* DW8. 
QP delta */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + /* DW10. Bit setting for MB */ + OUT_BCS_BATCH(batch, 0x8C000000); + OUT_BCS_BATCH(batch, 0x00010000); + /* DW12. */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0x02010100); + /* DW14. For short format */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +
/*
 * Emit one MFX_QM_STATE command: header DW, matrix-type DW, then 16 DWs of
 * quantiser-matrix payload.
 *
 * qm_type   - matrix selector written to DW1.
 * qm        - source matrix; the first qm_length DWs are copied.
 * qm_length - number of valid DWs in qm, 0..16.
 *
 * The command always carries 16 payload DWs even when qm_length is smaller,
 * so the staging buffer is zero-filled up front; otherwise the DWs past
 * qm_length would be indeterminate stack contents.
 */
static void
gen8_mfc_qm_state(VADriverContextP ctx,
                  int qm_type,
                  unsigned int *qm,
                  int qm_length,
                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int qm_buffer[16] = { 0 };

    assert(qm_length >= 0 && qm_length <= 16);
    assert(sizeof(*qm) == 4);
    memcpy(qm_buffer, qm, qm_length * sizeof(*qm));

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, sizeof(qm_buffer));
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Program the four AVC quantiser matrices with a flat matrix (every byte
 * 0x10).  The 4x4 matrices supply 12 DWs, the 8x8 matrices 16.
 */
static void
gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    unsigned int qm[16];
    int i;

    for (i = 0; i < 16; i++)
        qm[i] = 0x10101010;

    gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
    gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
    gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
    gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
}

/*
 * Emit one MFX_FQM_STATE command: header DW, matrix-type DW, then 32 DWs of
 * forward-quantiser-matrix payload.
 *
 * fqm_type   - matrix selector written to DW1.
 * fqm        - source matrix; the first fqm_length DWs are copied.
 * fqm_length - number of valid DWs in fqm, 0..32.
 *
 * As with the QM command, the staging buffer is zero-filled so the DWs past
 * fqm_length are well defined.
 */
static void
gen8_mfc_fqm_state(VADriverContextP ctx,
                   int fqm_type,
                   unsigned int *fqm,
                   int fqm_length,
                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int fqm_buffer[32] = { 0 };

    assert(fqm_length >= 0 && fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    memcpy(fqm_buffer, fqm, fqm_length * sizeof(*fqm));

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, sizeof(fqm_buffer));
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Program the four AVC forward quantiser matrices with a flat matrix (every
 * 16-bit entry 0x1000).  The 4x4 matrices supply 24 DWs, the 8x8 matrices 32.
 */
static void
gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    unsigned int qm[32];
    int i;

    for (i = 0; i < 32; i++)
        qm[i] = 0x10001000;

    gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
}

static void +gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, + unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, + int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, + struct intel_batchbuffer *batch) +{ + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, lenght_in_dws + 2); + + OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2)); + OUT_BCS_BATCH(batch, + (0 << 16) | /* always start at offset 0 */ + (data_bits_in_last_dw << 8) | + (skip_emul_byte_count << 4) | + (!!emulation_flag << 3) | + ((!!is_last_header) << 2) | + ((!!is_end_of_slice) << 1) | + (0 << 0)); /* FIXME: ???
*/ + intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4); + + ADVANCE_BCS_BATCH(batch); +} + + +
/*
 * Reset the MFC encoder context before encoding a picture.
 *
 * Drops the references held on the per-picture buffer objects, reallocates
 * the scratch buffers whose size depends on the picture dimensions, and
 * recreates the auxiliary batchbuffer together with the surface that
 * aliases it.
 *
 * The picture size in macroblocks is read from the sequence parameter
 * buffer: directly for H.264, derived from the pixel dimensions for MPEG-2.
 */
static void gen8_mfc_init(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    int width_in_mbs = 0;
    int height_in_mbs = 0;

    if (encoder_context->codec == CODEC_H264) {
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;

        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    } else {
        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

        assert(encoder_context->codec == CODEC_MPEG2);

        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
    }

    /* Encode common setup for MFC: release the per-picture buffers. */
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        /* The guard used to end in a stray ';', which made it an empty
         * statement and left the unreference unconditional. */
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* Scratch buffers sized from the picture dimensions. */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * height_in_mbs * 16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64,  /* 2 * width_in_mbs * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    /* The auxiliary batchbuffer is also exposed as a surface of 16-byte
     * blocks; the surface takes its own reference on the backing bo. */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
    assert(mfc_context->aux_batchbuffer);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
}

static void +gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer
*batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int i; + + BEGIN_BCS_BATCH(batch, 61); + + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + + /* the DW1-3 is for pre_deblocking */ + if (mfc_context->pre_deblocking_output.bo) + OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); /* pre output addr */ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-6 is for the post_deblocking */ + + if (mfc_context->post_deblocking_output.bo) + OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* post output addr */ + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW7-9 is for the uncompressed_picture */ + OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* uncompressed data */ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW10-12 is for the mb status */ + OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* StreamOut data*/ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW13-15 is for the intra_row_store_scratch */ + OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW16-18 is for the deblocking filter */ + OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 19-50 is for Reference pictures*/ + for (i = 0; i < 
ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + if ( mfc_context->reference_surfaces[i].bo != NULL) { + OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + } else { + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + + /* The DW 52-54 is for the MB status buffer */ + OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* Macroblock status buffer*/ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 55-57 is the ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 58-60 is the second ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_directmode_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + int i; + + BEGIN_BCS_BATCH(batch, 71); + + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* Reference frames and Current frames */ + /* the DW1-32 is for the direct MV for reference */ + for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) { + if ( mfc_context->direct_mv_buffers[i].bo != NULL) { + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + OUT_BCS_BATCH(batch, 0); + + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POL list */ + for(i = 0; i < 32; i++) { + 
OUT_BCS_BATCH(batch, i/2); + } + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 0); //Select L0 + OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 1); //Select L1 + OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); +} + + +static void +gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 10); + + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW7-9 is for Bitplane Read Buffer Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + + +static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + 
mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen8_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + mfc_context->avc_img_state(ctx, encode_state, encoder_context); + mfc_context->avc_qm_state(ctx, encoder_context); + mfc_context->avc_fqm_state(ctx, encoder_context); + gen8_mfc_avc_directmode_state(ctx, encoder_context); + gen8_mfc_avc_ref_idx_state(ctx, encoder_context); +} + + +static VAStatus gen8_mfc_run(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + + intel_batchbuffer_flush(batch); //run the pipeline + + return VA_STATUS_SUCCESS; +} + + +static VAStatus +gen8_mfc_stop(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int *encoded_bits_size) +{ + VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VACodedBufferSegment *coded_buffer_segment; + + vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment); + assert(vaStatus == VA_STATUS_SUCCESS); + *encoded_bits_size = coded_buffer_segment->size * 8; + i965_UnmapBuffer(ctx, pPicParameter->coded_buf); + + return VA_STATUS_SUCCESS; +} + + +static void +gen8_mfc_avc_slice_state(VADriverContextP ctx, + VAEncPictureParameterBufferH264 *pic_param, + VAEncSliceParameterBufferH264 *slice_param, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int rate_control_enable, + int qp, + struct intel_batchbuffer *batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + 
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int beginmb = slice_param->macroblock_address; + int endmb = beginmb + slice_param->num_macroblocks; + int beginx = beginmb % width_in_mbs; + int beginy = beginmb / width_in_mbs; + int nextx = endmb % width_in_mbs; + int nexty = endmb / width_in_mbs; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int last_slice = (endmb == (width_in_mbs * height_in_mbs)); + int maxQpN, maxQpP; + unsigned char correct[6], grow, shrink; + int i; + int weighted_pred_idc = 0; + unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; + unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; + int bslice = 0; + + if (batch == NULL) + batch = encoder_context->base.batch; + + if (slice_type == SLICE_TYPE_P) { + weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + } else if (slice_type == SLICE_TYPE_B) { + weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; + bslice = 1; + + if (weighted_pred_idc == 2) { + /* 8.4.3 - Derivation process for prediction weights (8-279) */ + luma_log2_weight_denom = 5; + chroma_log2_weight_denom = 5; + } + } + + maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier; + maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier; + + for (i = 0; i < 6; i++) + correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i]; + + grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + + (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4); + shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + + (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4); + + BEGIN_BCS_BATCH(batch, 11);; + + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); + OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ + + if (slice_type == SLICE_TYPE_I) { + OUT_BCS_BATCH(batch, 0); /*no 
reference frames and pred_weight_table*/ + } else { + OUT_BCS_BATCH(batch, + (1 << 16) | (bslice << 24) | /*1 reference frame*/ + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); + } + + OUT_BCS_BATCH(batch, + (weighted_pred_idc << 30) | + (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/ + (slice_param->disable_deblocking_filter_idc << 27) | + (slice_param->cabac_init_idc << 24) | + (qp<<16) | /*Slice Quantization Parameter*/ + ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | + ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); + OUT_BCS_BATCH(batch, + (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/ + (beginx << 16) | + slice_param->macroblock_address ); + OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/ + OUT_BCS_BATCH(batch, + (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/ + (1 << 30) | /*ResetRateControlCounter*/ + (0 << 28) | /*RC Triggle Mode = Always Rate Control*/ + (4 << 24) | /*RC Stable Tolerance, middle level*/ + (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/ + (0 << 22) | /*QP mode, don't modfiy CBP*/ + (0 << 21) | /*MB Type Direct Conversion Enabled*/ + (0 << 20) | /*MB Type Skip Conversion Enabled*/ + (last_slice << 19) | /*IsLastSlice*/ + (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/ + (1 << 17) | /*HeaderPresentFlag*/ + (1 << 16) | /*SliceData PresentFlag*/ + (1 << 15) | /*TailPresentFlag*/ + (1 << 13) | /*RBSP NAL TYPE*/ + (0 << 12) ); /*CabacZeroWordInsertionEnable*/ + OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset); + OUT_BCS_BATCH(batch, + (maxQpN << 24) | /*Target QP - 24 is lowest QP*/ + (maxQpP << 16) | /*Target QP + 20 is highest QP*/ + (shrink << 8) | + (grow << 0)); + OUT_BCS_BATCH(batch, + (correct[5] << 20) | + (correct[4] << 16) | + (correct[3] << 12) | + (correct[2] << 8) | + (correct[1] << 4) | + (correct[0] << 0)); + 
OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + + +#ifdef MFC_SOFTWARE_HASWELL + +static int +gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, + int qp,unsigned int *msg, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size, unsigned char max_mb_size, + struct intel_batchbuffer *batch) +{ + int len_in_dwords = 12; + unsigned int intra_msg; +#define INTRA_MSG_FLAG (1 << 13) +#define INTRA_MBTYPE_MASK (0x1F0000) + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + intra_msg = msg[0] & 0xC0FF; + intra_msg |= INTRA_MSG_FLAG; + intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8); + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 24) | /* PackedMvNum, Debug*/ + (0 << 20) | /* No motion vector */ + (1 << 19) | /* CbpDcY */ + (1 << 18) | /* CbpDcU */ + (1 << 17) | /* CbpDcV */ + intra_msg); + + OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/ + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ + OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */ + + /*Stuff for Intra MB*/ + OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/ + OUT_BCS_BATCH(batch, msg[2]); + OUT_BCS_BATCH(batch, msg[3]&0xFF); + + /*MaxSizeInWord and TargetSzieInWord*/ + OUT_BCS_BATCH(batch, (max_mb_size << 24) | + (target_mb_size << 16) ); + + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +static int +gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, + unsigned int *msg, unsigned int offset, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, + struct intel_batchbuffer *batch) +{ + int len_in_dwords = 12; + unsigned int inter_msg = 0; + if (batch == NULL) + batch = 
encoder_context->base.batch; + { +#define MSG_MV_OFFSET 4 + unsigned int *mv_ptr; + mv_ptr = msg + MSG_MV_OFFSET; + /* MV of VME output is based on 16 sub-blocks. So it is necessary + * to convert them to be compatible with the format of AVC_PAK + * command. + */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; + } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; + } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; + } + } + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); + + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } + OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ + OUT_BCS_BATCH(batch, offset); + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } + + OUT_BCS_BATCH(batch, inter_msg); + + OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/ + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ +#if 0 + if ( slice_type == SLICE_TYPE_B) { + OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */ + } else { + OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ + } +#else + 
OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ +#endif + + inter_msg = msg[1] >> 8; + /*Stuff for Inter MB*/ + OUT_BCS_BATCH(batch, inter_msg); + OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, 0x0); + + /*MaxSizeInWord and TargetSzieInWord*/ + OUT_BCS_BATCH(batch, (max_mb_size << 24) | + (target_mb_size << 16) ); + + OUT_BCS_BATCH(batch, 0x0); + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF + +static void +gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + unsigned int *msg = NULL, offset = 0; + unsigned char *msg_ptr = NULL; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); + int i,x,y; + int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned char *slice_header = NULL; + int slice_header_length_in_bits = 0; + unsigned int tail_data[] = { 0x0, 0x0 }; + int slice_type = 
intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int is_intra = slice_type == SLICE_TYPE_I; + + + if (rate_control_mode == VA_RC_CBR) { + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + } + + /* only support for 8-bit pixel bit-depth */ + assert(pSequenceParameter->bit_depth_luma_minus8 == 0); + assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); + assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); + assert(qp >= 0 && qp < 52); + + gen8_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, encoder_context, + (rate_control_mode == VA_RC_CBR), qp, slice_batch); + + if ( slice_index == 0) + intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); + + // slice hander + mfc_context->insert_object(ctx, encoder_context, + (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, + 5, /* first 5 bytes are start code + nal unit type */ + 1, 0, 1, slice_batch); + + dri_bo_map(vme_context->vme_output.bo , 1); + msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + if (is_intra) { + msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); + } else { + msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); + } + + for (i = pSliceParameter->macroblock_address; + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) { + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) ); + x = i % width_in_mbs; + y = i / width_in_mbs; + msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + + if (is_intra) { + assert(msg); 
+ gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + } else { + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; + if (intra_rdo < inter_rdo) { + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + } else { + msg += AVC_INTER_MSG_OFFSET; + gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); + } + } + } + + dri_bo_unmap(vme_context->vme_output.bo); + + if ( last_slice ) { + mfc_context->insert_object(ctx, encoder_context, + tail_data, 2, 8, + 2, 1, 1, 0, slice_batch); + } else { + mfc_context->insert_object(ctx, encoder_context, + tail_data, 1, 8, + 1, 1, 1, 0, slice_batch); + } + + free(slice_header); + +} + +static dri_bo * +gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch; + dri_bo *batch_bo; + int i; + int buffer_size; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + buffer_size = width_in_mbs * height_in_mbs * 64; + batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch_bo = batch->buffer; + for (i = 0; i < encode_state->num_slice_params_ext; i++) { + gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); + } + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + 
dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + + return batch_bo; +} + +#else + +static void +gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) + +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + assert(vme_context->vme_output.bo); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), + SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); + assert(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + &mfc_context->aux_batchbuffer_surface, + BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), + SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); +} + +static void +gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; + mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ + mfc_context->mfc_batchbuffer_surface.pitch = 16; + mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, + "MFC batchbuffer", + mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, + 0x1000); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + 
&mfc_context->mfc_batchbuffer_surface, + BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), + SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); +} + +static void +gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); + gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context); +} + +static void +gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_interface_descriptor_data *desc; + int i; + dri_bo *bo; + + bo = mfc_context->gpe_context.idrt.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = bo->virtual; + + for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) { + struct i965_kernel *kernel; + + kernel = &mfc_context->gpe_context.kernels[i]; + assert(sizeof(*desc) == 32); + + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); + desc->desc2.sampler_count = 0; + desc->desc2.sampler_state_pointer = 0; + desc->desc3.binding_table_entry_count = 2; + desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc4.constant_urb_entry_read_offset = 0; + desc->desc4.constant_urb_entry_read_length = 4; + + /*kernel start*/ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), + kernel->bo); + desc++; + } + + dri_bo_unmap(bo); +} + +static void +gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + (void)mfc_context; +} + +static void 
+gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, + int index, + int head_offset, + int batchbuffer_offset, + int head_size, + int tail_size, + int number_mb_cmds, + int first_object, + int last_object, + int last_slice, + int mb_x, + int mb_y, + int width_in_mbs, + int qp) +{ + BEGIN_BATCH(batch, 12); + + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); + OUT_BATCH(batch, index); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + /*inline data */ + OUT_BATCH(batch, head_offset); + OUT_BATCH(batch, batchbuffer_offset); + OUT_BATCH(batch, + head_size << 16 | + tail_size); + OUT_BATCH(batch, + number_mb_cmds << 16 | + first_object << 2 | + last_object << 1 | + last_slice); + OUT_BATCH(batch, + mb_y << 8 | + mb_x); + OUT_BATCH(batch, + qp << 16 | + width_in_mbs); + + ADVANCE_BATCH(batch); +} + +
/*
 * Emit the media-object commands that generate the PAK commands of one
 * slice.
 *
 * The slice's macroblocks are handed out in chunks of 128 per media object;
 * a final object covers the remainder when the macroblock count is not a
 * multiple of 128.  The first object additionally accounts for the slice
 * head (head_size) and the last one for the slice tail (tail_size) when the
 * running offsets are advanced.
 *
 * head_offset        - offset of the slice head, passed as inline data.
 * head_size,
 * tail_size          - sizes of the slice head and tail.
 * batchbuffer_offset - offset in the output batchbuffer where this slice
 *                      starts.
 * qp                 - slice QP passed to the kernel.
 * last_slice         - non-zero for the last slice of the picture.
 *
 * The kernel (intra or inter) is chosen from the slice type after
 * intel_avc_enc_slice_type_fixup(), as the other slice-level functions in
 * this file do, rather than from the raw slice_type field.
 */
static void
gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
                                       struct intel_encoder_context *encoder_context,
                                       VAEncSliceParameterBufferH264 *slice_param,
                                       int head_offset,
                                       unsigned short head_size,
                                       unsigned short tail_size,
                                       int batchbuffer_offset,
                                       int qp,
                                       int last_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int total_mbs = slice_param->num_macroblocks;
    int number_mb_cmds = 128;
    int starting_mb = 0;
    int last_object = 0;
    int first_object = 1;
    int i;
    int mb_x, mb_y;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int index = (slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;

    /* Full chunks of number_mb_cmds macroblocks. */
    for (i = 0; i < total_mbs / number_mb_cmds; i++) {
        last_object = (total_mbs - starting_mb) == number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);

        starting_mb += number_mb_cmds;

        gen8_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);

        if (first_object) {
            head_offset += head_size;
            batchbuffer_offset += head_size;
        }

        if (last_object) {
            head_offset += tail_size;
            batchbuffer_offset += tail_size;
        }

        batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;

        first_object = 0;
    }

    /* Remainder: reached whenever the loop did not already emit the last
     * object, i.e. the macroblock count is not a multiple of the chunk size
     * (or the slice is smaller than one chunk). */
    if (!last_object) {
        last_object = 1;
        number_mb_cmds = total_mbs % number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);
        starting_mb += number_mb_cmds;

        gen8_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);
    }
}

/* + * return size in Owords (16bytes) + */ +static int +gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + int batchbuffer_offset) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter =
(VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); + int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned char *slice_header = NULL; + int slice_header_length_in_bits = 0; + unsigned int tail_data[] = { 0x0, 0x0 }; + long head_offset; + int old_used = intel_batchbuffer_used_size(slice_batch), used; + unsigned short head_size, tail_size; + int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + + if (rate_control_mode == VA_RC_CBR) { + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + } + + /* only support for 8-bit pixel bit-depth */ + assert(pSequenceParameter->bit_depth_luma_minus8 == 0); + assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); + assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); + assert(qp >= 0 && qp < 52); + + head_offset = old_used / 16; + gen8_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp, + slice_batch); + + if (slice_index == 0) + intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); + + // slice hander + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int *)slice_header, + ALIGN(slice_header_length_in_bits, 32) >> 5, + 
slice_header_length_in_bits & 0x1f, + 5, /* first 5 bytes are start code + nal unit type */ + 1, + 0, + 1, + slice_batch); + free(slice_header); + + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ + used = intel_batchbuffer_used_size(slice_batch); + head_size = (used - old_used) / 16; + old_used = used; + + /* tail */ + if (last_slice) { + mfc_context->insert_object(ctx, + encoder_context, + tail_data, + 2, + 8, + 2, + 1, + 1, + 0, + slice_batch); + } else { + mfc_context->insert_object(ctx, + encoder_context, + tail_data, + 1, + 8, + 1, + 1, + 1, + 0, + slice_batch); + } + + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ + used = intel_batchbuffer_used_size(slice_batch); + tail_size = (used - old_used) / 16; + + + gen8_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + head_size, + tail_size, + batchbuffer_offset, + qp, + last_slice); + + return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; +} + +static void +gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + int i, size, offset = 0; + intel_batchbuffer_start_atomic(batch, 0x4000); + gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); + + for ( i = 0; i < encode_state->num_slice_params_ext; i++) { + size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); + offset += size; + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static void +gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); + gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, 
encoder_context); + gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context); + gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context); +} + +static dri_bo * +gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); + dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); + + return mfc_context->mfc_batchbuffer_surface.bo; +} + +#endif + +static void +gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) { + fprintf(stderr, "Current VA driver don't support interlace mode!\n"); + assert(0); + return; + } + +#ifdef MFC_SOFTWARE_HASWELL + slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); +#else + slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); +#endif + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + + +static VAStatus +gen8_mfc_avc_encode_picture(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = 
encoder_context->mfc_context; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + int current_frame_bits_size; + int sts; + + for (;;) { + gen8_mfc_init(ctx, encode_state, encoder_context); + intel_mfc_avc_prepare(ctx, encode_state, encoder_context); + /*Programing bcs pipeline*/ + gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline + gen8_mfc_run(ctx, encode_state, encoder_context); + if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) { + gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size); + sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size); + if (sts == BRC_NO_HRD_VIOLATION) { + intel_mfc_hrd_context_update(encode_state, mfc_context); + break; + } + else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) { + if (!mfc_context->hrd.violation_noted) { + fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow"); + mfc_context->hrd.violation_noted = 1; + } + return VA_STATUS_SUCCESS; + } + } else { + break; + } + } + + return VA_STATUS_SUCCESS; +} + +/* + * MPEG-2 + */ + +static const int +va_to_gen8_mpeg2_picture_type[3] = { + 1, /* I */ + 2, /* P */ + 3 /* B */ +}; + +static void +gen8_mfc_mpeg2_pic_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct encode_state *encode_state) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferMPEG2 *pic_param; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + + assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + + BEGIN_BCS_BATCH(batch, 13); + OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 
2)); + OUT_BCS_BATCH(batch, + (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */ + (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */ + (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */ + (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */ + pic_param->picture_coding_extension.bits.intra_dc_precision << 14 | + pic_param->picture_coding_extension.bits.picture_structure << 12 | + pic_param->picture_coding_extension.bits.top_field_first << 11 | + pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 | + pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 | + pic_param->picture_coding_extension.bits.q_scale_type << 8 | + pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | + pic_param->picture_coding_extension.bits.alternate_scan << 6); + OUT_BCS_BATCH(batch, + 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */ + va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 | + 0); + OUT_BCS_BATCH(batch, + 1 << 31 | /* slice concealment */ + (height_in_mbs - 1) << 16 | + (width_in_mbs - 1)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + 0xFFF << 16 | /* InterMBMaxSize */ + 0xFFF << 0 | /* IntraMBMaxSize */ + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned char intra_qm[64] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 + }; + + unsigned char non_intra_qm[64] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 
16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 + }; + + gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context); +} + +static void +gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned short intra_fqm[64] = { + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + }; + + unsigned short non_intra_fqm[64] = { + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + }; + + gen8_mfc_fqm_state(ctx, 
MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context); +} + +static void +gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + int x, int y, + int next_x, int next_y, + int is_fisrt_slice_group, + int is_last_slice_group, + int intra_slice, + int qp, + struct intel_batchbuffer *batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 8); + + OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2)); + OUT_BCS_BATCH(batch, + 0 << 31 | /* MbRateCtrlFlag */ + !!is_last_slice_group << 19 | /* IsLastSliceGrp */ + 1 << 17 | /* Insert Header before the first slice group data */ + 1 << 16 | /* SliceData PresentFlag: always 1 */ + 1 << 15 | /* TailPresentFlag: always 1 */ + 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */ + !!intra_slice << 13 | /* IntraSlice */ + !!intra_slice << 12 | /* IntraSliceFlag */ + 0); + OUT_BCS_BATCH(batch, + next_y << 24 | + next_x << 16 | + y << 8 | + x << 0 | + 0); + OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */ + /* bitstream pointer is only loaded once for the first slice of a frame when + * LoadSlicePointerFlag is 0 + */ + OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset); + OUT_BCS_BATCH(batch, 0); /* FIXME: */ + OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */ + OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */ + + ADVANCE_BCS_BATCH(batch); +} + +static int +gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int mb_type, + int qp_scale_code, + int coded_block_pattern, + unsigned char target_size_in_word, + 
unsigned char max_size_in_word, + struct intel_batchbuffer *batch) +{ + int len_in_dwords = 9; + + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, + 0 << 24 | /* PackedMvNum */ + 0 << 20 | /* MvFormat */ + 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */ + 0 << 15 | /* TransformFlag: frame DCT */ + 0 << 14 | /* FieldMbFlag */ + 1 << 13 | /* IntraMbFlag */ + mb_type << 8 | /* MbType: Intra */ + 0 << 2 | /* SkipMbFlag */ + 0 << 0 | /* InterMbMode */ + 0); + OUT_BCS_BATCH(batch, y << 16 | x); + OUT_BCS_BATCH(batch, + max_size_in_word << 24 | + target_size_in_word << 16 | + coded_block_pattern << 6 | /* CBP */ + 0); + OUT_BCS_BATCH(batch, + last_mb_in_slice << 31 | + first_mb_in_slice << 30 | + 0 << 27 | /* EnableCoeffClamp */ + last_mb_in_slice_group << 26 | + 0 << 25 | /* MbSkipConvDisable */ + first_mb_in_slice_group << 24 | + 0 << 16 | /* MvFieldSelect */ + qp_scale_code << 0 | + 0); + OUT_BCS_BATCH(batch, 0); /* MV[0][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[0][1] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][1] */ + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +#define MPEG2_INTER_MV_OFFSET 12 + +static struct _mv_ranges +{ + int low; /* in the unit of 1/2 pixel */ + int high; /* in the unit of 1/2 pixel */ +} mv_ranges[] = { + {0, 0}, + {-16, 15}, + {-32, 31}, + {-64, 63}, + {-128, 127}, + {-256, 255}, + {-512, 511}, + {-1024, 1023}, + {-2048, 2047}, + {-4096, 4095} +}; + +static int +mpeg2_motion_vector(int mv, int pos, int display_max, int f_code) +{ + if (mv + pos * 16 * 2 < 0 || + mv + (pos + 1) * 16 * 2 > display_max * 2) + mv = 0; + + if (f_code > 0 && f_code < 10) { + if (mv < mv_ranges[f_code].low) + mv = mv_ranges[f_code].low; + + if (mv > mv_ranges[f_code].high) + mv = mv_ranges[f_code].high; + } + + return mv; +} + +static int 
+gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int width_in_mbs, int height_in_mbs, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int qp_scale_code, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) +{ + VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + int len_in_dwords = 9; + short *mvptr, mvx0, mvy0, mvx1, mvy1; + + if (batch == NULL) + batch = encoder_context->base.batch; + + mvptr = (short *)msg; + mvx0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]); + mvy0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]); + mvx1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]); + mvy1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]); + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, + 2 << 24 | /* PackedMvNum */ + 7 << 20 | /* MvFormat */ + 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */ + 0 << 15 | /* TransformFlag: frame DCT */ + 0 << 14 | /* FieldMbFlag */ + 0 << 13 | /* IntraMbFlag */ + 1 << 8 | /* MbType: Frame-based */ + 0 << 2 | /* SkipMbFlag */ + 0 << 0 | /* InterMbMode */ + 0); + OUT_BCS_BATCH(batch, y << 16 | x); + OUT_BCS_BATCH(batch, + max_size_in_word << 24 | + target_size_in_word << 16 | + 0x3f << 6 | /* CBP */ + 0); + OUT_BCS_BATCH(batch, + last_mb_in_slice << 31 | + first_mb_in_slice << 30 | + 0 << 27 | /* EnableCoeffClamp */ + last_mb_in_slice_group << 26 | + 0 << 25 | /* MbSkipConvDisable */ + first_mb_in_slice_group << 24 | + 0 << 
16 | /* MvFieldSelect */ + qp_scale_code << 0 | + 0); + + OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */ + OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[0][1] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][1] */ + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +static void +intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS); + + if (encode_state->packed_header_data[idx]) { + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; + unsigned int length_in_bits; + + assert(encode_state->packed_header_param[idx]); + param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; + length_in_bits = param->bit_length; + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + 5, /* FIXME: check it */ + 0, + 0, + 0, /* Needn't insert emulation bytes for MPEG-2 */ + slice_batch); + } + + idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS); + + if (encode_state->packed_header_data[idx]) { + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; + unsigned int length_in_bits; + + assert(encode_state->packed_header_param[idx]); + param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; + length_in_bits = param->bit_length; + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + 5, /* FIXME: check it */ + 0, + 0, + 0, /* Needn't insert 
emulation bytes for MPEG-2 */ + slice_batch); + } +} + +static void +gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + VAEncSliceParameterBufferMPEG2 *next_slice_group_param, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0}; + unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0}; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + int i, j; + int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos; + unsigned int *msg = NULL; + unsigned char *msg_ptr = NULL; + + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer; + h_start_pos = slice_param->macroblock_address % width_in_mbs; + v_start_pos = slice_param->macroblock_address / width_in_mbs; + assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs); + + dri_bo_map(vme_context->vme_output.bo , 0); + msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + if (next_slice_group_param) { + h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs; + v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs; + } else { + h_next_start_pos = 0; + v_next_start_pos = height_in_mbs; + } + + gen8_mfc_mpeg2_slicegroup_state(ctx, + encoder_context, + h_start_pos, + v_start_pos, + h_next_start_pos, + v_next_start_pos, + slice_index == 0, + next_slice_group_param == NULL, + 
slice_param->is_intra_slice, + slice_param->quantiser_scale_code, + slice_batch); + + if (slice_index == 0) + intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + /* Insert '00' to make sure the header is valid */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int*)section_delimiter, + 1, + 8, /* 8bits in the last DWORD */ + 1, /* 1 byte */ + 1, + 0, + 0, + slice_batch); + + for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) { + /* PAK for each macroblocks */ + for (j = 0; j < slice_param->num_macroblocks; j++) { + int h_pos = (slice_param->macroblock_address + j) % width_in_mbs; + int v_pos = (slice_param->macroblock_address + j) / width_in_mbs; + int first_mb_in_slice = (j == 0); + int last_mb_in_slice = (j == slice_param->num_macroblocks - 1); + int first_mb_in_slice_group = (i == 0 && j == 0); + int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 && + j == slice_param->num_macroblocks - 1); + + msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block); + + if (slice_param->is_intra_slice) { + gen8_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + } else { + gen8_mfc_mpeg2_pak_object_inter(ctx, + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); + } + } + + slice_param++; + } + + dri_bo_unmap(vme_context->vme_output.bo); + + /* tail data */ + if (next_slice_group_param == NULL) { /* end of a picture */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int *)tail_delimiter, + 2, + 
8, /* 8bits in the last DWORD */ + 5, /* 5 bytes */ + 1, + 1, + 0, + slice_batch); + } else { /* end of a lsice group */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int *)section_delimiter, + 1, + 8, /* 8bits in the last DWORD */ + 1, /* 1 byte */ + 1, + 1, + 0, + slice_batch); + } +} + +/* + * A batch buffer for all slices, including slice state, + * slice insert object and slice pak object commands + * + */ +static dri_bo * +gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; + dri_bo *batch_bo; + int i; + int buffer_size; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + buffer_size = width_in_mbs * height_in_mbs * 64; + batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch_bo = batch->buffer; + + for (i = 0; i < encode_state->num_slice_params_ext; i++) { + if (i == encode_state->num_slice_params_ext - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer; + + gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch); + } + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + + return batch_bo; +} + +static void +gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct 
intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen8_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state); + gen8_mfc_mpeg2_qm_state(ctx, encoder_context); + gen8_mfc_mpeg2_fqm_state(ctx, encoder_context); +} + +static void +gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context); + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + +static VAStatus +intel_mfc_mpeg2_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct object_surface *obj_surface; + struct object_buffer *obj_buffer; + struct i965_coded_buffer_segment *coded_buffer_segment; + VAStatus vaStatus = VA_STATUS_SUCCESS; + dri_bo *bo; + int i; + + /* reconstructed surface */ + obj_surface = encode_state->reconstructed_object; + 
i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + mfc_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->pre_deblocking_output.bo); + mfc_context->surface_state.width = obj_surface->orig_width; + mfc_context->surface_state.height = obj_surface->orig_height; + mfc_context->surface_state.w_pitch = obj_surface->width; + mfc_context->surface_state.h_pitch = obj_surface->height; + + /* forward reference */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[0].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[0].bo); + } else + mfc_context->reference_surfaces[0].bo = NULL; + + /* backward reference */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[1].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } else { + mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo; + + if (mfc_context->reference_surfaces[1].bo) + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } + + for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo; + + if (mfc_context->reference_surfaces[i].bo) + dri_bo_reference(mfc_context->reference_surfaces[i].bo); + } + + /* input YUV surface */ + obj_surface = encode_state->input_yuv_object; + mfc_context->uncompressed_picture_source.bo = obj_surface->bo; + dri_bo_reference(mfc_context->uncompressed_picture_source.bo); + + /* coded buffer */ + obj_buffer = encode_state->coded_buf_object; + bo = obj_buffer->buffer_store->bo; + mfc_context->mfc_indirect_pak_bse_object.bo = bo; + mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE; + mfc_context->mfc_indirect_pak_bse_object.end_offset = 
ALIGN(obj_buffer->size_element - 0x1000, 0x1000); + dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo); + + /* set the internal flag to 0 to indicate the coded size is unknown */ + dri_bo_map(bo, 1); + coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; + coded_buffer_segment->mapped = 0; + coded_buffer_segment->codec = encoder_context->codec; + dri_bo_unmap(bo); + + return vaStatus; +} + +static VAStatus +gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_init(ctx, encode_state, encoder_context); + intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context); + /*Programing bcs pipeline*/ + gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context); + gen8_mfc_run(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_mfc_context_destroy(void *context) +{ + struct gen6_mfc_context *mfc_context = context; + int i; + + dri_bo_unreference(mfc_context->post_deblocking_output.bo); + mfc_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->pre_deblocking_output.bo); + mfc_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); + mfc_context->uncompressed_picture_source.bo = NULL; + + dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); + mfc_context->mfc_indirect_pak_bse_object.bo = NULL; + + for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ + dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); + mfc_context->direct_mv_buffers[i].bo = NULL; + } + + dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); + mfc_context->intra_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); + mfc_context->macroblock_status_buffer.bo = NULL; + + dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); + 
mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); + mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + + + for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ + dri_bo_unreference(mfc_context->reference_surfaces[i].bo); + mfc_context->reference_surfaces[i].bo = NULL; + } + + i965_gpe_context_destroy(&mfc_context->gpe_context); + + dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); + mfc_context->mfc_batchbuffer_surface.bo = NULL; + + dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.bo = NULL; + + if (mfc_context->aux_batchbuffer) + intel_batchbuffer_free(mfc_context->aux_batchbuffer); + + mfc_context->aux_batchbuffer = NULL; + + free(mfc_context); +} +/* Encoder entry point: dispatch on the VA profile. The three H.264 profiles go to gen8_mfc_avc_encode_picture(), the two MPEG-2 profiles to gen8_mfc_mpeg2_encode_picture(); any other profile yields VA_STATUS_ERROR_UNSUPPORTED_PROFILE. Returns the status of the selected encoder. */ +static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus; + + switch (profile) { + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context); + break; + + /* FIXME: add support for other profiles */ + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context); + break; + + default: + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + break; + } + + return vaStatus; +} +/* Allocate a zeroed gen6_mfc_context, fill in its GPE parameters and gen8 state-emitter callbacks, and install it (with its destroy/pipeline/brc hooks) on encoder_context. Returns True. */ +Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); +/* NOTE(review): the calloc() result is dereferenced on the next line without a NULL check. */ + mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; + mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + +
mfc_context->gpe_context.curbe.length = 32 * 4; + + mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + mfc_context->gpe_context.vfe_state.num_urb_entries = 16; + mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; + mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1; + + i965_gpe_load_kernels(ctx, + &mfc_context->gpe_context, + gen8_mfc_kernels, + NUM_MFC_KERNEL); + + mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select; + mfc_context->set_surface_state = gen8_mfc_surface_state; + mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state; + mfc_context->avc_img_state = gen8_mfc_avc_img_state; + mfc_context->avc_qm_state = gen8_mfc_avc_qm_state; + mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state; + mfc_context->insert_object = gen8_mfc_avc_insert_object; + mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup; + + encoder_context->mfc_context = mfc_context; + encoder_context->mfc_context_destroy = gen8_mfc_context_destroy; + encoder_context->mfc_pipeline = gen8_mfc_pipeline; + encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare; + + return True; +} diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c new file mode 100644 index 00000000..c351e4b8 --- /dev/null +++ b/src/gen8_mfd.c @@ -0,0 +1,2834 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * 
of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Xiang Haihao + * Zhao Yakui + * + */ + +#include +#include +#include +#include +#include + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_decoder_utils.h" + +#include "gen7_mfd.h" +#include "intel_media.h" + +#define B0_STEP_REV 2 +#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) +/* 64-entry index table whose values follow the 8x8 zig-zag scan order (0, 1, 8, 16, 9, 2, ...). NOTE(review): its users are outside this chunk - confirm the intended direction of the mapping there. */ +static const uint32_t zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; +/* Make sure obj_surface carries AVC private data (GenAvcSurface): allocate it on first use, register gen_free_avc_surface as its destructor, recompute dmv_bottom_flag from the picture parameters, and allocate the direct-MV buffer(s) sized width_in_mbs * height_in_mbs * 128 when they do not exist yet. */ +static void +gen8_mfd_init_avc_surface(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + GenAvcSurface *gen7_avc_surface = obj_surface->private_data; + int width_in_mbs, height_in_mbs; + + obj_surface->free_private_data = gen_free_avc_surface; + width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ +/* NOTE(review): the calloc() result below is stored and then dereferenced without a NULL check. */ + if (!gen7_avc_surface) { + gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + assert((obj_surface->size & 0x3f) == 0); + obj_surface->private_data = gen7_avc_surface; + } + + gen7_avc_surface->dmv_bottom_flag =
(pic_param->pic_fields.bits.field_pic_flag && + !pic_param->seq_fields.bits.direct_8x8_inference_flag); + + if (gen7_avc_surface->dmv_top == NULL) { + gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + width_in_mbs * height_in_mbs * 128, + 0x1000); + assert(gen7_avc_surface->dmv_top); + } + + if (gen7_avc_surface->dmv_bottom_flag && + gen7_avc_surface->dmv_bottom == NULL) { + gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + width_in_mbs * height_in_mbs * 128, + 0x1000); + assert(gen7_avc_surface->dmv_bottom); + } +} + +static void +gen8_mfd_pipe_mode_select(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC || + standard_select == MFX_FORMAT_VC1 || + standard_select == MFX_FORMAT_JPEG); + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Currently only support long format */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* disable Stream-Out */ + (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */ + (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (MFX_CODEC_DECODE << 4) | /* decoding mode */ + (standard_select << 0)); + OUT_BCS_BATCH(batch, + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ + OUT_BCS_BATCH(batch, 0); /* reserved */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_surface_state(VADriverContextP ctx, + struct 
decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + struct object_surface *obj_surface = decode_state->render_object; + unsigned int y_cb_offset; + unsigned int y_cr_offset; + + assert(obj_surface); + + y_cb_offset = obj_surface->y_cb_offset; + y_cr_offset = obj_surface->y_cr_offset; + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((obj_surface->orig_height - 1) << 18) | + ((obj_surface->orig_width - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ + (0 << 22) | /* surface object control state, ignored */ + ((obj_surface->width - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for U(Cb), must be 0 */ + (y_cb_offset << 0)); /* Y offset for U(Cb) */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for V(Cr), must be 0 */ + (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */ + ADVANCE_BCS_BATCH(batch); +} +/* Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE command: relocations for the pre/post-deblocking outputs and the row-store scratch buffers that are marked valid, one address pair per reference surface slot, and 0 for every slot that is not used when decoding. */ +static void +gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 61); + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + /* Pre-deblock 1-3 */ + if (gen7_mfd_context->pre_deblocking_output.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /*
Post-deblocking 4-6 */ + if (gen7_mfd_context->post_deblocking_output.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* uncompressed-video & stream out 7-12 */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* intra row-store scratch 13-15 */ + if (gen7_mfd_context->intra_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* deblocking-filter-row-store 16-18 */ + if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* DW 19..50: one address pair per reference surface slot; slots without a usable bo are written as 0 */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + struct object_surface *obj_surface; + + if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID && + gen7_mfd_context->reference_surface[i].obj_surface && + gen7_mfd_context->reference_surface[i].obj_surface->bo) { + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; + + OUT_BCS_RELOC(batch, obj_surface->bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + } else { + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + } + + /* reference property 51 */ + OUT_BCS_BATCH(batch, 0); + + /* Macroblock status & ILDB 52-57 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); +
OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the second Macroblock status 58-60 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx, + dri_bo *slice_data_bo, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 26); + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); + /* MFX In BS 1-5 */ + OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* Upper bound 4-5 */ + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); + + /* MFX indirect MV 6-10 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX IT_COFF 11-15 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX IT_DBLK 16-20 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX PAK_BSE object for encoder 21-25 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + + if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) + 
OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* MPR Row Store Scratch buffer 4-6 */ + if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* Bitplane 7-9 */ + if (gen7_mfd_context->bitplane_read_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_qm_state(VADriverContextP ctx, + int qm_type, + unsigned char *qm, + int qm_length, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + unsigned int qm_buffer[16]; + + assert(qm_length <= 16 * 4); + memcpy(qm_buffer, qm, qm_length); + + BEGIN_BCS_BATCH(batch, 18); + OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(batch, qm_type << 0); + intel_batchbuffer_data(batch, qm_buffer, 16 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_img_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int img_struct; + int mbaff_frame_flag; + unsigned int width_in_mbs, height_in_mbs; + VAPictureParameterBufferH264 *pic_param; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); + + if (pic_param->CurrPic.flags & 
VA_PICTURE_H264_TOP_FIELD) + img_struct = 1; + else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD) + img_struct = 3; + else + img_struct = 0; + + if ((img_struct & 0x1) == 0x1) { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x1); + } else { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x0); + } + + if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */ + assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0); + assert(pic_param->pic_fields.bits.field_pic_flag == 0); + } else { + assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */ + } + + mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag && + !pic_param->pic_fields.bits.field_pic_flag); + + width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + + /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */ + assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */ + pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */ + assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */ + + BEGIN_BCS_BATCH(batch, 17); + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2)); + OUT_BCS_BATCH(batch, + width_in_mbs * height_in_mbs); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + OUT_BCS_BATCH(batch, + ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) | + ((pic_param->chroma_qp_index_offset & 0x1f) << 16) | + (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */ + (0 << 13) | /* Max Macroblock size conformance Inter flag ??? 
FIXME */ + (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */ + (pic_param->pic_fields.bits.weighted_bipred_idc << 10) | + (img_struct << 8)); + OUT_BCS_BATCH(batch, + (pic_param->seq_fields.bits.chroma_format_idc << 10) | + (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) | + ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) | + (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) | + (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) | + (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) | + (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) | + (mbaff_frame_flag << 1) | + (pic_param->pic_fields.bits.field_pic_flag << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_qm_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAIQMatrixBufferH264 *iq_matrix; + VAPictureParameterBufferH264 *pic_param; + + if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) + iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer; + else + iq_matrix = &gen7_mfd_context->iq_matrix.h264; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + + gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context); + gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context); + + if (pic_param->pic_fields.bits.transform_8x8_mode_flag) { + gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, 
&iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context); + gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context); + } +} + +static void +gen8_mfd_avc_picid_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); + OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_directmode_state(VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + struct object_surface *obj_surface; + GenAvcSurface *gen7_avc_surface; + VAPictureH264 *va_pic; + int i, j; + + BEGIN_BCS_BATCH(batch, 71); + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* reference surfaces 0..15 */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID && + gen7_mfd_context->reference_surface[i].obj_surface && + gen7_mfd_context->reference_surface[i].obj_surface->private_data) { + + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; + gen7_avc_surface = obj_surface->private_data; + + OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + OUT_BCS_BATCH(batch, 0); + + /* the current decoding frame/field */ + va_pic = 
&pic_param->CurrPic; + obj_surface = decode_state->render_object; + assert(obj_surface->bo && obj_surface->private_data); + gen7_avc_surface = obj_surface->private_data; + + OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POC List */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { + int found = 0; + + assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); + + for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { + va_pic = &pic_param->ReferenceFrames[j]; + + if (va_pic->flags & VA_PICTURE_H264_INVALID) + continue; + + if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { + found = 1; + break; + } + } + + assert(found == 1); + assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); + + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + va_pic = &pic_param->CurrPic; + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_slice_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; + int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos; + int num_ref_idx_l0, num_ref_idx_l1; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); 
+ int first_mb_in_slice = 0, first_mb_in_next_slice = 0; + int slice_type; + + if (slice_param->slice_type == SLICE_TYPE_I || + slice_param->slice_type == SLICE_TYPE_SI) { + slice_type = SLICE_TYPE_I; + } else if (slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) { + slice_type = SLICE_TYPE_P; + } else { + assert(slice_param->slice_type == SLICE_TYPE_B); + slice_type = SLICE_TYPE_B; + } + + if (slice_type == SLICE_TYPE_I) { + assert(slice_param->num_ref_idx_l0_active_minus1 == 0); + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = 0; + num_ref_idx_l1 = 0; + } else if (slice_type == SLICE_TYPE_P) { + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = 0; + } else { + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } + + first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture; + slice_hor_pos = first_mb_in_slice % width_in_mbs; + slice_ver_pos = first_mb_in_slice / width_in_mbs; + + if (next_slice_param) { + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + next_slice_hor_pos = 0; + next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); + } + + BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? 
*/ + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, slice_type); + OUT_BCS_BATCH(batch, + (num_ref_idx_l1 << 24) | + (num_ref_idx_l0 << 16) | + (slice_param->chroma_log2_weight_denom << 8) | + (slice_param->luma_log2_weight_denom << 0)); + OUT_BCS_BATCH(batch, + (slice_param->direct_spatial_mv_pred_flag << 29) | + (slice_param->disable_deblocking_filter_idc << 27) | + (slice_param->cabac_init_idc << 24) | + ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) | + ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | + ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); + OUT_BCS_BATCH(batch, + (slice_ver_pos << 24) | + (slice_hor_pos << 16) | + (first_mb_in_slice << 0)); + OUT_BCS_BATCH(batch, + (next_slice_ver_pos << 16) | + (next_slice_hor_pos << 0)); + OUT_BCS_BATCH(batch, + (next_slice_param == NULL) << 19); /* last slice flag */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static inline void +gen8_mfd_avc_ref_idx_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_send_avc_ref_idx_state( + gen7_mfd_context->base.batch, + slice_param, + gen7_mfd_context->reference_surface + ); +} + +static void +gen8_mfd_avc_weightoffset_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i, j, num_weight_offset_table = 0; + short weightoffsets[32 * 6]; + + if ((slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) && + (pic_param->pic_fields.bits.weighted_pred_flag == 1)) { + num_weight_offset_table = 1; + } + + if ((slice_param->slice_type == SLICE_TYPE_B) && + 
(pic_param->pic_fields.bits.weighted_bipred_idc == 1)) { + num_weight_offset_table = 2; + } + + for (i = 0; i < num_weight_offset_table; i++) { + BEGIN_BCS_BATCH(batch, 98); + OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2)); + OUT_BCS_BATCH(batch, i); + + if (i == 0) { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j]; + weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1]; + } + } else { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j]; + weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1]; + } + } +/* Payload is the whole table: 32 entries of 6 shorts each (96 dwords assuming a 16-bit short), i.e. the 98 - 2 dwords that follow the header and the table index. */ + intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets)); + ADVANCE_BCS_BATCH(batch); + } +} +/* Emit MFD_AVC_BSD_OBJECT for one slice: the slice data size and offset, then the bit offset of the first macroblock (from avc_get_first_mb_bit_offset()) split into a byte count in bits 16 and up and a 3-bit remainder in the low bits. A NULL next_slice_param sets the LastSlice flag. */ +static void +gen8_mfd_avc_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag); + + /* the input bitstream format on GEN7 differs from GEN6 */ + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, + (slice_param->slice_data_size)); + OUT_BCS_BATCH(batch, slice_param->slice_data_offset); +
OUT_BCS_BATCH(batch, + (0 << 31) | + (0 << 14) | + (0 << 12) | + (0 << 10) | + (0 << 8)); + OUT_BCS_BATCH(batch, + ((slice_data_bit_offset >> 3) << 16) | + (1 << 7) | + (0 << 5) | + (0 << 4) | + ((next_slice_param == NULL) << 3) | /* LastSlice Flag */ + (slice_data_bit_offset & 0x7)); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static inline void +gen8_mfd_avc_context_init( + VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context +) +{ + /* Initialize flat scaling lists */ + avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264); +} + +static void +gen8_mfd_avc_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface; + dri_bo *bo; + int i, j, enable_avc_ildb = 0; + unsigned int width_in_mbs, height_in_mbs; + + for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (slice_param->disable_deblocking_filter_idc != 1) { + enable_avc_ildb = 1; + break; + } + + slice_param++; + } + } + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); 
+ width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; + assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ + assert(height_in_mbs > 0 && height_in_mbs <= 256); + + /* Current decoded picture: refresh its reference flag and make sure it has an NV12 buffer object */ + obj_surface = decode_state->render_object; + obj_surface->flags &= ~SURFACE_REF_DIS_MASK; + obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + + /* initialise the UV plane for the YUV400 case (chroma_format_idc == 0): fill it with 0x80 */ + if (pic_param->seq_fields.bits.chroma_format_idc == 0) { + unsigned int uv_offset = obj_surface->width * obj_surface->height; + unsigned int uv_size = obj_surface->width * obj_surface->height / 2; +/* NOTE(review): the return value of drm_intel_gem_bo_map_gtt() is not checked before bo->virtual is written. */ + drm_intel_gem_bo_map_gtt(obj_surface->bo); + memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); + drm_intel_gem_bo_unmap_gtt(obj_surface->bo); + } + + gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface); +/* Both deblocking outputs reference the render surface's bo; exactly one is marked valid: post-deblocking when enable_avc_ildb is set, pre-deblocking otherwise. */ + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb; +/* The row-store scratch buffers below are released and re-allocated on every call, each sized as a multiple of width_in_mbs * 64. */ + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + width_in_mbs * 64, + 0x1000); + assert(bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, +
"deblocking filter row store", + width_in_mbs * 64 * 4, + 0x1000); + assert(bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->bitplane_read_buffer.valid = 0; +} + +static void +gen8_mfd_avc_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context); + + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); + 
gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context); + gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static void +gen8_mfd_mpeg2_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferMPEG2 *pic_param; + struct i965_driver_data *i965 = 
i965_driver_data(ctx); + struct object_surface *obj_surface; + dri_bo *bo; + unsigned int width_in_mbs; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + + mpeg2_set_reference_surfaces( + ctx, + gen7_mfd_context->reference_surface, + decode_state, + pic_param + ); + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 96, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->post_deblocking_output.valid = 0; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->bitplane_read_buffer.valid = 0; +} + +static void +gen8_mfd_mpeg2_pic_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferMPEG2 *pic_param; + unsigned int slice_concealment_disable_bit = 0; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + + slice_concealment_disable_bit = 1; + + 
BEGIN_BCS_BATCH(batch, 13); + OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); + OUT_BCS_BATCH(batch, + (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */ + ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */ + ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */ + ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */ + pic_param->picture_coding_extension.bits.intra_dc_precision << 14 | + pic_param->picture_coding_extension.bits.picture_structure << 12 | + pic_param->picture_coding_extension.bits.top_field_first << 11 | + pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 | + pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 | + pic_param->picture_coding_extension.bits.q_scale_type << 8 | + pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | + pic_param->picture_coding_extension.bits.alternate_scan << 6); + OUT_BCS_BATCH(batch, + pic_param->picture_coding_type << 9); + OUT_BCS_BATCH(batch, + (slice_concealment_disable_bit << 31) | + ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 | + ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_mpeg2_qm_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2; + int i, j; + + /* Update internal QM state */ + if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) { + VAIQMatrixBufferMPEG2 * const iq_matrix = + (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer; + + if (gen_iq_matrix->load_intra_quantiser_matrix == -1 || + iq_matrix->load_intra_quantiser_matrix) { + 
gen_iq_matrix->load_intra_quantiser_matrix = + iq_matrix->load_intra_quantiser_matrix; + if (iq_matrix->load_intra_quantiser_matrix) { + for (j = 0; j < 64; j++) + gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] = + iq_matrix->intra_quantiser_matrix[j]; + } + } + + if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 || + iq_matrix->load_non_intra_quantiser_matrix) { + gen_iq_matrix->load_non_intra_quantiser_matrix = + iq_matrix->load_non_intra_quantiser_matrix; + if (iq_matrix->load_non_intra_quantiser_matrix) { + for (j = 0; j < 64; j++) + gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] = + iq_matrix->non_intra_quantiser_matrix[j]; + } + } + } + + /* Commit QM state to HW */ + for (i = 0; i < 2; i++) { + unsigned char *qm = NULL; + int qm_type; + + if (i == 0) { + if (gen_iq_matrix->load_intra_quantiser_matrix) { + qm = gen_iq_matrix->intra_quantiser_matrix; + qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX; + } + } else { + if (gen_iq_matrix->load_non_intra_quantiser_matrix) { + qm = gen_iq_matrix->non_intra_quantiser_matrix; + qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX; + } + } + + if (!qm) + continue; + + gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context); + } +} + +static void +gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + VASliceParameterBufferMPEG2 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0; + + if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD || + pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD) + is_field_pic = 1; + is_field_pic_wa = is_field_pic && + gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0; + + vpos0 = 
slice_param->slice_vertical_position / (1 + is_field_pic_wa); + hpos0 = slice_param->slice_horizontal_position; + + if (next_slice_param == NULL) { + vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic); + hpos1 = 0; + } else { + vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa); + hpos1 = next_slice_param->slice_horizontal_position; + } + + mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0); + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3)); + OUT_BCS_BATCH(batch, + hpos0 << 24 | + vpos0 << 16 | + mb_count << 8 | + (next_slice_param == NULL) << 5 | + (next_slice_param == NULL) << 3 | + (slice_param->macroblock_offset & 0x7)); + OUT_BCS_BATCH(batch, + (slice_param->quantiser_scale_code << 24) | + (vpos1 << 8 | hpos1)); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferMPEG2 *pic_param; + VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + + gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, 
gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context); + gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context); + + if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0) + gen7_mfd_context->wa_mpeg2_slice_vertical_position = + mpeg2_wa_slice_vertical_position(decode_state, pic_param); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static const int va_to_gen7_vc1_pic_type[5] = { + GEN7_VC1_I_PICTURE, + GEN7_VC1_P_PICTURE, + GEN7_VC1_B_PICTURE, + GEN7_VC1_BI_PICTURE, + GEN7_VC1_P_PICTURE, +}; + +static const int va_to_gen7_vc1_mv[4] = { + 1, /* 1-MV */ + 2, /* 1-MV half-pel */ + 3, /* 1-MV half-pef bilinear */ + 0, /* Mixed MV */ +}; + +static const int b_picture_scale_factor[21] = { + 128, 85, 170, 64, 192, + 51, 102, 153, 204, 43, + 215, 37, 74, 111, 148, + 185, 222, 32, 96, 160, + 224, +}; + +static const int va_to_gen7_vc1_condover[3] = { + 0, + 2, + 
3 +}; + +static const int va_to_gen7_vc1_profile[4] = { + GEN7_VC1_SIMPLE_PROFILE, + GEN7_VC1_MAIN_PROFILE, + GEN7_VC1_RESERVED_PROFILE, + GEN7_VC1_ADVANCED_PROFILE +}; + +static void +gen8_mfd_free_vc1_surface(void **data) +{ + struct gen7_vc1_surface *gen7_vc1_surface = *data; + + if (!gen7_vc1_surface) + return; + + dri_bo_unreference(gen7_vc1_surface->dmv); + free(gen7_vc1_surface); + *data = NULL; +} + +static void +gen8_mfd_init_vc1_surface(VADriverContextP ctx, + VAPictureParameterBufferVC1 *pic_param, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data; + int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16; + int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16; + + obj_surface->free_private_data = gen8_mfd_free_vc1_surface; + + if (!gen7_vc1_surface) { + gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1); + assert((obj_surface->size & 0x3f) == 0); + obj_surface->private_data = gen7_vc1_surface; + } + + gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type; + + if (gen7_vc1_surface->dmv == NULL) { + gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + width_in_mbs * height_in_mbs * 64, + 0x1000); + } +} + +static void +gen8_mfd_vc1_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferVC1 *pic_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface; + dri_bo *bo; + int width_in_mbs; + int picture_type; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; + width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16; + picture_type = pic_param->picture_fields.bits.picture_type; + + intel_update_vc1_frame_store_index(ctx, + decode_state, + 
pic_param, + gen7_mfd_context->reference_surface); + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface); + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter; + + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + width_in_mbs * 64, + 0x1000); + assert(bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "deblocking filter row store", + width_in_mbs * 6 * 64, + 0x1000); + assert(bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 96, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0; + + gen7_mfd_context->bitplane_read_buffer.valid = 
!!pic_param->bitplane_present.value; + dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo); + + if (gen7_mfd_context->bitplane_read_buffer.valid) { + int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16; + int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16; + int bitplane_width = ALIGN(width_in_mbs, 2) / 2; + int src_w, src_h; + uint8_t *src = NULL, *dst = NULL; + + assert(decode_state->bit_plane->buffer); + src = decode_state->bit_plane->buffer; + + bo = dri_bo_alloc(i965->intel.bufmgr, + "VC-1 Bitplane", + bitplane_width * height_in_mbs, + 0x1000); + assert(bo); + gen7_mfd_context->bitplane_read_buffer.bo = bo; + + dri_bo_map(bo, True); + assert(bo->virtual); + dst = bo->virtual; + + for (src_h = 0; src_h < height_in_mbs; src_h++) { + for(src_w = 0; src_w < width_in_mbs; src_w++) { + int src_index, dst_index; + int src_shift; + uint8_t src_value; + + src_index = (src_h * width_in_mbs + src_w) / 2; + src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4; + src_value = ((src[src_index] >> src_shift) & 0xf); + + if (picture_type == GEN7_VC1_SKIPPED_PICTURE){ + src_value |= 0x2; + } + + dst_index = src_w / 2; + dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4)); + } + + if (src_w & 1) + dst[src_w / 2] >>= 4; + + dst += bitplane_width; + } + + dri_bo_unmap(bo); + } else + gen7_mfd_context->bitplane_read_buffer.bo = NULL; +} + +static void +gen8_mfd_vc1_pic_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVC1 *pic_param; + struct object_surface *obj_surface; + int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq; + int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel; + int unified_mv_mode; + int ref_field_pic_polarity = 0; + int scale_factor = 0; + int trans_ac_y = 0; + int dmv_surface_valid = 0; + int brfd = 0; + int fcm = 0; + int picture_type; + int profile; + 
int overlap; + int interpolation_mode = 0; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; + + profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile]; + dquant = pic_param->pic_quantizer_fields.bits.dquant; + dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame; + dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile; + dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge; + dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge; + dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level; + alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer; + + if (dquant == 0) { + alt_pquant_config = 0; + alt_pquant_edge_mask = 0; + } else if (dquant == 2) { + alt_pquant_config = 1; + alt_pquant_edge_mask = 0xf; + } else { + assert(dquant == 1); + if (dquantfrm == 0) { + alt_pquant_config = 0; + alt_pquant_edge_mask = 0; + alt_pq = 0; + } else { + assert(dquantfrm == 1); + alt_pquant_config = 1; + + switch (dqprofile) { + case 3: + if (dqbilevel == 0) { + alt_pquant_config = 2; + alt_pquant_edge_mask = 0; + } else { + assert(dqbilevel == 1); + alt_pquant_config = 3; + alt_pquant_edge_mask = 0; + } + break; + + case 0: + alt_pquant_edge_mask = 0xf; + break; + + case 1: + if (dqdbedge == 3) + alt_pquant_edge_mask = 0x9; + else + alt_pquant_edge_mask = (0x3 << dqdbedge); + + break; + + case 2: + alt_pquant_edge_mask = (0x1 << dqsbedge); + break; + + default: + assert(0); + } + } + } + + if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) { + assert(pic_param->mv_fields.bits.mv_mode2 < 4); + unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2]; + } else { + assert(pic_param->mv_fields.bits.mv_mode < 4); + unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode]; + } + + if (pic_param->sequence_fields.bits.interlace == 1 && + 
pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */ + /* FIXME: calculate reference field picture polarity */ + assert(0); + ref_field_pic_polarity = 0; + } + + if (pic_param->b_picture_fraction < 21) + scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction]; + + picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type]; + + if (profile == GEN7_VC1_ADVANCED_PROFILE && + picture_type == GEN7_VC1_I_PICTURE) + picture_type = GEN7_VC1_BI_PICTURE; + + if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */ + trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2; + else { + trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1; + + /* + * 8.3.6.2.1 Transform Type Selection + * If variable-sized transform coding is not enabled, + * then the 8x8 transform shall be used for all blocks. + * it is also MFX_VC1_PIC_STATE requirement. + */ + if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) { + pic_param->transform_fields.bits.mb_level_transform_type_flag = 1; + pic_param->transform_fields.bits.frame_level_transform_type = 0; + } + } + + if (picture_type == GEN7_VC1_B_PICTURE) { + struct gen7_vc1_surface *gen7_vc1_surface = NULL; + + obj_surface = decode_state->reference_objects[1]; + + if (obj_surface) + gen7_vc1_surface = obj_surface->private_data; + + if (!gen7_vc1_surface || + (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE || + va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE)) + dmv_surface_valid = 0; + else + dmv_surface_valid = 1; + } + + assert(pic_param->picture_fields.bits.frame_coding_mode < 3); + + if (pic_param->picture_fields.bits.frame_coding_mode < 2) + fcm = pic_param->picture_fields.bits.frame_coding_mode; + else { + if (pic_param->picture_fields.bits.top_field_first) + fcm = 2; + else + fcm = 3; + } + + if 
(pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */ + brfd = pic_param->reference_fields.bits.reference_distance; + brfd = (scale_factor * brfd) >> 8; + brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1; + + if (brfd < 0) + brfd = 0; + } + + overlap = 0; + if (profile != GEN7_VC1_ADVANCED_PROFILE){ + if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 && + pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) { + overlap = 1; + } + }else { + if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE && + pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){ + overlap = 1; + } + if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE || + pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){ + if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){ + overlap = 1; + } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 || + va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) { + overlap = 1; + } + } + } + + assert(pic_param->conditional_overlap_flag < 3); + assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */ + + if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear || + (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation && + pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear)) + interpolation_mode = 9; /* Half-pel bilinear */ + else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel || + (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation && + pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel)) + interpolation_mode = 1; /* Half-pel bicubic */ + else + interpolation_mode = 0; /* Quarter-pel bicubic */ + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, + (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) | + 
((ALIGN(pic_param->coded_width, 16) / 16) - 1)); + OUT_BCS_BATCH(batch, + ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 | + dmv_surface_valid << 15 | + (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */ + pic_param->rounding_control << 13 | + pic_param->sequence_fields.bits.syncmarker << 12 | + interpolation_mode << 8 | + 0 << 7 | /* FIXME: scale up or down ??? */ + pic_param->range_reduction_frame << 6 | + pic_param->entrypoint_fields.bits.loopfilter << 5 | + overlap << 4 | + !pic_param->picture_fields.bits.is_first_field << 3 | + (pic_param->sequence_fields.bits.profile == 3) << 0); + OUT_BCS_BATCH(batch, + va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 | + picture_type << 26 | + fcm << 24 | + alt_pq << 16 | + pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 | + scale_factor << 0); + OUT_BCS_BATCH(batch, + unified_mv_mode << 28 | + pic_param->mv_fields.bits.four_mv_switch << 27 | + pic_param->fast_uvmc_flag << 26 | + ref_field_pic_polarity << 25 | + pic_param->reference_fields.bits.num_reference_pictures << 24 | + pic_param->reference_fields.bits.reference_distance << 20 | + pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? 
*/ + pic_param->mv_fields.bits.extended_dmv_range << 10 | + pic_param->mv_fields.bits.extended_mv_range << 8 | + alt_pquant_edge_mask << 4 | + alt_pquant_config << 2 | + pic_param->pic_quantizer_fields.bits.half_qp << 1 | + pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0); + OUT_BCS_BATCH(batch, + !!pic_param->bitplane_present.value << 31 | + !pic_param->bitplane_present.flags.bp_forward_mb << 30 | + !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 | + !pic_param->bitplane_present.flags.bp_skip_mb << 28 | + !pic_param->bitplane_present.flags.bp_direct_mb << 27 | + !pic_param->bitplane_present.flags.bp_overflags << 26 | + !pic_param->bitplane_present.flags.bp_ac_pred << 25 | + !pic_param->bitplane_present.flags.bp_field_tx << 24 | + pic_param->mv_fields.bits.mv_table << 20 | + pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 | + pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 | + pic_param->transform_fields.bits.frame_level_transform_type << 12 | + pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 | + pic_param->mb_mode_table << 8 | + trans_ac_y << 6 | + pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 | + pic_param->transform_fields.bits.intra_transform_dc_table << 3 | + pic_param->cbp_table << 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVC1 *pic_param; + int intensitycomp_single; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; + intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == 
VAMvModeIntensityCompensation); + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, + 0 << 14 | /* FIXME: double ??? */ + 0 << 12 | + intensitycomp_single << 10 | + intensitycomp_single << 8 | + 0 << 4 | /* FIXME: interlace mode */ + 0); + OUT_BCS_BATCH(batch, + pic_param->luma_shift << 16 | + pic_param->luma_scale << 0); /* FIXME: Luma Scaling */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_vc1_directmode_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + struct object_surface *obj_surface; + dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL; + + obj_surface = decode_state->render_object; + + if (obj_surface && obj_surface->private_data) { + dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv; + } + + obj_surface = decode_state->reference_objects[1]; + + if (obj_surface && obj_surface->private_data) { + dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv; + } + + BEGIN_BCS_BATCH(batch, 7); + OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2)); + + if (dmv_write_buffer) + OUT_BCS_RELOC(batch, dmv_write_buffer, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + if (dmv_read_buffer) + OUT_BCS_RELOC(batch, dmv_read_buffer, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static int +gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile) +{ + int out_slice_data_bit_offset; + int slice_header_size = in_slice_data_bit_offset / 8; + int i, j; + + if (profile != 3) 
+ out_slice_data_bit_offset = in_slice_data_bit_offset; + else { + for (i = 0, j = 0; i < slice_header_size; i++, j++) { + if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) { + i++, j += 2; + } + } + + out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8; + } + + return out_slice_data_bit_offset; +} + +static void +gen8_mfd_vc1_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferVC1 *pic_param, + VASliceParameterBufferVC1 *slice_param, + VASliceParameterBufferVC1 *next_slice_param, + dri_bo *slice_data_bo, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int next_slice_start_vert_pos; + int macroblock_offset; + uint8_t *slice_data = NULL; + + dri_bo_map(slice_data_bo, 0); + slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset); + macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, + slice_param->macroblock_offset, + pic_param->sequence_fields.bits.profile); + dri_bo_unmap(slice_data_bo); + + if (next_slice_param) + next_slice_start_vert_pos = next_slice_param->slice_vertical_position; + else + next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16; + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_size - (macroblock_offset >> 3)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_offset + (macroblock_offset >> 3)); + OUT_BCS_BATCH(batch, + slice_param->slice_vertical_position << 16 | + next_slice_start_vert_pos << 0); + OUT_BCS_BATCH(batch, + (macroblock_offset & 0x7)); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_vc1_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVC1 *pic_param; + VASliceParameterBufferVC1 *slice_param, *next_slice_param, 
*next_slice_group_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; + + gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context); + gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static void 
+gen8_mfd_jpeg_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct object_surface *obj_surface; + VAPictureParameterBufferJPEGBaseline *pic_param; + int subsampling = SUBSAMPLE_YUV420; + + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + if (pic_param->num_components == 1) + subsampling = SUBSAMPLE_YUV400; + else if (pic_param->num_components == 3) { + int h1 = pic_param->components[0].h_sampling_factor; + int h2 = pic_param->components[1].h_sampling_factor; + int h3 = pic_param->components[2].h_sampling_factor; + int v1 = pic_param->components[0].v_sampling_factor; + int v2 = pic_param->components[1].v_sampling_factor; + int v3 = pic_param->components[2].v_sampling_factor; + + if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV420; + else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV422H; + else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV444; + else if (h1 == 4 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV411; + else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV422V; + else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 2 && v3 == 2) + subsampling = SUBSAMPLE_YUV422H; + else if (h2 == 2 && h2 == 2 && h3 == 2 && + v1 == 2 && v2 == 1 && v3 == 1) + subsampling = SUBSAMPLE_YUV422V; + else + assert(0); + } else { + assert(0); + } + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling); + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + 
gen7_mfd_context->pre_deblocking_output.valid = 1; + + gen7_mfd_context->post_deblocking_output.bo = NULL; + gen7_mfd_context->post_deblocking_output.valid = 0; + + gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0; + + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0; + + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0; + + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0; + + gen7_mfd_context->bitplane_read_buffer.bo = NULL; + gen7_mfd_context->bitplane_read_buffer.valid = 0; +} + +static const int va_to_gen7_jpeg_rotation[4] = { + GEN7_JPEG_ROTATION_0, + GEN7_JPEG_ROTATION_90, + GEN7_JPEG_ROTATION_180, + GEN7_JPEG_ROTATION_270 +}; + +static void +gen8_mfd_jpeg_pic_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferJPEGBaseline *pic_param; + int chroma_type = GEN7_YUV420; + int frame_width_in_blks; + int frame_height_in_blks; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + if (pic_param->num_components == 1) + chroma_type = GEN7_YUV400; + else if (pic_param->num_components == 3) { + int h1 = pic_param->components[0].h_sampling_factor; + int h2 = pic_param->components[1].h_sampling_factor; + int h3 = pic_param->components[2].h_sampling_factor; + int v1 = pic_param->components[0].v_sampling_factor; + int v2 = pic_param->components[1].v_sampling_factor; + int v3 = pic_param->components[2].v_sampling_factor; + + if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 1 && v3 == 1) + chroma_type = 
GEN7_YUV420; + else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + chroma_type = GEN7_YUV422H_2Y; + else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + chroma_type = GEN7_YUV444; + else if (h1 == 4 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) + chroma_type = GEN7_YUV411; + else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 1 && v3 == 1) + chroma_type = GEN7_YUV422V_2Y; + else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 2 && v3 == 2) + chroma_type = GEN7_YUV422H_4Y; + else if (h2 == 2 && h2 == 2 && h3 == 2 && + v1 == 2 && v2 == 1 && v3 == 1) + chroma_type = GEN7_YUV422V_4Y; + else + assert(0); + } + + if (chroma_type == GEN7_YUV400 || + chroma_type == GEN7_YUV444 || + chroma_type == GEN7_YUV422V_2Y) { + frame_width_in_blks = ((pic_param->picture_width + 7) / 8); + frame_height_in_blks = ((pic_param->picture_height + 7) / 8); + } else if (chroma_type == GEN7_YUV411) { + frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4; + frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4; + } else { + frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2; + frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2; + } + + BEGIN_BCS_BATCH(batch, 3); + OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2)); + OUT_BCS_BATCH(batch, + (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */ + (chroma_type << 0)); + OUT_BCS_BATCH(batch, + ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */ + ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */ + ADVANCE_BCS_BATCH(batch); +} + +static const int va_to_gen7_jpeg_hufftable[2] = { + MFX_HUFFTABLE_ID_Y, + MFX_HUFFTABLE_ID_UV +}; + +static void +gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context, + int num_tables) +{ + VAHuffmanTableBufferJPEGBaseline *huffman_table; + struct intel_batchbuffer *batch = 
gen7_mfd_context->base.batch; + int index; + + if (!decode_state->huffman_table || !decode_state->huffman_table->buffer) + return; + + huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer; + + for (index = 0; index < num_tables; index++) { + int id = va_to_gen7_jpeg_hufftable[index]; + if (!huffman_table->load_huffman_table[index]) + continue; + BEGIN_BCS_BATCH(batch, 53); + OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2)); + OUT_BCS_BATCH(batch, id); + intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12); + intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12); + intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16); + intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164); + ADVANCE_BCS_BATCH(batch); + } +} + +static const int va_to_gen7_jpeg_qm[5] = { + -1, + MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, + MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, + MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, + MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX +}; + +static void +gen8_mfd_jpeg_qm_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferJPEGBaseline *pic_param; + VAIQMatrixBufferJPEGBaseline *iq_matrix; + int index; + + if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer) + return; + + iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer; + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + assert(pic_param->num_components <= 3); + + for (index = 0; index < pic_param->num_components; index++) { + int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1; + int qm_type; + unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector]; + unsigned char raster_qm[64]; + int j; + + if (id > 4 || id < 1) + 
continue; + + if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector]) + continue; + + qm_type = va_to_gen7_jpeg_qm[id]; + + for (j = 0; j < 64; j++) + raster_qm[zigzag_direct[j]] = qm[j]; + + gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context); + } +} + +static void +gen8_mfd_jpeg_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferJPEGBaseline *pic_param, + VASliceParameterBufferJPEGBaseline *slice_param, + VASliceParameterBufferJPEGBaseline *next_slice_param, + dri_bo *slice_data_bo, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int scan_component_mask = 0; + int i; + + assert(slice_param->num_components > 0); + assert(slice_param->num_components < 4); + assert(slice_param->num_components <= pic_param->num_components); + + for (i = 0; i < slice_param->num_components; i++) { + switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) { + case 1: + scan_component_mask |= (1 << 0); + break; + case 2: + scan_component_mask |= (1 << 1); + break; + case 3: + scan_component_mask |= (1 << 2); + break; + default: + assert(0); + break; + } + } + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_size); + OUT_BCS_BATCH(batch, + slice_param->slice_data_offset); + OUT_BCS_BATCH(batch, + slice_param->slice_horizontal_position << 16 | + slice_param->slice_vertical_position << 0); + OUT_BCS_BATCH(batch, + ((slice_param->num_components != 1) << 30) | /* interleaved */ + (scan_component_mask << 27) | /* scan components */ + (0 << 26) | /* disable interrupt allowed */ + (slice_param->num_mcus << 0)); /* MCU count */ + OUT_BCS_BATCH(batch, + (slice_param->restart_interval << 0)); /* RestartInterval */ + ADVANCE_BCS_BATCH(batch); +} + +/* Workaround for JPEG decoding on Ivybridge */ + +VAStatus +i965_DestroySurfaces(VADriverContextP ctx, 
+ VASurfaceID *surface_list, + int num_surfaces); +VAStatus +i965_CreateSurfaces(VADriverContextP ctx, + int width, + int height, + int format, + int num_surfaces, + VASurfaceID *surfaces); + +static struct { + int width; + int height; + unsigned char data[32]; + int data_size; + int data_bit_offset; + int qp; +} gen7_jpeg_wa_clip = { + 16, + 16, + { + 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c, + 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00 + }, + 14, + 40, + 28, +}; + +static void +gen8_jpeg_wa_init(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAStatus status; + struct object_surface *obj_surface; + + if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) + i965_DestroySurfaces(ctx, + &gen7_mfd_context->jpeg_wa_surface_id, + 1); + + status = i965_CreateSurfaces(ctx, + gen7_jpeg_wa_clip.width, + gen7_jpeg_wa_clip.height, + VA_RT_FORMAT_YUV420, + 1, + &gen7_mfd_context->jpeg_wa_surface_id); + assert(status == VA_STATUS_SUCCESS); + + obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); + assert(obj_surface); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + gen7_mfd_context->jpeg_wa_surface_object = obj_surface; + + if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { + gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr, + "JPEG WA data", + 0x1000, + 0x1000); + dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo, + 0, + gen7_jpeg_wa_clip.data_size, + gen7_jpeg_wa_clip.data); + } +} + +static void +gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Currently only support long format */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* disable 
Stream-Out */ + (0 << 9) | /* Post Deblocking Output */ + (1 << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (MFX_CODEC_DECODE << 4) | /* decoding mode */ + (MFX_FORMAT_AVC << 0)); + OUT_BCS_BATCH(batch, + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ + OUT_BCS_BATCH(batch, 0); /* reserved */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_surface_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object; + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((obj_surface->orig_width - 1) << 18) | + ((obj_surface->orig_height - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* interleave chroma, set to 0 for JPEG */ + (0 << 22) | /* surface object control state, ignored */ + ((obj_surface->width - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for U(Cb), must be 0 */ + (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for V(Cr), must be 0 */ + (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface = 
gen7_mfd_context->jpeg_wa_surface_object; + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + dri_bo *intra_bo; + int i; + + intra_bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + 128 * 64, + 0x1000); + + BEGIN_BCS_BATCH(batch, 61); + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + OUT_BCS_RELOC(batch, + obj_surface->bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + + OUT_BCS_BATCH(batch, 0); /* post deblocking */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* uncompressed-video & stream out 7-12 */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 13-15 is for intra row store scratch */ + OUT_BCS_RELOC(batch, + intra_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 16-18 is for deblocking filter */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* DW 19..50 */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + OUT_BCS_BATCH(batch, 0); + + /* the DW52-54 is for mb status address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW56-60 is for ILDB & second ILDB address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + dri_bo_unreference(intra_bo); +} + +static void +gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = 
gen7_mfd_context->base.batch; + dri_bo *bsd_mpc_bo, *mpr_bo; + + bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + 11520, /* 1.5 * 120 * 64 */ + 0x1000); + + mpr_bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + 7680, /* 1. 0 * 120 * 64 */ + 0x1000); + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + + OUT_BCS_RELOC(batch, + bsd_mpc_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_RELOC(batch, + mpr_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + dri_bo_unreference(bsd_mpc_bo); + dri_bo_unreference(mpr_bo); +} + +static void +gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + +} + +static void +gen8_jpeg_wa_avc_img_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int img_struct = 0; + int mbaff_frame_flag = 0; + unsigned int width_in_mbs = 1, height_in_mbs = 1; + + BEGIN_BCS_BATCH(batch, 16); + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); + OUT_BCS_BATCH(batch, + width_in_mbs * height_in_mbs); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + OUT_BCS_BATCH(batch, + (0 << 24) | + (0 << 16) | + (0 << 14) | + (0 << 13) | + (0 << 12) | /* differ from GEN6 */ + (0 << 10) | + (img_struct << 8)); + OUT_BCS_BATCH(batch, + (1 << 10) | /* 4:2:0 */ + (1 << 7) | /* CABAC */ + (0 << 6) | + (0 << 5) | + (0 << 4) | + (0 << 3) | + (1 << 2) | + (mbaff_frame_flag << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + 
OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 71); + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* reference surfaces 0..15 */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); /* top */ + OUT_BCS_BATCH(batch, 0); /* bottom */ + } + + OUT_BCS_BATCH(batch, 0); + + /* the current decoding frame/field */ + OUT_BCS_BATCH(batch, 0); /* top */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POC List */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); + OUT_BCS_RELOC(batch, + gen7_mfd_context->jpeg_wa_slice_data_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + /* the input 
bitsteam format on GEN7 differs from GEN6 */ + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 31) | + (0 << 14) | + (0 << 12) | + (0 << 10) | + (0 << 8)); + OUT_BCS_BATCH(batch, + ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | + (0 << 5) | + (0 << 4) | + (1 << 3) | /* LastSlice Flag */ + (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1; + int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; + int first_mb_in_slice = 0; + int slice_type = SLICE_TYPE_I; + + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, slice_type); + OUT_BCS_BATCH(batch, + (num_ref_idx_l1 << 24) | + (num_ref_idx_l0 << 16) | + (0 << 8) | + (0 << 0)); + OUT_BCS_BATCH(batch, + (0 << 29) | + (1 << 27) | /* disable Deblocking */ + (0 << 24) | + (gen7_jpeg_wa_clip.qp << 16) | + (0 << 8) | + (0 << 0)); + OUT_BCS_BATCH(batch, + (slice_ver_pos << 24) | + (slice_hor_pos << 16) | + (first_mb_in_slice << 0)); + OUT_BCS_BATCH(batch, + (next_slice_ver_pos << 16) | + (next_slice_hor_pos << 0)); + OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_jpeg_wa(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + gen8_jpeg_wa_init(ctx, gen7_mfd_context); + intel_batchbuffer_emit_mi_flush(batch); + gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context); + 
gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context); + + gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context); +} + +void +gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferJPEGBaseline *pic_param; + VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param; + dri_bo *slice_data_bo; + int i, j, max_selector = 0; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + /* Currently only support Baseline DCT */ + gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + gen8_mfd_jpeg_wa(ctx, gen7_mfd_context); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + 
gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + int component; + + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + for (component = 0; component < slice_param->num_components; component++) { + if (max_selector < slice_param->components[component].dc_table_selector) + max_selector = slice_param->components[component].dc_table_selector; + + if (max_selector < slice_param->components[component].ac_table_selector) + max_selector = slice_param->components[component].ac_table_selector; + } + + slice_param++; + } + } + + assert(max_selector < 2); + gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + 
gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static VAStatus +gen8_mfd_decode_picture(VADriverContextP ctx, + VAProfile profile, + union codec_state *codec_state, + struct hw_context *hw_context) + +{ + struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context; + struct decode_state *decode_state = &codec_state->decode; + VAStatus vaStatus; + + assert(gen7_mfd_context); + + vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state); + + if (vaStatus != VA_STATUS_SUCCESS) + goto out; + + gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1; + + switch (profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileJPEGBaseline: + gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + default: + assert(0); + break; + } + + vaStatus = VA_STATUS_SUCCESS; + +out: + return vaStatus; +} + +static void +gen8_mfd_context_destroy(void *hw_context) +{ + struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context; + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL; + + 
dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo); + gen7_mfd_context->bitplane_read_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo); + + intel_batchbuffer_free(gen7_mfd_context->base.batch); + free(gen7_mfd_context); +} + +static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1; + gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1; + gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1; + gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1; +} + +struct hw_context * +gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context)); + int i; + + gen7_mfd_context->base.destroy = gen8_mfd_context_destroy; + gen7_mfd_context->base.run = gen8_mfd_decode_picture; + gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0); + + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; + gen7_mfd_context->reference_surface[i].frame_store_id = -1; + } + + gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; + + switch (obj_config->profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + gen8_mfd_mpeg2_context_init(ctx, 
gen7_mfd_context); + break; + + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + gen8_mfd_avc_context_init(ctx, gen7_mfd_context); + break; + default: + break; + } + return (struct hw_context *)gen7_mfd_context; +} diff --git a/src/gen8_vme.c b/src/gen8_vme.c new file mode 100644 index 00000000..3fe1605c --- /dev/null +++ b/src/gen8_vme.c @@ -0,0 +1,1035 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + * Xiang Haihao + */ + +#include +#include +#include +#include +#include + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_encoder.h" +#include "gen6_vme.h" +#include "gen6_mfc.h" + +#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + +#define VME_INTRA_SHADER 0 +#define VME_INTER_SHADER 1 +#define VME_BINTER_SHADER 3 +#define VME_BATCHBUFFER 2 + +#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */ +#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */ +#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */ + +#define VME_MSG_LENGTH 32 + +static const uint32_t gen8_vme_intra_frame[][4] = { +#include "shaders/vme/intra_frame_haswell.g75b" +}; + +static const uint32_t gen8_vme_inter_frame[][4] = { +#include "shaders/vme/inter_frame_haswell.g75b" +}; + +static const uint32_t gen8_vme_inter_bframe[][4] = { +#include "shaders/vme/inter_bframe_haswell.g75b" +}; + +static const uint32_t gen8_vme_batchbuffer[][4] = { +#include 
"shaders/vme/batchbuffer.g75b" +}; + +static struct i965_kernel gen8_vme_kernels[] = { + { + "VME Intra Frame", + VME_INTRA_SHADER, /*index*/ + gen8_vme_intra_frame, + sizeof(gen8_vme_intra_frame), + NULL + }, + { + "VME inter Frame", + VME_INTER_SHADER, + gen8_vme_inter_frame, + sizeof(gen8_vme_inter_frame), + NULL + }, + { + "VME BATCHBUFFER", + VME_BATCHBUFFER, + gen8_vme_batchbuffer, + sizeof(gen8_vme_batchbuffer), + NULL + }, + { + "VME inter BFrame", + VME_BINTER_SHADER, + gen8_vme_inter_bframe, + sizeof(gen8_vme_inter_bframe), + NULL + } +}; + +static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = { +#include "shaders/vme/intra_frame_haswell.g75b" +}; + +static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = { +#include "shaders/vme/mpeg2_inter_haswell.g75b" +}; + +static const uint32_t gen8_vme_mpeg2_batchbuffer[][4] = { +#include "shaders/vme/batchbuffer.g75b" +}; + +static struct i965_kernel gen8_vme_mpeg2_kernels[] = { + { + "VME Intra Frame", + VME_INTRA_SHADER, /*index*/ + gen8_vme_mpeg2_intra_frame, + sizeof(gen8_vme_mpeg2_intra_frame), + NULL + }, + { + "VME inter Frame", + VME_INTER_SHADER, + gen8_vme_mpeg2_inter_frame, + sizeof(gen8_vme_mpeg2_inter_frame), + NULL + }, + { + "VME BATCHBUFFER", + VME_BATCHBUFFER, + gen8_vme_mpeg2_batchbuffer, + sizeof(gen8_vme_mpeg2_batchbuffer), + NULL + }, +}; + +/* only used for VME source surface state */ +static void +gen8_vme_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + vme_context->vme_surface2_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_media_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = 
encoder_context->vme_context; + + vme_context->vme_media_rw_surface_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + vme_context->vme_media_chroma_surface_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_output_buffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + + if (is_intra) + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2; + else + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24; + /* + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref. + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24. 
+ */ + + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME output buffer", + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block, + 0x1000); + assert(vme_context->vme_output.bo); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ + vme_context->vme_batchbuffer.pitch = 16; + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME batchbuffer", + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, + 0x1000); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_batchbuffer, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static VAStatus +gen8_vme_surface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct object_surface *obj_surface; + + /*Setup surfaces state*/ + /* current picture for encoding */ + obj_surface = encode_state->input_yuv_object; + gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); + gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); + 
gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); + + if (!is_intra) { + /* reference 0 */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + + /* reference 1 */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + } + + /* VME output */ + gen8_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); + gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_interface_descriptor_data *desc; + int i; + dri_bo *bo; + + bo = vme_context->gpe_context.idrt.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = bo->virtual; + + for (i = 0; i < vme_context->vme_kernel_sum; i++) { + struct i965_kernel *kernel; + kernel = &vme_context->gpe_context.kernels[i]; + assert(sizeof(*desc) == 32); + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); + desc->desc2.sampler_count = 0; /* FIXME: */ + desc->desc2.sampler_state_pointer = 0; + desc->desc3.binding_table_entry_count = 1; /* FIXME: */ + desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc4.constant_urb_entry_read_offset = 0; + desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH; + + /*kernel start*/ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), + kernel->bo); + desc++; + } + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus 
gen8_vme_constant_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned char *constant_buffer; + unsigned int *vme_state_message; + int mv_num = 32; + + vme_state_message = (unsigned int *)vme_context->vme_state_message; + + if (encoder_context->codec == CODEC_H264) { + if (vme_context->h264_level >= 30) { + mv_num = 16; + + if (vme_context->h264_level >= 31) + mv_num = 8; + } + } else if (encoder_context->codec == CODEC_MPEG2) { + mv_num = 2; + } + + vme_state_message[31] = mv_num; + + dri_bo_map(vme_context->gpe_context.curbe.bo, 1); + assert(vme_context->gpe_context.curbe.bo->virtual); + constant_buffer = vme_context->gpe_context.curbe.bo->virtual; + + /* VME MV/Mb cost table is passed by using const buffer */ + /* Now it uses the fixed search path. So it is constructed directly + * in the GPU shader. + */ + memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128); + + dri_bo_unmap(vme_context->gpe_context.curbe.bo); + + return VA_STATUS_SUCCESS; +} + +static const unsigned int intra_mb_mode_cost_table[] = { + 0x31110001, // for qp0 + 0x09110001, // for qp1 + 0x15030001, // for qp2 + 0x0b030001, // for qp3 + 0x0d030011, // for qp4 + 0x17210011, // for qp5 + 0x41210011, // for qp6 + 0x19210011, // for qp7 + 0x25050003, // for qp8 + 0x1b130003, // for qp9 + 0x1d130003, // for qp10 + 0x27070021, // for qp11 + 0x51310021, // for qp12 + 0x29090021, // for qp13 + 0x35150005, // for qp14 + 0x2b0b0013, // for qp15 + 0x2d0d0013, // for qp16 + 0x37170007, // for qp17 + 0x61410031, // for qp18 + 0x39190009, // for qp19 + 0x45250015, // for qp20 + 0x3b1b000b, // for qp21 + 0x3d1d000d, // for qp22 + 0x47270017, // for qp23 + 0x71510041, // for qp24 ! 
center for qp=0..30 + 0x49290019, // for qp25 + 0x55350025, // for qp26 + 0x4b2b001b, // for qp27 + 0x4d2d001d, // for qp28 + 0x57370027, // for qp29 + 0x81610051, // for qp30 + 0x57270017, // for qp31 + 0x81510041, // for qp32 ! center for qp=31..51 + 0x59290019, // for qp33 + 0x65350025, // for qp34 + 0x5b2b001b, // for qp35 + 0x5d2d001d, // for qp36 + 0x67370027, // for qp37 + 0x91610051, // for qp38 + 0x69390029, // for qp39 + 0x75450035, // for qp40 + 0x6b3b002b, // for qp41 + 0x6d3d002d, // for qp42 + 0x77470037, // for qp43 + 0xa1710061, // for qp44 + 0x79490039, // for qp45 + 0x85550045, // for qp46 + 0x7b4b003b, // for qp47 + 0x7d4d003d, // for qp48 + 0x87570047, // for qp49 + 0xb1810071, // for qp50 + 0x89590049 // for qp51 +}; + +static void gen8_vme_state_setup_fixup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *vme_state_message) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + + if (slice_param->slice_type != SLICE_TYPE_I && + slice_param->slice_type != SLICE_TYPE_SI) + return; + if (encoder_context->rate_control_mode == VA_RC_CQP) + vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; + else + vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY]; +} + +static VAStatus gen8_vme_vme_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned int *vme_state_message; + int i; + + //pass the MV/Mb cost into VME message on HASWell + 
assert(vme_context->vme_state_message); + vme_state_message = (unsigned int *)vme_context->vme_state_message; + + vme_state_message[0] = 0x4a4a4a4a; + vme_state_message[1] = 0x4a4a4a4a; + vme_state_message[2] = 0x4a4a4a4a; + vme_state_message[3] = 0x22120200; + vme_state_message[4] = 0x62524232; + + for (i=5; i < 8; i++) { + vme_state_message[i] = 0; + } + + switch (encoder_context->codec) { + case CODEC_H264: + gen8_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); + + break; + + default: + /* no fixup */ + break; + } + + return VA_STATUS_SUCCESS; +} + + +static void +gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_x = 0, mb_y = 0; + int i, s; + unsigned int *command_ptr; + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + int slice_mb_begin = pSliceParameter->macroblock_address; + int slice_mb_number = pSliceParameter->num_macroblocks; + unsigned int mb_intra_ub; + int slice_mb_x = pSliceParameter->macroblock_address % mb_width; + for (i = 0; i < slice_mb_number; ) { + int mb_count = i + slice_mb_begin; + mb_x = mb_count % mb_width; + mb_y = mb_count / mb_width; + mb_intra_ub = 0; + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + } + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + if (i < mb_width) { + if (i == 0) + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE); + mb_intra_ub &= 
~(INTRA_PRED_AVAIL_FLAG_BCD_MASK); + if ((i == (mb_width - 1)) && slice_mb_x) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + } + + if ((i == mb_width) && slice_mb_x) { + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); + } + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /*inline data */ + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + i += 1; + } + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} + +static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + i965_gpe_context_init(ctx, &vme_context->gpe_context); + + /* VME output buffer */ + dri_bo_unreference(vme_context->vme_output.bo); + vme_context->vme_output.bo = NULL; + + dri_bo_unreference(vme_context->vme_batchbuffer.bo); + vme_context->vme_batchbuffer.bo = NULL; + + /* VME state */ + dri_bo_unreference(vme_context->vme_state.bo); + vme_context->vme_state.bo = NULL; +} + +static void gen8_vme_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = 
pSequenceParameter->picture_height_in_mbs; + int kernel_shader; + bool allow_hwscore = true; + int s; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || + (pSliceParameter->slice_type == SLICE_TYPE_SI)) { /* I and SI slices select the intra kernel; was a duplicated SLICE_TYPE_I test */ + kernel_shader = VME_INTRA_SHADER; + } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + kernel_shader = VME_INTER_SHADER; + } else { + kernel_shader = VME_BINTER_SHADER; + if (!allow_hwscore) + kernel_shader = VME_INTER_SHADER; + } + if (allow_hwscore) + gen7_vme_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); + else + gen8_vme_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); + + intel_batchbuffer_start_atomic(batch, 0x1000); + gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RELOC(batch, + vme_context->vme_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BATCH(batch); + + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus gen8_vme_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264
*)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if (!vme_context->h264_level || + (vme_context->h264_level != pSequenceParameter->level_idc)) { + vme_context->h264_level = pSequenceParameter->level_idc; + } + + intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + + /*Setup all the memory object*/ + gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); + gen8_vme_interface_setup(ctx, encode_state, encoder_context); + //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context); + + /*Programing media pipeline*/ + gen8_vme_pipeline_programing(ctx, encode_state, encoder_context); + + return vaStatus; +} + +static VAStatus gen8_vme_run(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + + intel_batchbuffer_flush(batch); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen8_vme_stop(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_vme_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_vme_media_init(ctx, encoder_context); + gen8_vme_prepare(ctx, encode_state, encoder_context); + gen8_vme_run(ctx, encode_state, encoder_context); + gen8_vme_stop(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + int is_intra, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferMPEG2 
*seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + + if (is_intra) + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2; + else + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24; + /* + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref. + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24. + */ + + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME output buffer", + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block, + 0x1000); + assert(vme_context->vme_output.bo); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ + vme_context->vme_batchbuffer.pitch = 16; + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME batchbuffer", + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, + 
0x1000); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_batchbuffer, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static VAStatus +gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct object_surface *obj_surface; + + /*Setup surfaces state*/ + /* current picture for encoding */ + obj_surface = encode_state->input_yuv_object; + gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); + gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); + gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); + + if (!is_intra) { + /* reference 0: skipped when the slot is empty, same guard as reference 1 */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo != NULL) + gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + + /* reference 1 */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo != NULL) + gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + } + + /* VME output */ + gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context); + gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_x = 0, mb_y = 0; + int i, s, j; + unsigned int *command_ptr; + + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2
*)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + int slice_mb_begin = slice_param->macroblock_address; + int slice_mb_number = slice_param->num_macroblocks; + unsigned int mb_intra_ub; + int slice_mb_x = slice_param->macroblock_address % mb_width; + + for (i = 0; i < slice_mb_number;) { + int mb_count = i + slice_mb_begin; + + mb_x = mb_count % mb_width; + mb_y = mb_count / mb_width; + mb_intra_ub = 0; + + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + } + + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + + if (i < mb_width) { + if (i == 0) + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE); + + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK); + + if ((i == (mb_width - 1)) && slice_mb_x) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + } + + if ((i == mb_width) && slice_mb_x) { + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /*inline data */ + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + i += 1; + } + + slice_param++; + } + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} + +static void +gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + 
int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + 0, + encoder_context); + + intel_batchbuffer_start_atomic(batch, 0x1000); + gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RELOC(batch, + vme_context->vme_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BATCH(batch); + + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus +gen8_vme_mpeg2_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + + /*Setup all the memory object*/ + gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + gen8_vme_interface_setup(ctx, encode_state, encoder_context); + gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context); + + /*Programing media pipeline*/ + gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + + return vaStatus; +} + +static VAStatus +gen8_vme_mpeg2_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_vme_media_init(ctx, encoder_context); + gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context); + gen8_vme_run(ctx, encode_state, encoder_context); + gen8_vme_stop(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_vme_context_destroy(void *context) +{ + struct gen6_vme_context *vme_context 
= context; + + i965_gpe_context_destroy(&vme_context->gpe_context); + + dri_bo_unreference(vme_context->vme_output.bo); + vme_context->vme_output.bo = NULL; + + dri_bo_unreference(vme_context->vme_state.bo); + vme_context->vme_state.bo = NULL; + + dri_bo_unreference(vme_context->vme_batchbuffer.bo); + vme_context->vme_batchbuffer.bo = NULL; + + if (vme_context->vme_state_message) { + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + } + + free(vme_context); +} + +Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); + struct i965_kernel *vme_kernel_list = NULL; + int i965_kernel_num = 0; /* defined value for the default: path when assert() is compiled out */ + + switch (encoder_context->codec) { + case CODEC_H264: + vme_kernel_list = gen8_vme_kernels; + encoder_context->vme_pipeline = gen8_vme_pipeline; + i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel); + break; + + case CODEC_MPEG2: + vme_kernel_list = gen8_vme_mpeg2_kernels; + encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline; + i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel); + + break; + + default: + /* never get here */ + assert(0); + + break; + } + vme_context->vme_kernel_sum = i965_kernel_num; + vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; + vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + + vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH; + + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + vme_context->gpe_context.vfe_state.num_urb_entries = 16; + vme_context->gpe_context.vfe_state.gpgpu_mode = 0; + vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + vme_context->gpe_context.vfe_state.curbe_allocation_size =
CURBE_ALLOCATION_SIZE - 1; + + gen7_vme_scoreboard_init(ctx, vme_context); + + i965_gpe_load_kernels(ctx, + &vme_context->gpe_context, + vme_kernel_list, + i965_kernel_num); + vme_context->vme_surface2_setup = gen7_gpe_surface2_setup; + vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; + vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup; + vme_context->vme_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup; + + encoder_context->vme_context = vme_context; + encoder_context->vme_context_destroy = gen8_vme_context_destroy; + + vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int)); + + return True; +} diff --git a/src/i965_decoder.h b/src/i965_decoder.h index 4f7d2cc9..c7d49d7a 100644 --- a/src/i965_decoder.h +++ b/src/i965_decoder.h @@ -49,4 +49,6 @@ struct gen_buffer { struct hw_context * gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); +extern struct hw_context * +gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); #endif /* I965_DECODER_H */ diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index c9ed624a..c6c05914 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -320,8 +320,8 @@ static struct hw_codec_info gen75_hw_codec_info = { /* TODO: Add the separate call back function for Gen8 */ static struct hw_codec_info gen8_hw_codec_info = { - .dec_hw_context_init = gen75_dec_hw_context_init, - .enc_hw_context_init = gen75_enc_hw_context_init, + .dec_hw_context_init = gen8_dec_hw_context_init, + .enc_hw_context_init = gen8_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, .max_width = 4096, .max_height = 4096, diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 1e46a1a4..9bf133f0 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -401,3 +401,10 @@ gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config { return intel_enc_hw_context_init(ctx, obj_config, 
gen75_vme_context_init, gen75_mfc_context_init); } + +struct hw_context * +gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) +{ + return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init); +} + diff --git a/src/i965_encoder.h b/src/i965_encoder.h index 29bd7028..71396d61 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -62,6 +62,8 @@ struct intel_encoder_context extern struct hw_context * gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); +extern struct hw_context * +gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); #endif /* _I965_ENCODER_H_ */ -- cgit v1.2.1 From bfb5588a4a0c3ff3f380255f14872f4dd985099a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 7 Jan 2013 13:18:47 +0800 Subject: Add the BDW idrt/surface/sampler state definition Signed-off-by: Zhao Yakui --- src/i965_render.c | 10 ++ src/i965_render.h | 4 + src/i965_structs.h | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 330 insertions(+) diff --git a/src/i965_render.c b/src/i965_render.c index 1161fb61..f5eb4f39 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -762,6 +762,16 @@ gen7_render_set_surface_scs(struct gen7_surface_state *ss) ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; } +/* Set "Shader Channel Select" for GEN8+ */ +void +gen8_render_set_surface_scs(struct gen8_surface_state *ss) +{ + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; +} + static void gen7_render_set_surface_state( struct gen7_surface_state *ss, diff --git a/src/i965_render.h b/src/i965_render.h index 1960aced..69046450 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -107,4 +107,8 @@ struct gen7_surface_state; void gen7_render_set_surface_scs(struct gen7_surface_state *ss); +struct 
gen8_surface_state; +void +gen8_render_set_surface_scs(struct gen8_surface_state *ss); + #endif /* _I965_RENDER_H_ */ diff --git a/src/i965_structs.h b/src/i965_structs.h index c7dd2720..e3f9b3dd 100644 --- a/src/i965_structs.h +++ b/src/i965_structs.h @@ -968,6 +968,322 @@ struct i965_sampler_dndi } dw7; }; +struct gen8_interface_descriptor_data +{ + struct { + unsigned int pad0:6; + unsigned int kernel_start_pointer:26; + } desc0; + + struct { + unsigned int kernel_start_pointer_high:16; + unsigned int pad0:16; + } desc1; + + struct { + unsigned int pad0:7; + unsigned int software_exception_enable:1; + unsigned int pad1:3; + unsigned int maskstack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_opcode_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int denorm_mode:1; + unsigned int pad4:12; + } desc2; + + struct { + unsigned int pad0:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc3; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:11; + unsigned int pad0: 16; + } desc4; + + struct { + unsigned int constant_urb_entry_read_offset:16; + unsigned int constant_urb_entry_read_length:16; + } desc5; + + struct { + unsigned int num_threads_in_tg:10; + unsigned int pad0:5; + unsigned int global_barrier_enable:1; + unsigned int shared_local_memory_size:5; + unsigned int barrier_enable:1; + unsigned int rounding_mode:2; + unsigned int pad1:8; + } desc6; + + struct { + unsigned int cross_thread_constant_data_read_length:8; + unsigned int pad0:24; + } desc7; +}; + +struct gen8_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int media_boundary_pixel_mode:2; + unsigned int render_cache_read_write:1; + 
unsigned int sampler_l2bypass_disable:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int horizontal_alignment:2; + /* Field 16 */ + unsigned int vertical_alignment:2; + unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */ + unsigned int pad0:1; + unsigned int is_array:1; + unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ + } ss0; + + struct { + unsigned int surface_qpitch:15; + unsigned int pad0:4; + unsigned int base_mip_level:5; + unsigned int surface_mocs:7; + unsigned int pad1:1; + } ss1; + + struct { + unsigned int width:14; + unsigned int pad0:2; + unsigned int height:14; + unsigned int pad1:2; + } ss2; + + struct { + unsigned int pitch:18; + unsigned int pad:3; + unsigned int depth:11; + } ss3; + + struct { + unsigned int multisample_position_palette_index:3; + unsigned int num_multisamples:3; + unsigned int multisampled_surface_storage_format:1; + unsigned int render_target_view_extent:11; + unsigned int min_array_elt:11; + unsigned int rotation:2; + unsigned int force_ncmp_reduce_type:1; + } ss4; + + struct { + unsigned int mip_count:4; + unsigned int min_lod:4; + unsigned int pad0:4; + unsigned int pad1:2; + unsigned int coherence_type:1; + unsigned int pad2:3; + unsigned int pad3:2; + unsigned int ewa_disable_cube:1; + unsigned int y_offset:3; + unsigned int pad4:1; + unsigned int x_offset:7; + } ss5; + + struct { + unsigned int y_offset_uv_plane:14; + unsigned int pad0:2; + unsigned int x_offset_uv_plane:14; + unsigned int pad1:1; + unsigned int separate_uv_plane:1; + } ss6; + + struct { + unsigned int resource_min_lod:12; + unsigned int pad0:4; + unsigned int shader_chanel_select_a:3; + unsigned int shader_chanel_select_b:3; + unsigned int shader_chanel_select_g:3; + unsigned int shader_chanel_select_r:3; + unsigned int alpha_clear_color:1; + unsigned int blue_clear_color:1; + unsigned int green_clear_color:1; + unsigned int 
red_clear_color:1; + } ss7; + struct { + unsigned int base_addr; + } ss8; + + struct { + unsigned int base_addr_high:16; + unsigned int pad0:16; + } ss9; + + struct { + unsigned int pad0:12; + unsigned int aux_base_addr:20; + } ss10; + + union { + struct { + unsigned int y_offset_v_plane:14; + unsigned int pad0:2; + unsigned int x_offset_v_plane:14; + unsigned int pad1:2; + } planar; + struct { + unsigned int aux_base_addr_high:16; + unsigned int pad2:16; + } aux_buffer; + } ss11; + + struct { + unsigned int hier_depth_clear; + } ss12; + + struct { + unsigned int pad0; + } ss13; + + struct { + unsigned int pad0; + } ss14; + + struct { + unsigned int pad0; + } ss15; +}; + +struct gen8_surface_state2 +{ + struct { + unsigned int pad0; + } ss0; + + struct { + unsigned int cbcr_pixel_offset_v_direction:2; + unsigned int picture_structure:2; + unsigned int width:14; + unsigned int height:14; + } ss1; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int half_pitch_for_chroma:1; + unsigned int pitch:18; + unsigned int address_ctrl:1; /* clamp or mirror mode */ + unsigned int pad0:4; + unsigned int interleave_chroma:1; + unsigned int surface_format:5; + } ss2; + + struct { + unsigned int y_offset_for_cb:14; + unsigned int pad0:2; + unsigned int x_offset_for_cb:14; + unsigned int pad1:2; + } ss3; + + struct { + unsigned int y_offset_for_cr:15; + unsigned int pad0:1; + unsigned int x_offset_for_cr:14; + unsigned int pad1:2; + } ss4; + + struct { + unsigned int surface_object_mocs:7; + unsigned int pad0:11; + unsigned int pad1:2; + unsigned int pad2:10; + unsigned int vert_line_stride_offset:1; + unsigned int vert_line_stride:1; + } ss5; + + struct { + unsigned int base_addr; + } ss6; + + struct { + unsigned int base_addr_high:16; + unsigned int pad0:16; + } ss7; +}; + +struct gen8_sampler_state +{ + struct + { + unsigned int aniso_algorithm:1; + unsigned int lod_bias:13; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned 
int mip_filter:2; + unsigned int base_level:5; + unsigned int lod_preclamp:2; + unsigned int default_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int cube_control_mode:1; + unsigned int shadow_function:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int max_lod:12; + unsigned int min_lod:12; + } ss1; + + struct + { + unsigned int lod_clamp_mag_mode:1; /* MIPNONE or MIPFILTER */ + unsigned int flex_filter_vert_align:1; + unsigned int flex_filter_hort_align:1; + unsigned int flex_filter_coff_size:1; /* coff8 or coff 16 */ + unsigned int flex_filter_mode:1; + unsigned int pad0:1; + unsigned int indirect_state_pointer:18; /* point to the SAMPLE_INDIRECT_STATE */ + union { + unsigned char nonsep_filter_footer_highmask; + struct { + unsigned char pad1:2; + unsigned char sep_filter_height:2; + unsigned char sep_filter_width:2; + unsigned char sep_filter_coff_size:2; + } sep_filter; + } ss2_byte3; + } ss2; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad0:1; + unsigned int non_normalized_coord:1; + unsigned int trilinear_quality:2; + unsigned int address_round:6; + unsigned int max_aniso:3; + unsigned int pad1:2; + unsigned int nonsep_filter_foot_lowmask:8; + } ss3; +}; +/* TODO: Add the sampler_8x8 for Gen8+. + * AVS/Convolve is 256DWs. 
+ * MinMaxfilter/Erode/Dilate: 8DWs*/ + struct gen6_blend_state { -- cgit v1.2.1 From 0942d5e3f5370a180640b41411edd575370d8da8 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 22 Jan 2013 10:53:26 +0800 Subject: Use the updated structure/command for VPP on BDW Signed-off-by: Zhao Yakui [Haihao: directly use object instead of id] Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 918 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 894 insertions(+), 24 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 03530443..86806fa8 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -53,7 +53,13 @@ #define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) #define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_PADDED_SIZE_0_GEN8 ALIGN(sizeof(struct gen8_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN8 ALIGN(sizeof(struct gen8_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN8 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN8, SURFACE_STATE_PADDED_SIZE_1_GEN8) + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ + MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)) + #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES) @@ -715,6 +721,13 @@ static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_po const VARectangle *dst_rect, void *filter_param); +static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param); + static 
struct pp_module pp_modules_gen7[] = { { { @@ -1160,6 +1173,201 @@ static struct pp_module pp_modules_gen75[] = { }; +/* TODO: Modify the shader and then compile it again. + * Currently it is derived from Haswell*/ +static const uint32_t pp_null_gen8[][4] = { +}; + +static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen7/pl2_to_pl2.g75b" +}; + +static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen7/pl2_to_pl3.g75b" +}; + +static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen7/pl3_to_pl2.g75b" +}; + +static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen7/pl3_to_pl3.g75b" +}; + +static const uint32_t pp_nv12_scaling_gen8[][4] = { +#include "shaders/post_processing/gen7/avs.g75b" +}; + +static const uint32_t pp_nv12_avs_gen8[][4] = { +#include "shaders/post_processing/gen7/avs.g75b" +}; + +static const uint32_t pp_nv12_dndi_gen8[][4] = { +// #include "shaders/post_processing/gen7/dndi.g75b" +}; + +static const uint32_t pp_nv12_dn_gen8[][4] = { +// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b" +}; +static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen7/pl2_to_pa.g75b" +}; + +static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen7/pl3_to_pa.g75b" +}; + +static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen7/pa_to_pl2.g75b" +}; + +static struct pp_module pp_modules_gen8[] = { + { + { + "NULL module (for testing)", + PP_NULL, + pp_null_gen8, + sizeof(pp_null_gen8), + NULL, + }, + + pp_null_initialize, + }, + + { + { + "NV12_NV12", + PP_NV12_LOAD_SAVE_N12, + pp_nv12_load_save_nv12_gen8, + sizeof(pp_nv12_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12_PL3", + PP_NV12_LOAD_SAVE_PL3, + pp_nv12_load_save_pl3_gen8, + 
sizeof(pp_nv12_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_NV12", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_nv12_gen75, + sizeof(pp_pl3_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PL3", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_pl3_gen75, + sizeof(pp_pl3_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 Scaling module", + PP_NV12_SCALING, + pp_nv12_scaling_gen8, + sizeof(pp_nv12_scaling_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 AVS module", + PP_NV12_AVS, + pp_nv12_avs_gen8, + sizeof(pp_nv12_avs_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 DNDI module", + PP_NV12_DNDI, + pp_nv12_dndi_gen8, + sizeof(pp_nv12_dndi_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 DN module", + PP_NV12_DN, + pp_nv12_dn_gen8, + sizeof(pp_nv12_dn_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + { + { + "NV12_PA module", + PP_NV12_LOAD_SAVE_PA, + pp_nv12_load_save_pa_gen8, + sizeof(pp_nv12_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PA module", + PP_PL3_LOAD_SAVE_PA, + pp_pl3_load_save_pa_gen8, + sizeof(pp_pl3_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_NV12 module", + PP_PA_LOAD_SAVE_NV12, + pp_pa_load_save_nv12_gen8, + sizeof(pp_pa_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + +}; + + static int pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) { @@ -1249,6 +1457,25 @@ gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling) } } +static void +gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = 
I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + static void gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling) { @@ -1268,6 +1495,25 @@ gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling) } } +static void +gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + static void ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context) { @@ -1707,6 +1953,83 @@ gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con dri_bo_unmap(ss2_bo); } +static void +gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int pitch, int format, + int index, int is_target) +{ + struct gen8_surface_state *ss; + dri_bo *ss_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss_bo = pp_context->surface_state_binding_table.bo; + assert(ss_bo); + + dri_bo_map(ss_bo, True); + assert(ss_bo->virtual); + ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + ss->ss8.base_addr = surf_bo->offset + surf_bo_offset; + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + ss->ss3.pitch = pitch - 1; + gen8_pp_set_surface_tiling(ss, tiling); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, is_target ? 
I915_GEM_DOMAIN_RENDER : 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + surf_bo); + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); +} + + +static void +gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int wpitch, + int xoffset, int yoffset, + int format, int interleave_chroma, + int index) +{ + struct gen8_surface_state2 *ss2; + dri_bo *ss2_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss2_bo = pp_context->surface_state_binding_table.bo; + assert(ss2_bo); + + dri_bo_map(ss2_bo, True); + assert(ss2_bo->virtual); + ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss2, 0, sizeof(*ss2)); + ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset; + ss2->ss1.cbcr_pixel_offset_v_direction = 0; + ss2->ss1.width = width - 1; + ss2->ss1.height = height - 1; + ss2->ss2.pitch = wpitch - 1; + ss2->ss2.interleave_chroma = interleave_chroma; + ss2->ss2.surface_format = format; + ss2->ss3.x_offset_for_cb = xoffset; + ss2->ss3.y_offset_for_cb = yoffset; + gen8_pp_set_surface2_tiling(ss2, tiling); + dri_bo_emit_reloc(ss2_bo, + I915_GEM_DOMAIN_RENDER, 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6), + surf_bo); + ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss2_bo); +} + static void pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *surface, @@ -1993,6 +2316,183 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc } } +static void +gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct 
i965_post_processing_context *pp_context, + const struct i965_surface *surface, + int base_index, int is_target, + int *width, int *height, int *pitch, int *offset) +{ + struct object_surface *obj_surface; + struct object_image *obj_image; + dri_bo *bo; + int fourcc = pp_get_surface_fourcc(ctx, surface); + const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; + const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2; + int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); + int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); + int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || + fourcc == VA_FOURCC('R', 'G', 'B', 'X') || + fourcc == VA_FOURCC('B', 'G', 'R', 'A') || + fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + + if (surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)surface->base; + bo = obj_surface->bo; + width[0] = obj_surface->orig_width; + height[0] = obj_surface->orig_height; + pitch[0] = obj_surface->width; + offset[0] = 0; + + if (packed_yuv) { + if (is_target) + width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ + else + width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ + + pitch[0] = obj_surface->width * 2; + } else if (rgbx_format) { + if (is_target) + width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ + } + + width[1] = obj_surface->cb_cr_width; + height[1] = obj_surface->cb_cr_height; + pitch[1] = obj_surface->cb_cr_pitch; + offset[1] = obj_surface->y_cb_offset * obj_surface->width; + + width[2] = obj_surface->cb_cr_width; + height[2] = obj_surface->cb_cr_height; + pitch[2] = obj_surface->cb_cr_pitch; + offset[2] = obj_surface->y_cr_offset * obj_surface->width; + } else { + obj_image = (struct object_image 
*)surface->base; + bo = obj_image->bo; + width[0] = obj_image->image.width; + height[0] = obj_image->image.height; + pitch[0] = obj_image->image.pitches[0]; + offset[0] = obj_image->image.offsets[0]; + + if (rgbx_format) { + if (is_target) + width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ + } else if (packed_yuv) { + if (is_target) + width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */ + else + width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels */ + } else if (interleaved_uv) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height / 2; + pitch[1] = obj_image->image.pitches[1]; + offset[1] = obj_image->image.offsets[1]; + } else { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height / 2; + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height / 2; + pitch[2] = obj_image->image.pitches[V]; + offset[2] = obj_image->image.offsets[V]; + } + } + + if (is_target) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, 0, + width[0] / 4, height[0], pitch[0], + I965_SURFACEFORMAT_R8_UINT, + base_index, 1); + if (rgbx_format) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || + (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + /* It is stored as MSB: X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; + } + } + if (!packed_yuv && !rgbx_format) { + if (interleaved_uv) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], 
pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + } + } else { + int format0 = SURFACE_FORMAT_Y8_UNORM; + + switch (fourcc) { + case VA_FOURCC('Y', 'U', 'Y', '2'): + format0 = SURFACE_FORMAT_YCRCB_NORMAL; + break; + + case VA_FOURCC('U', 'Y', 'V', 'Y'): + format0 = SURFACE_FORMAT_YCRCB_SWAPY; + break; + + default: + break; + } + if (rgbx_format) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ + format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; + pp_static_parameter->grf2.src_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || + (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + pp_static_parameter->grf2.src_avs_rgb_swap = 1; + } + } + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[0], + width[0], height[0], pitch[0], + 0, 0, + format0, 0, + base_index); + + if (!packed_yuv && !rgbx_format) { + if (interleaved_uv) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); + } + } + } +} + static int pp_null_x_steps(void *private_context) { @@ -2907,14 +3407,130 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con sampler_8x8[index].dw3.strong_edge_weight = 7; sampler_8x8[index].dw3.ief4_smooth_enable = 0; - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct 
i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); + dri_bo_emit_reloc(pp_context->sampler_state_table.bo, + I915_GEM_DOMAIN_RENDER, + 0, + 0, + sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), + pp_context->sampler_state_table.bo_8x8); + + dri_bo_unmap(pp_context->sampler_state_table.bo); + + /* private function & data */ + pp_context->pp_x_steps = gen7_pp_avs_x_steps; + pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; + pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; + + pp_avs_context->dest_x = dst_rect->x; + pp_avs_context->dest_y = dst_rect->y; + pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); + pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); + pp_avs_context->src_w = src_rect->width; + pp_avs_context->src_h = src_rect->height; + pp_avs_context->horiz_range = (float)src_rect->width / src_width; + + int dw = (pp_avs_context->src_w - 1) / 16 + 1; + dw = MAX(dw, dst_rect->width); + + pp_static_parameter->grf1.pointer_to_inline_parameter = 7; + pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ + if (IS_HASWELL(i965->intel.device_id)) + pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */ + + pp_static_parameter->grf2.avs_wa_width = dw; + pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); + pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); + + pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; + pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - + (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; + pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) 
src_rect->x / src_width - + (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + + gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); + + dst_surface->flags = src_surface->flags; + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param) +{ +/* TODO: Add the sampler_8x8 state */ + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + struct i965_sampler_8x8_state *sampler_8x8_state; + int i; + int width[3], height[3], pitch[3], offset[3]; + int src_width, src_height; + + /* source surface */ + gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + width, height, pitch, offset); + src_height = height[0]; + + /* destination surface */ + gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + width, height, pitch, offset); + + /* sampler 8x8 state */ + dri_bo_map(pp_context->sampler_state_table.bo_8x8, True); + assert(pp_context->sampler_state_table.bo_8x8->virtual); + assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); + sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual; + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + + for (i = 0; i < 17; i++) { + /* for Y channel, currently ignore */ + sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0; + sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0; + sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0; + sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0; + sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0; + sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0; + 
sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0; + sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0; + sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0; + sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0; + sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0; + sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0; + sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0; + sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0; + sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0; + sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0; + /* for U/V channel, 0.25 */ + sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0; + sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0; + sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10; + sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10; + sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10; + sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10; + sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0; + sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0; + sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0; + sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0; + sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10; + sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10; + sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10; + sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10; + sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0; + sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0; + } + + sampler_8x8_state->dw136.default_sharpness_level = 0; + sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1; + sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; + 
sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; + dri_bo_unmap(pp_context->sampler_state_table.bo_8x8); - dri_bo_unmap(pp_context->sampler_state_table.bo); /* private function & data */ pp_context->pp_x_steps = gen7_pp_avs_x_steps; @@ -2928,16 +3544,12 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); pp_avs_context->src_w = src_rect->width; pp_avs_context->src_h = src_rect->height; - pp_avs_context->horiz_range = (float)src_rect->width / src_width; int dw = (pp_avs_context->src_w - 1) / 16 + 1; dw = MAX(dw, dst_rect->width); pp_static_parameter->grf1.pointer_to_inline_parameter = 7; - pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ - if (IS_HASWELL(i965->intel.device_id)) - pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */ - + pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ pp_static_parameter->grf2.avs_wa_width = dw; pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); @@ -2945,9 +3557,9 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; + (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - - (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + (float) pp_avs_context->dest_x * 
pp_avs_context->horiz_range / dw; gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); @@ -2956,7 +3568,6 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con return VA_STATUS_SUCCESS; } - static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *src_surface, @@ -4307,8 +4918,7 @@ gen6_pp_initialize( assert(bo); pp_context->vfe_state.bo = bo; - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id)) { static_param_size = sizeof(struct gen7_pp_static_parameter); inline_param_size = sizeof(struct gen7_pp_inline_parameter); } else { @@ -4338,6 +4948,92 @@ gen6_pp_initialize( return va_status; } + +static VAStatus +gen8_pp_initialize( + VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param +) +{ + VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct pp_module *pp_module; + dri_bo *bo; + int static_param_size, inline_param_size; + + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES, + 4096); + assert(bo); + pp_context->surface_state_binding_table.bo = bo; + + dri_bo_unreference(pp_context->curbe.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "constant buffer", + 4096, + 4096); + assert(bo); + pp_context->curbe.bo = bo; + + dri_bo_unreference(pp_context->idrt.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "interface discriptor", + sizeof(struct gen8_interface_descriptor_data), + 4096); + assert(bo); + pp_context->idrt.bo = bo; + pp_context->idrt.num_interface_descriptors = 0; + + 
dri_bo_unreference(pp_context->sampler_state_table.bo_8x8); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler 8x8 state ", + 4096, + 4096); + assert(bo); + pp_context->sampler_state_table.bo_8x8 = bo; + + + dri_bo_unreference(pp_context->vfe_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vfe state", + sizeof(struct i965_vfe_state), + 4096); + assert(bo); + pp_context->vfe_state.bo = bo; + + static_param_size = sizeof(struct gen7_pp_static_parameter); + inline_param_size = sizeof(struct gen7_pp_inline_parameter); + + memset(pp_context->pp_static_parameter, 0, static_param_size); + memset(pp_context->pp_inline_parameter, 0, inline_param_size); + + assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES); + pp_context->current_pp = pp_index; + pp_module = &pp_context->pp_modules[pp_index]; + + if (pp_module->initialize) + va_status = pp_module->initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + filter_param); + else + va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + + calculate_boundary_block_mask(pp_context, dst_rect); + + return va_status; +} + static void gen6_pp_interface_descriptor_table(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -4363,8 +5059,7 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); desc->desc4.constant_urb_entry_read_offset = 0; - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_id)) desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */ else desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */ @@ -4385,6 +5080,48 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, pp_context->idrt.num_interface_descriptors++; } +static void +gen8_pp_interface_descriptor_table(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct gen8_interface_descriptor_data *desc; + dri_bo *bo; + int pp_index = 
pp_context->current_pp; + + bo = pp_context->idrt.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + desc = bo->virtual; + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = + pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */ + desc->desc2.single_program_flow = 1; + desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754; + desc->desc3.sampler_count = 1; /* 1 - 4 samplers used */ + desc->desc3.sampler_state_pointer = + pp_context->sampler_state_table.bo->offset >> 5; + desc->desc4.binding_table_entry_count = 0; + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + + desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */ + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + offsetof(struct gen8_interface_descriptor_data, desc0), + pp_context->pp_modules[pp_index].kernel.bo); + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + desc->desc3.sampler_count << 2, + offsetof(struct gen8_interface_descriptor_data, desc3), + pp_context->sampler_state_table.bo); + + dri_bo_unmap(bo); + pp_context->idrt.num_interface_descriptors++; +} + static void gen6_pp_upload_constants(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -4417,6 +5154,14 @@ gen6_pp_states_setup(VADriverContextP ctx, gen6_pp_upload_constants(ctx, pp_context); } +static void +gen8_pp_states_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + gen8_pp_interface_descriptor_table(ctx, pp_context); + gen6_pp_upload_constants(ctx, pp_context); +} + static void gen6_pp_pipeline_select(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -4448,6 +5193,40 @@ gen6_pp_state_base_address(VADriverContextP ctx, ADVANCE_BATCH(batch); } +static void +gen8_pp_state_base_address(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + 
BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + /* DW1 Generate state address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4. Surface state address */ + OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + /* DW6. Dynamic state address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW8. Indirect object address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW10. Instruction base address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); +} + static void gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -4470,6 +5249,29 @@ gen6_pp_vfe_state(VADriverContextP ctx, ADVANCE_BATCH(batch); } +static void +gen8_pp_vfe_state(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->urb.num_vfe_entries - 1) << 16 | + pp_context->urb.num_vfe_entries << 8); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + static void gen6_pp_curbe_load(VADriverContextP ctx, struct 
i965_post_processing_context *pp_context) @@ -4508,6 +5310,24 @@ gen6_interface_descriptor_load(VADriverContextP ctx, ADVANCE_BATCH(batch); } +static void +gen8_interface_descriptor_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data)); + OUT_RELOC(batch, + pp_context->idrt.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + ADVANCE_BATCH(batch); +} + static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) { struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; @@ -4636,6 +5456,24 @@ gen6_pp_pipeline_setup(VADriverContextP ctx, intel_batchbuffer_end_atomic(batch); } +static void +gen8_pp_pipeline_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen6_pp_pipeline_select(ctx, pp_context); + gen8_pp_state_base_address(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen6_pp_curbe_load(ctx, pp_context); + gen8_interface_descriptor_load(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen6_pp_object_walker(ctx, pp_context); + intel_batchbuffer_end_atomic(batch); +} + static VAStatus gen6_post_processing( VADriverContextP ctx, @@ -4669,6 +5507,36 @@ gen6_post_processing( return va_status; } +static VAStatus +gen8_post_processing( + VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * 
filter_param +) +{ + VAStatus va_status; + + va_status = gen8_pp_initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + pp_index, + filter_param); + + if (va_status == VA_STATUS_SUCCESS) { + gen8_pp_states_setup(ctx, pp_context); + gen8_pp_pipeline_setup(ctx, pp_context); + } + + return va_status; +} + static VAStatus i965_post_processing_internal( VADriverContextP ctx, @@ -4684,9 +5552,10 @@ i965_post_processing_internal( VAStatus va_status; struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) + va_status = gen8_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); + else if (IS_GEN6(i965->intel.device_id) || + IS_GEN7(i965->intel.device_id)) va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); else va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); @@ -5397,9 +6266,10 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); if (IS_GEN8(i965->intel.device_id)) - memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); + memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); else if (IS_HASWELL(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules)); else if (IS_GEN7(i965->intel.device_id)) -- cgit v1.2.1 From b3a69f6db02e13208a5fa9f794be85c82527aefd Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 22 Jan 2013 10:53:26 +0800 Subject: Avoid the duplicated macro-definition of 
surface size Signed-off-by: Zhao Yakui --- src/gen6_mfc.c | 4 ++++ src/gen6_mfc.h | 12 ------------ src/gen6_vme.c | 8 -------- src/gen75_mfc.c | 4 ++++ src/gen75_vme.c | 8 -------- src/gen7_mfc.c | 4 ++++ src/gen7_vme.c | 3 --- src/gen8_mfc.c | 4 ++++ src/gen8_vme.c | 10 +++------- src/i965_post_processing.c | 13 +------------ src/i965_render.c | 4 +--- src/i965_structs.h | 12 ++++++++++++ 12 files changed, 33 insertions(+), 53 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 8ef20463..987fa52f 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -42,6 +42,10 @@ #include "gen6_vme.h" #include "intel_media.h" +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = { #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b" }; diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index d55cff6b..6df80937 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -51,18 +51,6 @@ struct encode_state; #define __SOFTWARE__ 0 -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) - -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) -#define 
BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) - #define MFC_BATCHBUFFER_AVC_INTRA 0 #define MFC_BATCHBUFFER_AVC_INTER 1 #define NUM_MFC_KERNEL 2 diff --git a/src/gen6_vme.c b/src/gen6_vme.c index dbe099c6..d7d4ba22 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -40,14 +40,6 @@ #include "gen6_vme.h" #include "gen6_mfc.h" -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) - #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 42371616..c6a5ab58 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -51,6 +51,10 @@ #define MFC_SOFTWARE_HASWELL 0 +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 979b109c..ab8bf899 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -38,14 +38,6 @@ #include "gen6_vme.h" #include 
"gen6_mfc.h" -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) - #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 375c354f..48f4bf23 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -41,6 +41,10 @@ #include "gen6_mfc.h" #include "gen6_vme.h" +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + extern void gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context); diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 77eb5b8d..a7081452 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -45,9 +45,6 @@ #endif #define VME_MSG_LENGTH 32 -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN7 #define 
SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 5f50e0a5..e8f5645c 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -43,6 +43,10 @@ #include "gen6_vme.h" #include "intel_media.h" +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + #define MFC_SOFTWARE_HASWELL 1 #define B0_STEP_REV 2 diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 3fe1605c..9d40e9ca 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -41,13 +41,9 @@ #include "gen6_vme.h" #include "gen6_mfc.h" -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) +#ifdef SURFACE_STATE_PADDED_SIZE +#undef SURFACE_STATE_PADDED_SIZE +#endif #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 86806fa8..e5bc8fef 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -45,20 +45,9 @@ IS_GEN7((ctx)->intel.device_id) || \ IS_GEN8((ctx)->intel.device_id)) -#define SURFACE_STATE_PADDED_SIZE_0_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_I965 ALIGN(sizeof(struct i965_surface_state2), 32) 
-#define SURFACE_STATE_PADDED_SIZE_I965 MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN8 ALIGN(sizeof(struct gen8_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN8 ALIGN(sizeof(struct gen8_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN8 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN8, SURFACE_STATE_PADDED_SIZE_1_GEN8) #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ - MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)) + MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES) diff --git a/src/i965_render.c b/src/i965_render.c index f5eb4f39..1a48e602 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -147,9 +147,7 @@ static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) diff --git a/src/i965_structs.h b/src/i965_structs.h index e3f9b3dd..dab4678b 100644 --- a/src/i965_structs.h +++ b/src/i965_structs.h @@ -1757,4 +1757,16 @@ struct 
gen7_sampler_dndi } dw7; }; +#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN8 ALIGN(sizeof(struct gen8_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN8 ALIGN(sizeof(struct gen8_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN8 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN8, SURFACE_STATE_PADDED_SIZE_1_GEN8) + #endif /* _I965_STRUCTS_H_ */ -- cgit v1.2.1 From f36d27552d924666cd6decc1a37b07303963e1f2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 22 Jan 2013 10:53:26 +0800 Subject: Update the BDW surface/sampler state and media command for encoding/decoding Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 33 ++--- src/i965_gpe_utils.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_gpe_utils.h | 25 ++++ 3 files changed, 387 insertions(+), 16 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 9d40e9ca..eb95875f 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -45,7 +45,7 @@ #undef SURFACE_STATE_PADDED_SIZE #endif -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) @@ -297,7 +297,7 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, struct 
intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; - struct gen6_interface_descriptor_data *desc; + struct gen8_interface_descriptor_data *desc; int i; dri_bo *bo; @@ -313,18 +313,19 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, /*Setup the descritor table*/ memset(desc, 0, sizeof(*desc)); desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); - desc->desc2.sampler_count = 0; /* FIXME: */ - desc->desc2.sampler_state_pointer = 0; - desc->desc3.binding_table_entry_count = 1; /* FIXME: */ - desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); - desc->desc4.constant_urb_entry_read_offset = 0; - desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH; + desc->desc3.sampler_count = 0; /* FIXME: */ + desc->desc3.sampler_state_pointer = 0; + desc->desc4.binding_table_entry_count = 1; /* FIXME: */ + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH; + /*kernel start*/ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0, - i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), + i * sizeof(*desc) + offsetof(struct gen8_interface_descriptor_data, desc0), kernel->bo); desc++; } @@ -620,7 +621,7 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); - gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); OUT_RELOC(batch, @@ -901,7 +902,7 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); - gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, 
batch); BEGIN_BATCH(batch, 2); OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); OUT_RELOC(batch, @@ -1001,7 +1002,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; - vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + vme_context->gpe_context.idrt.entry_size = sizeof(struct gen8_interface_descriptor_data); vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH; @@ -1017,10 +1018,10 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e &vme_context->gpe_context, vme_kernel_list, i965_kernel_num); - vme_context->vme_surface2_setup = gen7_gpe_surface2_setup; - vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; - vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup; - vme_context->vme_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup; + vme_context->vme_surface2_setup = gen8_gpe_surface2_setup; + vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup; + vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup; + vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup; encoder_context->vme_context = vme_context; encoder_context->vme_context_destroy = gen8_vme_context_destroy; diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 4c6469b0..2af323f9 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -295,6 +295,44 @@ gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling } } +static void +gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case 
I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + static void i965_gpe_set_surface2_state(VADriverContextP ctx, struct object_surface *obj_surface, @@ -677,3 +715,310 @@ gen7_gpe_buffer_suface_setup(VADriverContextP ctx, *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; dri_bo_unmap(bo); } + +static void +gen8_gpe_set_surface2_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state2 *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + + assert(obj_surface->bo); + assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss6.base_addr = obj_surface->bo->offset; + /* ss1 */ + ss->ss1.cbcr_pixel_offset_v_direction = 2; + ss->ss1.width = w - 1; + ss->ss1.height = h - 1; + /* ss2 */ + ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8; + ss->ss2.interleave_chroma = 1; + ss->ss2.pitch = w_pitch - 1; + ss->ss2.half_pitch_for_chroma = 0; + gen8_gpe_set_surface2_tiling(ss, tiling); + /* ss3: UV offset for interleave mode */ + ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset; + ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset; +} + +void +gen8_gpe_surface2_setup(VADriverContextP ctx, + struct i965_gpe_context 
*gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state2 *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_surface2_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + surface_state_offset + offsetof(struct gen8_surface_state2, ss6), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + +static void +gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + /* ss1 */ + ss->ss8.base_addr = obj_surface->bo->offset; + /* ss2 */ + ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */ + ss->ss2.height = h - 1; + /* ss3 */ + ss->ss3.pitch = w_pitch - 1; + gen8_gpe_set_surface_tiling(ss, tiling); +} + +static void +gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + int cbcr_offset; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + cbcr_offset = obj_surface->height * obj_surface->width; + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = 
I965_SURFACEFORMAT_R8_UNORM; + /* ss1 */ + ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset; + /* ss2 */ + ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */ + ss->ss2.height = (obj_surface->height / 2) -1; + /* ss3 */ + ss->ss3.pitch = w_pitch - 1; + gen8_gpe_set_surface_tiling(ss, tiling); +} + +void +gen8_gpe_media_rw_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + +void +gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + int cbcr_offset; + + assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + + cbcr_offset = obj_surface->height * obj_surface->width; + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + cbcr_offset, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = 
surface_state_offset; + dri_bo_unmap(bo); +} + + /* Fill *ss as a BUFFER surface over buffer_surface->bo. num_entries is num_blocks * size_block / pitch, and (num_entries - 1) is split across ss2.width (bits 0-6), ss2.height (bits 7-20) and ss3.depth (bits 21-26). ctx is not used. */ +static void +gen8_gpe_set_buffer_surface_state(VADriverContextP ctx, + struct i965_buffer_surface *buffer_surface, + struct gen8_surface_state *ss) +{ + int num_entries; + + assert(buffer_surface->bo); + num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_BUFFER; + /* ss8: surface base address */ + ss->ss8.base_addr = buffer_surface->bo->offset; + /* ss2 */ + ss->ss2.width = ((num_entries - 1) & 0x7f); + ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff); + /* ss3 */ + ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f); + ss->ss3.pitch = buffer_surface->pitch - 1; +} + /* Write a buffer surface state at surface_state_offset inside the context's surface-state/binding-table bo, emit a relocation for its ss8 base address against buffer_surface->bo with I915_GEM_DOMAIN_RENDER passed for both domain arguments, and store surface_state_offset in the binding table slot at binding_table_offset. NOTE(review): the exported name is spelled "suface"; callers have to use the same spelling. */ +void +gen8_gpe_buffer_suface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_buffer_surface *buffer_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + buffer_surface->bo); + /* binding table entry holds the offset of the surface state written above */ + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + /* Emit a 16-dword STATE_BASE_ADDRESS command into batch; the only relocated address is the surface state base, which points at the context's surface-state/binding-table bo. */ +static void +gen8_gpe_state_base_address(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 16); + + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14); + + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW4 Surface state base address */ + OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, 
I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + /*DW6. Dynamic state base address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address + OUT_BATCH(batch, 0); + + /*DW8. Indirect Object base address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address + OUT_BATCH(batch, 0); + /*DW10. Instruct base address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address + OUT_BATCH(batch, 0); + + /* DW12. Size limitation */ + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound + + /* + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound + */ + + ADVANCE_BATCH(batch); +} + +static void +gen8_gpe_vfe_state(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + + BEGIN_BATCH(batch, 9); + + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); + /* Scratch Space Base Pointer and Space */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, + gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */ + gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */ + gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */ + OUT_BATCH(batch, 0); /* Debug: Object ID */ + OUT_BATCH(batch, + gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */ + gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */ + + /* the vfe_desc5/6/7 will decide whether the scoreboard is used. 
*/ + OUT_BATCH(batch, gpe_context->vfe_desc5.dword); + OUT_BATCH(batch, gpe_context->vfe_desc6.dword); + OUT_BATCH(batch, gpe_context->vfe_desc7.dword); + + ADVANCE_BATCH(batch); + +} + /* Emit the GPE pipeline setup into batch, in this order: MI flush, i965_gpe_select(), the gen8 state base address, the gen8 VFE state, then gen6_gpe_curbe_load() and gen6_gpe_idrt(). Only the state-base-address and VFE steps are gen8-specific; the last two reuse the gen6 helpers. */ +void +gen8_gpe_pipeline_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + intel_batchbuffer_emit_mi_flush(batch); + + i965_gpe_select(ctx, gpe_context, batch); + gen8_gpe_state_base_address(ctx, gpe_context, batch); + gen8_gpe_vfe_state(ctx, gpe_context, batch); + gen6_gpe_curbe_load(ctx, gpe_context, batch); + gen6_gpe_idrt(ctx, gpe_context, batch); +} + diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index 72d7de8a..b96916d5 100644 --- a/src/i965_gpe_utils.h +++ b/src/i965_gpe_utils.h @@ -161,4 +161,29 @@ void gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx, struct object_surface *obj_surface, unsigned long binding_table_offset, unsigned long surface_state_offset); + +extern void gen8_gpe_surface2_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_media_rw_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_buffer_suface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_buffer_surface *buffer_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); + +void gen8_gpe_pipeline_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch); #endif /* _I965_GPE_UTILS_H_ */ -- 
cgit v1.2.1 From e1604a6054b16dd283ae8fd56faa154369bcc5e4 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 22 Jan 2013 10:53:26 +0800 Subject: Use the BDW surface/sampler state and memory address allocation for rendering Signed-off-by: Zhao Yakui [Haihao: directly use object intead of id] Signed-off-by: Xiang, Haihao --- src/i965_render.c | 398 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 387 insertions(+), 11 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 1a48e602..74790664 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -147,7 +147,30 @@ static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +/*TODO: Modify the shader for GEN8. + * Now it only uses the shader for gen7/haswell + */ +/* Programs for Gen8 */ +static const uint32_t sf_kernel_static_gen8[][4] = +{ +}; +static const uint32_t ps_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_rgb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + +static const uint32_t ps_subpic_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_argb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \ + MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) + #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -294,6 +317,31 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { } }; +static struct i965_kernel render_kernels_gen8[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen8, + sizeof(sf_kernel_static_gen8), + NULL + }, + { + "PS", 
+ PS_KERNEL, + ps_kernel_static_gen8, + sizeof(ps_kernel_static_gen8), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + ps_subpic_kernel_static_gen8, + sizeof(ps_subpic_kernel_static_gen8), + NULL + } +}; + #define URB_VS_ENTRIES 8 #define URB_VS_ENTRY_SIZE 1 @@ -750,6 +798,25 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling) } } +static void +gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling) +{ /* Map an I915_TILING_* value onto ss0.tiled_surface and ss0.tile_walk; there is no default case, so any other value leaves *ss unchanged. */ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + /* Set "Shader Channel Select" */ void gen7_render_set_surface_scs(struct gen7_surface_state *ss) @@ -811,6 +878,48 @@ gen7_render_set_surface_state( gen7_render_set_surface_tiling(ss, tiling); } + /* Zero *ss and fill it as a 2D surface of the given format located at bo->offset + offset, with width/height/pitch programmed as value - 1 and tiling read back from bo. When flags masked with TOP_FIELD|BOTTOM_FIELD equals exactly one of the two flags, vert_line_stride is set to 1 (the bottom field also sets vert_line_stride_ofs) and height is halved before it is programmed. */ +static void +gen8_render_set_surface_state( + struct gen8_surface_state *ss, + dri_bo *bo, + unsigned long offset, + int width, + int height, + int pitch, + int format, + unsigned int flags +) +{ + unsigned int tiling; + unsigned int swizzle; + + memset(ss, 0, sizeof(*ss)); + + switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { + case I965_PP_FLAG_BOTTOM_FIELD: + ss->ss0.vert_line_stride_ofs = 1; + /* fall-through */ + case I965_PP_FLAG_TOP_FIELD: + ss->ss0.vert_line_stride = 1; + height /= 2; + break; + } + + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss8.base_addr = bo->offset + offset; + + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + + ss->ss3.pitch = pitch - 1; + + dri_bo_get_tiling(bo, &tiling, &swizzle); + gen8_render_set_surface_tiling(ss, tiling); +} + static void i965_render_src_surface_state( VADriverContextP ctx, @@ -835,8 +944,18 @@ i965_render_src_surface_state( assert(ss_bo->virtual); ss = (char 
*)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_id)) { + gen8_render_set_surface_state(ss, + region, offset, + w, h, + pitch, format, flags); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + region); + } else if (IS_GEN7(i965->intel.device_id)) { gen7_render_set_surface_state(ss, region, offset, w, h, @@ -951,8 +1070,18 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_id)) { + gen8_render_set_surface_state(ss, + dest_region->bo, 0, + dest_region->width, dest_region->height, + dest_region->pitch, format, 0); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + dest_region->bo); + } else if (IS_GEN7(i965->intel.device_id)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, @@ -2376,6 +2505,80 @@ gen7_render_initialize(VADriverContextP ctx) render_state->cc.depth_stencil = bo; } +/* + * for GEN8 + */ +static void +gen8_render_initialize(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + dri_bo *bo; + + /* VERTEX BUFFER */ + dri_bo_unreference(render_state->vb.vertex_buffer); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vertex buffer", + 4096, + 4096); + assert(bo); + render_state->vb.vertex_buffer = bo; + + /* WM */ + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding 
table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); + assert(bo); + render_state->wm.surface_state_binding_table_bo = bo; + + dri_bo_unreference(render_state->wm.sampler); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler state", + MAX_SAMPLERS * sizeof(struct gen8_sampler_state), + 4096); + assert(bo); + render_state->wm.sampler = bo; + render_state->wm.sampler_count = 0; + + /* COLOR CALCULATOR */ + dri_bo_unreference(render_state->cc.state); + bo = dri_bo_alloc(i965->intel.bufmgr, + "color calc state", + sizeof(struct gen6_color_calc_state), + 4096); + assert(bo); + render_state->cc.state = bo; + + /* CC VIEWPORT */ + dri_bo_unreference(render_state->cc.viewport); + bo = dri_bo_alloc(i965->intel.bufmgr, + "cc viewport", + sizeof(struct i965_cc_viewport), + 4096); + assert(bo); + render_state->cc.viewport = bo; + + /* BLEND STATE */ + dri_bo_unreference(render_state->cc.blend); + bo = dri_bo_alloc(i965->intel.bufmgr, + "blend state", + sizeof(struct gen6_blend_state), + 4096); + assert(bo); + render_state->cc.blend = bo; + + /* DEPTH & STENCIL STATE */ + dri_bo_unreference(render_state->cc.depth_stencil); + bo = dri_bo_alloc(i965->intel.bufmgr, + "depth & stencil state", + sizeof(struct gen6_depth_stencil_state), + 4096); + assert(bo); + render_state->cc.depth_stencil = bo; +} + static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2452,6 +2655,34 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); } +static void +gen8_render_sampler(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_sampler_state *sampler_state; + int i; + + assert(render_state->wm.sampler_count > 0); + assert(render_state->wm.sampler_count <= MAX_SAMPLERS); + + dri_bo_map(render_state->wm.sampler, 1); + assert(render_state->wm.sampler->virtual); + sampler_state = render_state->wm.sampler->virtual; + 
for (i = 0; i < render_state->wm.sampler_count; i++) { + memset(sampler_state, 0, sizeof(*sampler_state)); + sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state++; + } + + dri_bo_unmap(render_state->wm.sampler); +} + + static void gen7_render_setup_states( VADriverContextP ctx, @@ -2472,6 +2703,26 @@ gen7_render_setup_states( i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } +static void +gen8_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ /* Prepare the state for rendering obj_surface, in order: destination surface state (index 0), source surface states, samplers, CC viewport, color calc, blend, depth/stencil, constants, vertices. Only the sampler step calls a gen8_ helper; color calc, blend and depth/stencil reuse the gen7_ helpers. */ + i965_render_dest_surface_state(ctx, 0); + i965_render_src_surfaces_state(ctx, obj_surface, flags); + gen8_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen7_render_color_calc_state(ctx); + gen7_render_blend_state(ctx); + gen7_render_depth_stencil_state(ctx); + i965_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); +} + static void gen7_emit_invarient_states(VADriverContextP ctx) { @@ -2521,6 +2772,42 @@ gen7_emit_state_base_address(VADriverContextP ctx) OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ } +static void +gen8_emit_state_base_address(VADriverContextP ctx) +{ /* Emit a 16-dword STATE_BASE_ADDRESS command into i965->batch; the only relocated address is the surface state base, which points at the render state's surface-state/binding-table bo. */ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW4 */ + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, 
I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + + /*DW6*/ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(batch, 0); + + /*DW8*/ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(batch, 0); + + /*DW10 */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(batch, 0); + + /*DW12 */ /* NOTE(review): gen8_gpe_state_base_address() programs 0xFFFFF000 in the same four upper-bound dwords; confirm which value is intended here. */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ + ADVANCE_BATCH(batch); +} + static void gen7_emit_viewport_state_pointers(VADriverContextP ctx) { @@ -2999,6 +3286,33 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } +static void +gen8_render_emit_states(VADriverContextP ctx, int kernel) +{ /* Emit the render commands for one draw between intel_batchbuffer_start_atomic() and intel_batchbuffer_end_atomic(). Only the state base address step calls a gen8_ emitter; every other step reuses the gen7_ emitter, and kernel is forwarded to gen7_emit_wm_state(). */ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen7_emit_invarient_states(ctx); + gen8_emit_state_base_address(ctx); + gen7_emit_viewport_state_pointers(ctx); + gen7_emit_urb(ctx); + gen7_emit_cc_state_pointers(ctx); + gen7_emit_sampler_state_pointers(ctx); + gen7_emit_bypass_state(ctx); + gen7_emit_vs_state(ctx); + gen7_emit_clip_state(ctx); + gen7_emit_sf_state(ctx); + gen7_emit_wm_state(ctx, kernel); + gen7_emit_binding_table(ctx); + gen7_emit_depth_buffer_state(ctx); + gen7_emit_drawing_rectangle(ctx); + gen7_emit_vertex_element_state(ctx); + gen7_emit_vertices(ctx); + intel_batchbuffer_end_atomic(batch); +} + static void gen7_render_put_surface( VADriverContextP ctx, @@ -3018,6 +3332,25 @@ gen7_render_put_surface( 
intel_batchbuffer_flush(batch); } +static void +gen8_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ /* Render obj_surface with the PS_KERNEL shader: (re)allocate the render buffers, build the state, clear the destination region, emit the commands, then flush the batch. */ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen8_render_initialize(ctx); + gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + i965_clear_dest_region(ctx); + gen8_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} + static void gen7_subpicture_render_blend_state(VADriverContextP ctx) { @@ -3059,6 +3392,25 @@ gen7_subpicture_render_setup_states( i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } +static void +gen8_subpicture_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ /* Subpicture counterpart of gen8_render_setup_states(): uses the i965_subpic_* source-surface, constant and vertex helpers and gen7_subpicture_render_blend_state(). NOTE(review): src_rect is accepted but not referenced in this body. */ + i965_render_dest_surface_state(ctx, 0); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); + gen8_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen7_render_color_calc_state(ctx); + gen7_subpicture_render_blend_state(ctx); + gen7_render_depth_stencil_state(ctx); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); +} + static void gen7_render_put_subpicture( VADriverContextP ctx, @@ -3080,6 +3432,26 @@ gen7_render_put_subpicture( intel_batchbuffer_flush(batch); } +static void +gen8_render_put_subpicture( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ /* Render the subpicture selected by obj_surface->subpic_render_idx; the subpicture object is asserted to be non-NULL before any state is built. */ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + + assert(obj_subpic); + gen8_render_initialize(ctx); + gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, 
dst_rect); + gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); + intel_batchbuffer_flush(batch); +} /* * global functions @@ -3118,8 +3490,9 @@ intel_render_put_surface( src_rect = dst_rect; } - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) + gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); + else if (IS_GEN7(i965->intel.device_id)) gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); else if (IS_GEN6(i965->intel.device_id)) gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); @@ -3140,8 +3513,9 @@ intel_render_put_subpicture( { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) + gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); + else if (IS_GEN7(i965->intel.device_id)) gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); else if (IS_GEN6(i965->intel.device_id)) gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); @@ -3162,8 +3536,10 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) { + memcpy(render_state->render_kernels, render_kernels_gen8, + sizeof(render_state->render_kernels)); + } else if (IS_GEN7(i965->intel.device_id)) memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_id) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); -- cgit v1.2.1 From 320db0e970911e99ac9070cacd5dffac095fd0d1 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 23 Jan 2013 14:24:41 +0800 Subject: No workaround for JPEG decoding on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index c351e4b8..bec15119 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2166,6 +2166,7 @@ gen8_mfd_jpeg_bsd_object(VADriverContextP ctx, } /* Workaround for JPEG decoding on Ivybridge */ +#ifdef JPEG_WA VAStatus i965_DestroySurfaces(VADriverContextP ctx, @@ -2615,6 +2616,8 @@ gen8_mfd_jpeg_wa(VADriverContextP ctx, gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context); } +#endif + void gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, struct decode_state *decode_state, @@ -2632,7 +2635,9 @@ gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, /* Currently only support Baseline DCT */ gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context); intel_batchbuffer_start_atomic_bcs(batch, 0x1000); +#ifdef JPEG_WA gen8_mfd_jpeg_wa(ctx, gen7_mfd_context); +#endif intel_batchbuffer_emit_mi_flush(batch); gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); -- cgit v1.2.1 From 11026e210526233c8414397fa39adc92ffe997e0 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 23 Jan 2013 14:27:20 +0800 Subject: Enlarge deblocking filter row store on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index bec15119..521ca01b 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -1352,7 +1352,7 @@ gen8_mfd_vc1_decode_init(VADriverContextP ctx, dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); bo = 
dri_bo_alloc(i965->intel.bufmgr, "deblocking filter row store", - width_in_mbs * 6 * 64, + width_in_mbs * 7 * 64, 0x1000); assert(bo); gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; -- cgit v1.2.1 From 82816cef6e63c60f750e7d1452249e31598c0d7a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 27 Feb 2013 13:25:37 +0800 Subject: Configuration for VP8 decoding/encoding Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 32 ++++++++++++++++++++++++++++++++ src/i965_drv_video.h | 4 +++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index c6c05914..50bc0de1 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -78,6 +78,13 @@ #define HAS_TILED_SURFACE(ctx) ((ctx)->codec_info->has_tiled_surface) +#define HAS_VP8_DECODING(ctx) ((ctx)->codec_info->has_vp8_decoding && \ + (ctx)->intel.has_bsd) + +#define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \ + (ctx)->intel.has_bsd) + + static int get_sampling_from_fourcc(unsigned int fourcc); /* Check whether we are rendering to X11 (VA/X11 or VA/GLX API) */ @@ -413,6 +420,11 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileJPEGBaseline; } + if (HAS_VP8_DECODING(i965) || + HAS_VP8_ENCODING(i965)) { + profile_list[i++] = VAProfileVP8Version0_3; + } + /* If the assert fails then I965_MAX_PROFILES needs to be bigger */ assert(i <= I965_MAX_PROFILES); *num_profiles = i; @@ -468,6 +480,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, entrypoint_list[n++] = VAEntrypointVLD; break; + case VAProfileVP8Version0_3: + if (HAS_VP8_DECODING(i965)) + entrypoint_list[n++] = VAEntrypointVLD; + + if (HAS_VP8_ENCODING(i965)) + entrypoint_list[n++] = VAEntrypointEncSlice; + default: break; } @@ -621,6 +640,15 @@ i965_CreateConfig(VADriverContextP ctx, break; + case VAProfileVP8Version0_3: + if ((HAS_VP8_DECODING(i965) && VAEntrypointVLD == entrypoint) || + (HAS_VP8_ENCODING(i965) && 
VAEntrypointEncSlice == entrypoint)) + vaStatus = VA_STATUS_SUCCESS; + else + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; break; @@ -1954,6 +1982,10 @@ i965_BeginPicture(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileVP8Version0_3: + vaStatus = VA_STATUS_SUCCESS; + break; + default: assert(0); vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 773c8ca7..a1dd971c 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -41,7 +41,7 @@ #include "object_heap.h" #include "intel_driver.h" -#define I965_MAX_PROFILES 11 +#define I965_MAX_PROFILES 20 #define I965_MAX_ENTRYPOINTS 5 #define I965_MAX_CONFIG_ATTRIBUTES 10 #define I965_MAX_IMAGE_FORMATS 10 @@ -296,6 +296,8 @@ struct hw_codec_info unsigned int has_tiled_surface:1; unsigned int has_di_motion_adptive:1; unsigned int has_di_motion_compensated:1; + unsigned int has_vp8_decoding:1; + unsigned int has_vp8_encoding:1; unsigned int num_filters; struct i965_filter filters[VAProcFilterCount]; -- cgit v1.2.1 From 993279ab525d52a63b359a14adf28859a91cab42 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 27 Feb 2013 12:53:07 +0800 Subject: Surface fourcc format on Gen8 Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 50bc0de1..6891b8c6 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1162,7 +1162,9 @@ i965_guess_surface_format(VADriverContextP ctx, if (!obj_config) return; - if (IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_id) || + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { *fourcc = VA_FOURCC('N', 'V', '1', '2'); *is_tiled = 1; return; @@ -4243,7 +4245,8 @@ i965_GetSurfaceAttributes( } } else if 
(IS_GEN6(i965->intel.device_id)) { attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { if (obj_config->profile == VAProfileJPEGBaseline) attrib_list[i].value.value.i = 0; /* internal format */ else @@ -4319,7 +4322,8 @@ i965_GetSurfaceAttributes( attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } } - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { -- cgit v1.2.1 From 617a121244c5b89e55ed36215b78726bf091afa5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 28 Feb 2013 12:40:52 +0800 Subject: Advertise VP8 decoding on Gen8 The pipeline isn't implemented yet. Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6891b8c6..62f07232 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -343,6 +343,7 @@ static struct hw_codec_info gen8_hw_codec_info = { .has_accelerated_getimage = 1, .has_accelerated_putimage = 1, .has_tiled_surface = 1, + .has_vp8_decoding = 1, }; #define I965_PACKED_HEADER_BASE 0 -- cgit v1.2.1 From 8afd428ba440bf79ad88b54fcfee3321e022399a Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 28 Feb 2013 12:43:51 +0800 Subject: Temporarily remove assert() to make vainfo happy Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e5bc8fef..efab2d8c 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -6255,7 +6255,7 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == 
ARRAY_ELEMS(pp_modules_gen6)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); + // assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); if (IS_GEN8(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); -- cgit v1.2.1 From 95dee08f1afa62823827302baf204ab999b38fbf Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 27 Feb 2013 15:56:24 +0800 Subject: Add support for VAProbabilityBufferType Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 9 ++++++++- src/i965_drv_video.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 62f07232..997edfa7 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1677,6 +1677,7 @@ i965_create_buffer_internal(VADriverContextP ctx, case VAProcPipelineParameterBufferType: case VAProcFilterParameterBufferType: case VAHuffmanTableBufferType: + case VAProbabilityBufferType: /* Ok */ break; @@ -1713,7 +1714,8 @@ i965_create_buffer_internal(VADriverContextP ctx, dri_bo_subdata(buffer_store->bo, 0, size * num_elements, data); } else if (type == VASliceDataBufferType || type == VAImageBufferType || - type == VAEncCodedBufferType) { + type == VAEncCodedBufferType || + type == VAProbabilityBufferType) { buffer_store->bo = dri_bo_alloc(i965->intel.bufmgr, "Buffer", size * num_elements, 64); @@ -2081,6 +2083,7 @@ DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(picture_parameter, pic_param) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(iq_matrix, iq_matrix) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(bit_plane, bit_plane) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(huffman_table, huffman_table) +DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(probability_data, probability_data) #define DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(name, member) DEF_RENDER_MULTI_BUFFER_FUNC(decode, name, member) 
DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params) @@ -2133,6 +2136,10 @@ i965_decoder_render_picture(VADriverContextP ctx, vaStatus = I965_RENDER_DECODE_BUFFER(huffman_table); break; + case VAProbabilityBufferType: + vaStatus = I965_RENDER_DECODE_BUFFER(probability_data); + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE; break; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index a1dd971c..1a101f45 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -107,6 +107,7 @@ struct decode_state struct buffer_store *bit_plane; struct buffer_store *huffman_table; struct buffer_store **slice_datas; + struct buffer_store *probability_data; VASurfaceID current_render_target; int max_slice_params; int max_slice_datas; -- cgit v1.2.1 From b739422ecb0a159b2fdf83ce9c0c1d5e88707b87 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 27 Feb 2013 13:36:19 +0800 Subject: New macros for Gen8 Signed-off-by: Xiang, Haihao --- src/i965_defines.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/i965_defines.h b/src/i965_defines.h index 58a73d74..3090ce86 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -327,6 +327,11 @@ #define MFD_JPEG_BSD_OBJECT MFX(2, 7, 1, 8) +#define MFX_VP8_PIC_STATE MFX(2, 4, 0, 0) + +#define MFD_VP8_BSD_OBJECT MFX(2, 4, 1, 8) + + #define VEB(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ (pipeline) << 27 | \ @@ -704,6 +709,8 @@ #define MFX_FORMAT_VC1 1 #define MFX_FORMAT_AVC 2 #define MFX_FORMAT_JPEG 3 +#define MFX_FORMAT_SVC 4 +#define MFX_FORMAT_VP8 5 #define MFX_SHORT_MODE 0 #define MFX_LONG_MODE 1 -- cgit v1.2.1 From f18395a430662f066e3633d3994a8eeff6820ad6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 28 Feb 2013 12:49:55 +0800 Subject: Setup VP8 decoding pipeline Update the pipeline state later. 
Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 219 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 521ca01b..87bfd7bd 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "intel_batchbuffer.h" #include "intel_driver.h" @@ -108,7 +109,8 @@ gen8_mfd_pipe_mode_select(VADriverContextP ctx, assert(standard_select == MFX_FORMAT_MPEG2 || standard_select == MFX_FORMAT_AVC || standard_select == MFX_FORMAT_VC1 || - standard_select == MFX_FORMAT_JPEG); + standard_select == MFX_FORMAT_JPEG || + standard_select == MFX_FORMAT_VP8); BEGIN_BCS_BATCH(batch, 5); OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); @@ -2709,6 +2711,218 @@ gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, intel_batchbuffer_flush(batch); } +static void +gen8_mfd_vp8_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct object_surface *obj_surface; + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); +} + +static void +gen8_mfd_vp8_pic_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer; + VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */ + dri_bo *probs_bo = decode_state->probability_data->bo; + int i, j; + + BEGIN_BCS_BATCH(batch, 38); + OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2)); + OUT_BCS_BATCH(batch, + 
(ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 | + (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0); + OUT_BCS_BATCH(batch, + slice_param->num_of_partitions << 24 | + pic_param->pic_fields.bits.sharpness_level << 16 | + pic_param->pic_fields.bits.sign_bias_alternate << 13 | + pic_param->pic_fields.bits.sign_bias_golden << 12 | + pic_param->pic_fields.bits.loop_filter_adj_enable << 11 | + pic_param->pic_fields.bits.mb_no_coeff_skip << 10 | + pic_param->pic_fields.bits.update_mb_segmentation_map << 9 | + pic_param->pic_fields.bits.segmentation_enabled << 8 | + 0 << 7 | /* segmentation id streamin disabled */ + 0 << 6 | /* segmentation id streamout disabled */ + pic_param->pic_fields.bits.key_frame << 5 | + pic_param->pic_fields.bits.filter_type << 4 | + (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */ + !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */ + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_level[3] << 24 | + pic_param->loop_filter_level[2] << 16 | + pic_param->loop_filter_level[1] << 8 | + pic_param->loop_filter_level[0] << 0); + + /* Quantizer Value for 4 segmetns, DW4-DW15 */ + for (i = 0; i < 4; i++) { + OUT_BCS_BATCH(batch, + iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */ + iq_matrix->quantization_index[i][1] << 0); /* Y1DC */ + OUT_BCS_BATCH(batch, + iq_matrix->quantization_index[i][5] << 16 | /* UVAC */ + iq_matrix->quantization_index[i][4] << 0); /* UVDC */ + OUT_BCS_BATCH(batch, + iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */ + iq_matrix->quantization_index[i][2] << 0); /* Y2DC */ + } + + /* CoeffProbability table for non-key frame, DW16-DW18 */ + if (probs_bo) { + OUT_BCS_RELOC(batch, probs_bo, + 0, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, + pic_param->mb_segment_tree_probs[2] << 16 | + 
pic_param->mb_segment_tree_probs[1] << 8 | + pic_param->mb_segment_tree_probs[0] << 0); + + OUT_BCS_BATCH(batch, + pic_param->prob_skip_false << 24 | + pic_param->prob_intra << 16 | + pic_param->prob_last << 8 | + pic_param->prob_gf << 0); + + OUT_BCS_BATCH(batch, + pic_param->y_mode_probs[3] << 24 | + pic_param->y_mode_probs[2] << 16 | + pic_param->y_mode_probs[1] << 8 | + pic_param->y_mode_probs[0] << 0); + + OUT_BCS_BATCH(batch, + pic_param->uv_mode_probs[2] << 16 | + pic_param->uv_mode_probs[1] << 8 | + pic_param->uv_mode_probs[0] << 0); + + /* MV update value, DW23-DW32 */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 20; j += 4) { + OUT_BCS_BATCH(batch, + (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 | + pic_param->mv_probs[i][j + 2] << 16 | + pic_param->mv_probs[i][j + 1] << 8 | + pic_param->mv_probs[i][j + 0] << 0); + } + } + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_deltas_ref_frame[3] << 24 | + pic_param->loop_filter_deltas_ref_frame[2] << 16 | + pic_param->loop_filter_deltas_ref_frame[1] << 8 | + pic_param->loop_filter_deltas_ref_frame[0] << 0); + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_deltas_mode[3] << 24 | + pic_param->loop_filter_deltas_mode[2] << 16 | + pic_param->loop_filter_deltas_mode[1] << 8 | + pic_param->loop_filter_deltas_mode[0] << 0); + + /* segmentation id stream base address, DW35-DW37 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_vp8_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferVP8 *pic_param, + VASliceParameterBufferVP8 *slice_param, + dri_bo *slice_data_bo, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 22); + OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2)); + OUT_BCS_BATCH(batch, + 0 << 16 | /* Partition 0 CPBAC Entropy Count */ + 0 << 8 | /* Partition 0 Count Entropy Range */ + slice_param->num_of_partitions 
<< 4 | + (slice_param->macroblock_offset & 0x7)); + OUT_BCS_BATCH(batch, + 0 << 24 | /* Partition 0 Count Entropy Value */ + 0); + OUT_BCS_BATCH(batch, + 0); /* Partition 0 Data length, DW3 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 0 Data offset, DW4 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 1 Data length, DW5 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 1 Data offset, DW6 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 2 Data length, DW7 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 2 Data offset, DW8 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 3 Data length, DW9 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 3 Data offset, DW10 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 4 Data length, DW11 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 4 Data offset, DW12 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 5 Data length, DW13 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 5 Data offset, DW14 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 6 Data length, DW15 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 6 Data offset, DW16 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 7 Data length, DW17 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 7 Data offset, DW18 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 8 Data length, DW19 */ + OUT_BCS_BATCH(batch, + 0); /* Partition 8 Data offset, DW20 */ + OUT_BCS_BATCH(batch, + 1 << 31 | /* concealment method */ + 0); + ADVANCE_BCS_BATCH(batch); +} + +void +gen8_mfd_vp8_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVP8 *pic_param; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + + gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, 
MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + static VAStatus gen8_mfd_decode_picture(VADriverContextP ctx, VAProfile profile, @@ -2751,6 +2965,10 @@ gen8_mfd_decode_picture(VADriverContextP ctx, gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context); break; + case VAProfileVP8Version0_3: + gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + default: assert(0); break; -- cgit v1.2.1 From 1cfca12b9e53fc28afe751987700524704927f88 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Fix the VPP error during porting patch from master to staging Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 60 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index efab2d8c..1e7e9c22 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1201,14 +1201,22 @@ static const uint32_t pp_nv12_dn_gen8[][4] = { static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { #include "shaders/post_processing/gen7/pl2_to_pa.g75b" }; - static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { #include "shaders/post_processing/gen7/pl3_to_pa.g75b" }; - static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen7/pa_to_pl2.g75b" }; +static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen7/pa_to_pl3.g75b" +}; +static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen7/rgbx_to_nv12.g75b" +}; +static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { +#include 
"shaders/post_processing/gen7/pl2_to_rgbx.g75b" +}; + static struct pp_module pp_modules_gen8[] = { { @@ -1251,7 +1259,7 @@ static struct pp_module pp_modules_gen8[] = { { "PL3_NV12", PP_PL3_LOAD_SAVE_N12, - pp_pl3_load_save_nv12_gen75, + pp_pl3_load_save_nv12_gen8, sizeof(pp_pl3_load_save_nv12_gen8), NULL, }, @@ -1263,7 +1271,7 @@ static struct pp_module pp_modules_gen8[] = { { "PL3_PL3", PP_PL3_LOAD_SAVE_N12, - pp_pl3_load_save_pl3_gen75, + pp_pl3_load_save_pl3_gen8, sizeof(pp_pl3_load_save_pl3_gen8), NULL, }, @@ -1304,7 +1312,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen8_pp_plx_avs_initialize, + gen7_pp_nv12_dndi_initialize, }, { @@ -1316,7 +1324,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen8_pp_plx_avs_initialize, + gen7_pp_nv12_dn_initialize, }, { { @@ -1353,7 +1361,43 @@ static struct pp_module pp_modules_gen8[] = { gen8_pp_plx_avs_initialize, }, - + + { + { + "PA_PL3 module", + PP_PA_LOAD_SAVE_PL3, + pp_pa_load_save_pl3_gen8, + sizeof(pp_pa_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "RGBX_NV12 module", + PP_RGBX_LOAD_SAVE_NV12, + pp_rgbx_load_save_nv12_gen8, + sizeof(pp_rgbx_load_save_nv12_gen8), + NULL, + }, + + gen7_pp_rgbx_avs_initialize, + }, + + { + { + "NV12_RGBX module", + PP_NV12_LOAD_SAVE_RGBX, + pp_nv12_load_save_rgbx_gen8, + sizeof(pp_nv12_load_save_rgbx_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + }; @@ -6255,7 +6299,7 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - // assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); if (IS_GEN8(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); -- cgit v1.2.1 From 3d94d4aeab8f5be6ee5822d96b70848d5efedf2f Mon Sep 17 00:00:00 
2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Update the MI_BATCH_BUFFER_START for BDW Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 5 +++-- src/gen8_vme.c | 5 +++-- src/i965_post_processing.c | 18 ++++++++++++++---- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index e8f5645c..fb8b0d7b 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1583,12 +1583,13 @@ gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, // picture level programing gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context); - BEGIN_BCS_BATCH(batch, 2); - OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + BEGIN_BCS_BATCH(batch, 3); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_BCS_RELOC(batch, slice_batch_bo, I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); // end programing diff --git a/src/gen8_vme.c b/src/gen8_vme.c index eb95875f..389ca8e8 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -622,12 +622,13 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); intel_batchbuffer_end_atomic(batch); diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 1e7e9c22..51bacc2c 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5455,12 +5455,22 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); - OUT_RELOC(batch, command_buffer, + if (IS_GEN8(i965->intel.device_id)) { + 
BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, command_buffer, I915_GEM_DOMAIN_COMMAND, 0, 0); - ADVANCE_BATCH(batch); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } else { + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BATCH(batch); + } dri_bo_unreference(command_buffer); -- cgit v1.2.1 From f56d30f3a05d7dcfb80add9997969e89d1fd4db0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Set the max thread num for PS thread on BDW Signed-off-by: Zhao Yakui --- src/i965_render.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/i965_render.c b/src/i965_render.c index 74790664..194c7453 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3569,7 +3569,15 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - if (IS_HSW_GT1(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_id)) { + render_state->max_wm_threads = 48; + if (IS_BDW_GT1(i965->intel.device_id)) + render_state->max_wm_threads = 120; + else if (IS_BDW_GT2(i965->intel.device_id)) + render_state->max_wm_threads = 180; + else if (IS_BDW_GT2PLUS(i965->intel.device_id)) + render_state->max_wm_threads = 360; + } else if (IS_HSW_GT1(i965->intel.device_id)) { render_state->max_wm_threads = 102; } else if (IS_HSW_GT2(i965->intel.device_id)) { render_state->max_wm_threads = 204; -- cgit v1.2.1 From a1cdd8ce1f3d3073b145ab8886ac523c7dac2cce Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Implement the rendering CSC conversion for BDW This is implemented based on 3D engine, which is similar to that on Ivy. But it also needs to handle a lot of changes about 3D commands between BDW and Ivy. 
Signed-off-by: Zhao Yakui [Haihao: directly use object instead of id] Signed-off-by: Xiang, Haihao --- src/i965_render.c | 719 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 674 insertions(+), 45 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 194c7453..806763e8 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -1697,6 +1697,52 @@ i965_clear_dest_region(VADriverContextP ctx) intel_batchbuffer_end_atomic(batch); } +static void +gen8_clear_dest_region(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + unsigned int blt_cmd, br13; + int pitch; + + blt_cmd = GEN8_XY_COLOR_BLT_CMD; + br13 = 0xf0 << 16; + pitch = dest_region->pitch; + + if (dest_region->cpp == 4) { + br13 |= BR13_8888; + blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA); + } else { + assert(dest_region->cpp == 2); + br13 |= BR13_565; + } + + if (dest_region->tiling != I915_TILING_NONE) { + blt_cmd |= XY_COLOR_BLT_DST_TILED; + pitch /= 4; + } + + br13 |= pitch; + + intel_batchbuffer_start_atomic_blt(batch, 24); + BEGIN_BLT_BATCH(batch, 7); + + OUT_BATCH(batch, blt_cmd); + OUT_BATCH(batch, br13); + OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); + OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) | + (dest_region->x + dest_region->width)); + OUT_RELOC(batch, dest_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(batch, 0x0); + OUT_BATCH(batch, 0x0); + ADVANCE_BATCH(batch); + intel_batchbuffer_end_atomic(batch); +} + static void i965_surface_render_pipeline_setup(VADriverContextP ctx) { @@ -2440,6 +2486,7 @@ gen7_render_initialize(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; dri_bo *bo; + 
int size; /* VERTEX BUFFER */ dri_bo_unreference(render_state->vb.vertex_buffer); @@ -2488,9 +2535,10 @@ gen7_render_initialize(VADriverContextP ctx) /* BLEND STATE */ dri_bo_unreference(render_state->cc.blend); + size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt); bo = dri_bo_alloc(i965->intel.bufmgr, "blend state", - sizeof(struct gen6_blend_state), + size, 4096); assert(bo); render_state->cc.blend = bo; @@ -2703,6 +2751,26 @@ gen7_render_setup_states( i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } +static void +gen8_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + global_blend_state = render_state->cc.blend->virtual; + memset(global_blend_state, 0, sizeof(*global_blend_state)); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + blend_state->blend1.pre_blend_clamp_enable = 1; + dri_bo_unmap(render_state->cc.blend); +} + static void gen8_render_setup_states( VADriverContextP ctx, @@ -2717,8 +2785,7 @@ gen8_render_setup_states( gen8_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); - gen7_render_blend_state(ctx); - gen7_render_depth_stencil_state(ctx); + gen8_render_blend_state(ctx); i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -2912,6 +2979,31 @@ gen7_emit_cc_state_pointers(VADriverContextP ctx) ADVANCE_BATCH(batch); } +static void +gen8_emit_cc_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = 
i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.state, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_RELOC(batch, + render_state->cc.blend, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + ADVANCE_BATCH(batch); + +} + static void gen7_emit_sampler_state_pointers(VADriverContextP ctx) { @@ -3287,66 +3379,603 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) } static void -gen8_render_emit_states(VADriverContextP ctx, int kernel) +gen8_emit_vertices(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; - intel_batchbuffer_start_atomic(batch, 0x1000); - intel_batchbuffer_emit_mi_flush(batch); - gen7_emit_invarient_states(ctx); - gen8_emit_state_base_address(ctx); - gen7_emit_viewport_state_pointers(ctx); - gen7_emit_urb(ctx); - gen7_emit_cc_state_pointers(ctx); - gen7_emit_sampler_state_pointers(ctx); - gen7_emit_bypass_state(ctx); - gen7_emit_vs_state(ctx); - gen7_emit_clip_state(ctx); - gen7_emit_sf_state(ctx); - gen7_emit_wm_state(ctx, kernel); - gen7_emit_binding_table(ctx); - gen7_emit_depth_buffer_state(ctx); - gen7_emit_drawing_rectangle(ctx); - gen7_emit_vertex_element_state(ctx); - gen7_emit_vertices(ctx); - intel_batchbuffer_end_atomic(batch); + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2)); + OUT_BATCH(batch, + (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) | + (0 << GEN8_VB0_MOCS_SHIFT) | + GEN7_VB0_ADDRESS_MODIFYENABLE | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_BATCH(batch, 0); + 
OUT_BATCH(batch, 12 * 4); + ADVANCE_BATCH(batch); + + /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2)); + OUT_BATCH(batch, + _3DPRIM_RECTLIST); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(batch, + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(batch, 3); /* vertex count per instance */ + OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } static void -gen7_render_put_surface( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect, - unsigned int flags -) +gen8_emit_vertex_element_state(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - gen7_render_initialize(ctx); - gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); - i965_clear_dest_region(ctx); - gen7_render_emit_states(ctx, PS_KERNEL); - intel_batchbuffer_flush(batch); + /* Set up our vertex elements, sourced from the single vertex buffer. 
*/ + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2)); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen8_emit_vs_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* disable VS constant buffer */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 0 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 1 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 2 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2)); + OUT_BATCH(batch, 0); /* without VS kernel */ + OUT_BATCH(batch, 0); + /* VS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. 
VS shader GRF and URB buffer definition */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } +/* + * URB layout on GEN8 + * ---------------------------------------- + * | PS Push Constants (8KB) | VS entries | + * ---------------------------------------- + */ static void -gen8_render_put_surface( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect, - unsigned int flags -) +gen8_emit_urb(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; + unsigned int num_urb_entries = 64; - gen8_render_initialize(ctx); - gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); - i965_clear_dest_region(ctx); + /* The minimum urb entries is 64 */ + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + /* Size is 8Kbs and base address is 0Kb */ + OUT_BATCH(batch, + (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | + (4 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH(batch, + (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | + (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (5 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (6 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (7 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + 
ADVANCE_BATCH(batch); +} + +static void +gen8_emit_bypass_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* bypass GS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 10); + OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2)); + /* GS shader address */ + OUT_BATCH(batch, 0); /* without GS kernel */ + OUT_BATCH(batch, 0); + /* DW3. GS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. GS shader GRF and URB offset/length */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* disable HS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2)); + OUT_BATCH(batch, 0); + /*DW2. HS pass-through */ + OUT_BATCH(batch, 0); + /*DW3. HS shader address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW5. HS shader flag. 
URB offset/length and so on */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable TE */ + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable DS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2)); + /* DW1. DS shader pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW3-5. DS shader dispatch flag.*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW8. 
DS shader output URB */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable STREAMOUT */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_invarient_states(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + ADVANCE_BATCH(batch); + + /* Update 3D Multisample pattern */ + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); + ADVANCE_BATCH(batch); + + /* Set system instruction pointer */ + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_clip_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); +} + +static void +gen8_emit_sf_state(VADriverContextP ctx) +{ + struct 
i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2)); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2)); + OUT_BATCH(batch, + (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | + (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | + (0 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* SBE for backend setup */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_wm_state(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + unsigned int num_samples = 0; + unsigned int max_threads; + + max_threads = render_state->max_wm_threads - 2; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2)); + OUT_BATCH(batch, + 
GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2)); + OUT_BATCH(batch, 1); + OUT_BATCH(batch, 0); + /* DW3-4. Constant buffer 0 */ + OUT_RELOC(batch, + render_state->curbe.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH(batch, 0); + + /* DW5-10. Constant buffers 1-3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 12); + OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2)); + /* PS shader address */ + OUT_RELOC(batch, + render_state->render_kernels[kernel].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH(batch, 0); + /* DW3. PS shader flags: sampler count and binding table entry count */ + OUT_BATCH(batch, + (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + /* DW4-5. Scratch space */ + OUT_BATCH(batch, 0); /* scratch space base offset */ + OUT_BATCH(batch, 0); + /* DW6. PS max threads, push constant enable, 16-wide dispatch enable */ + OUT_BATCH(batch, + ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples | + GEN7_PS_PUSH_CONSTANT_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + /* DW7. Dispatch start GRF for kernel 0 */ + OUT_BATCH(batch, + (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(batch, 0); /* kernel 1 pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* kernel 2 pointer */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, BINDING_TABLE_OFFSET); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_buffer_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2)); + OUT_BATCH(batch, + (I965_DEPTHFORMAT_D32_FLOAT << 18) | + (I965_SURFACE_NULL << 29)); + /* DW2-3.
Depth Buffer Address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4-7. Surface structure */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the Hier Depth buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the stencil buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_stencil_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_render_emit_states(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_emit_invarient_states(ctx); + gen8_emit_state_base_address(ctx); + gen7_emit_viewport_state_pointers(ctx); + gen8_emit_urb(ctx); + gen8_emit_cc_state_pointers(ctx); + gen7_emit_sampler_state_pointers(ctx); + gen8_emit_bypass_state(ctx); + gen8_emit_vs_state(ctx); + gen8_emit_clip_state(ctx); + gen8_emit_sf_state(ctx); + gen8_emit_depth_stencil_state(ctx); + gen8_emit_wm_state(ctx, kernel); + gen8_emit_depth_buffer_state(ctx); + gen7_emit_drawing_rectangle(ctx); + gen8_emit_vertex_element_state(ctx); + 
gen8_emit_vertices(ctx); + intel_batchbuffer_end_atomic(batch); +} + +static void +gen7_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen7_render_initialize(ctx); + gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + i965_clear_dest_region(ctx); + gen7_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} + +static void +gen8_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen8_render_initialize(ctx); + gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + gen8_clear_dest_region(ctx); gen8_render_emit_states(ctx, PS_KERNEL); intel_batchbuffer_flush(batch); } -- cgit v1.2.1 From 02c4494bbbbc9bc9b4af3de2756c7794666ac162 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Add the support of subpic for BDW Signed-off-by: Zhao Yakui [Haihao: directly use object instead of id] Signed-off-by: Xiang, Haihao --- src/i965_render.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 806763e8..2e706976 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3789,11 +3789,25 @@ gen8_emit_wm_state(VADriverContextP ctx, int kernel) (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE)); ADVANCE_BATCH(batch); - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); - OUT_BATCH(batch, - GEN8_PS_BLEND_HAS_WRITEABLE_RT); - ADVANCE_BATCH(batch); + + if (kernel == PS_KERNEL) { + 
BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(batch); + } else if (kernel == PS_SUBPIC_KERNEL) { + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PS_BLEND_HAS_WRITEABLE_RT | + GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT))); + ADVANCE_BATCH(batch); + } BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2)); @@ -4002,6 +4016,32 @@ gen7_subpicture_render_blend_state(VADriverContextP ctx) dri_bo_unmap(render_state->cc.blend); } +static void +gen8_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + global_blend_state = render_state->cc.blend->virtual; + memset(global_blend_state, 0, sizeof(*global_blend_state)); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.colorbuf_blend = 1; + blend_state->blend1.post_blend_clamp_enable = 1; + 
blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + dri_bo_unmap(render_state->cc.blend); +} + static void gen7_subpicture_render_setup_states( VADriverContextP ctx, @@ -4034,8 +4074,7 @@ gen8_subpicture_render_setup_states( gen8_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); - gen7_subpicture_render_blend_state(ctx); - gen7_render_depth_stencil_state(ctx); + gen8_subpicture_render_blend_state(ctx); i965_subpic_render_upload_constants(ctx, obj_surface); i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } -- cgit v1.2.1 From 4125f73a287d97c7a774ad8af9b55cfea731e4e3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Update the pixel shader for BDW rendering function Signed-off-by: Zhao Yakui --- src/i965_render.c | 8 +- src/shaders/render/Makefile.am | 28 ++++++- src/shaders/render/exa_wm_src_affine.g8a | 47 +++++++++++ src/shaders/render/exa_wm_src_affine.g8b | 4 + src/shaders/render/exa_wm_src_sample_planar.g8a | 106 ++++++++++++++++++++++++ src/shaders/render/exa_wm_src_sample_planar.g8b | 20 +++++ src/shaders/render/exa_wm_write.g8a | 83 +++++++++++++++++++ src/shaders/render/exa_wm_write.g8b | 19 +++++ src/shaders/render/exa_wm_yuv_rgb.g8a | 106 ++++++++++++++++++++++++ src/shaders/render/exa_wm_yuv_rgb.g8b | 19 +++++ 10 files changed, 435 insertions(+), 5 deletions(-) create mode 100644 src/shaders/render/exa_wm_src_affine.g8a create mode 100644 src/shaders/render/exa_wm_src_affine.g8b create mode 100644 src/shaders/render/exa_wm_src_sample_planar.g8a create mode 100644 src/shaders/render/exa_wm_src_sample_planar.g8b create mode 100644 src/shaders/render/exa_wm_write.g8a create mode 100644 src/shaders/render/exa_wm_write.g8b create mode 100644 src/shaders/render/exa_wm_yuv_rgb.g8a create mode 100644 src/shaders/render/exa_wm_yuv_rgb.g8b diff --git a/src/i965_render.c b/src/i965_render.c index 2e706976..36eca9dc 
100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -155,10 +155,10 @@ static const uint32_t sf_kernel_static_gen8[][4] = { }; static const uint32_t ps_kernel_static_gen8[][4] = { -#include "shaders/render/exa_wm_src_affine.g7b" -#include "shaders/render/exa_wm_src_sample_planar.g7b" -#include "shaders/render/exa_wm_yuv_rgb.g7b" -#include "shaders/render/exa_wm_write.g7b" +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_planar.g8b" +#include "shaders/render/exa_wm_yuv_rgb.g8b" +#include "shaders/render/exa_wm_write.g8b" }; static const uint32_t ps_subpic_kernel_static_gen8[][4] = { diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index bed683b0..47d5a6b1 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -85,6 +85,20 @@ INTEL_G7B_HASWELL = \ exa_wm_yuv_color_balance.g7b.haswell \ $(NULL) +INTEL_G8A = \ + exa_wm_src_affine.g8a \ + exa_wm_src_sample_planar.g8a \ + exa_wm_write.g8a \ + exa_wm_yuv_rgb.g8a + +INTEL_G8S = $(INTEL_G8A:%.g8a=%.g8s) + +INTEL_G8B = \ + exa_wm_src_affine.g8b \ + exa_wm_src_sample_planar.g8b \ + exa_wm_yuv_rgb.g8b \ + exa_wm_write.g8b + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G4B) @@ -92,11 +106,12 @@ TARGETS += $(INTEL_G4B_GEN5) TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) TARGETS += $(INTEL_G7B_HASWELL) +TARGETS += $(INTEL_G8B) endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4s .g4b .g4b.gen5 .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell +SUFFIXES = .g4a .g4s .g4b .g4b.gen5 .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell .g8a .g8b .g8s if HAVE_GEN4ASM $(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I) @@ -120,12 +135,21 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I) $(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $< .g7s.g7b.haswell: $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< + + +$(INTEL_G8S): $(INTEL_G8A) $(INTEL_G8I) +.g8a.g8s: + $(AM_V_GEN)m4 $< > $@ +.g8s.g8b: + $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $< + endif CLEANFILES = \ $(INTEL_G4S) \ $(INTEL_G6S) \ $(INTEL_G7S) 
\ + $(INTEL_G8S) \ $(NULL) EXTRA_DIST = \ @@ -138,6 +162,8 @@ EXTRA_DIST = \ $(INTEL_G7A) \ $(INTEL_G7B) \ $(INTEL_G7B_HASWELL) \ + $(INTEL_G8A) \ + $(INTEL_G8B) \ $(NULL) # Extra clean files so that maintainer-clean removes *everything* diff --git a/src/shaders/render/exa_wm_src_affine.g8a b/src/shaders/render/exa_wm_src_affine.g8a new file mode 100644 index 00000000..1d4efcc4 --- /dev/null +++ b/src/shaders/render/exa_wm_src_affine.g8a @@ -0,0 +1,47 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ * + */ + +/* + * Fragment to compute src u/v values + */ +include(`exa_wm.g4i') + +define(`ul', `g66') +define(`uh', `g67') +define(`vl', `g68') +define(`vh', `g69') + +define(`bl', `g2.0<8,8,1>F') +define(`bh', `g4.0<8,8,1>F') + +define(`a0_a_x',`g7.0<0,1,0>F') +define(`a0_a_y',`g7.16<0,1,0>F') + +/* U */ +pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ +pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */ + +/* V */ +pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */ +pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */ diff --git a/src/shaders/render/exa_wm_src_affine.g8b b/src/shaders/render/exa_wm_src_affine.g8b new file mode 100644 index 00000000..02732579 --- /dev/null +++ b/src/shaders/render/exa_wm_src_affine.g8b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x28403ae8, 0x3a0000e0, 0x008d0040 }, + { 0x0060005a, 0x28603ae8, 0x3a0000e0, 0x008d0080 }, + { 0x0060005a, 0x28803ae8, 0x3a0000f0, 0x008d0040 }, + { 0x0060005a, 0x28a03ae8, 0x3a0000f0, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8a b/src/shaders/render/exa_wm_src_sample_planar.g8a new file mode 100644 index 00000000..76844913 --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_planar.g8a @@ -0,0 +1,106 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu + * Keith Packard + * Zhao Yakui + */ + +/* Sample the src surface in planar format */ + +include(`exa_wm.g4i') + +/* Ivybridge uses GRFs in SEND instruction */ +define(`src_msg_gen8', `g65') +define(`src_msg_ind_gen8',`65') +/* UV flag */ +define(`uv_flag', `g6.0<0,1,0>UW') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ +cmp.e.f0.0 (1) null uv_flag 0x1UW {align1}; +(f0.0) jmpi INTERLEAVED_UV; + +cmp.e.f0.0 (1) null uv_flag 0x2UW {align1}; +(f0.0) jmpi CONSTANT_UV; + +/* load r */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* emit sampler 'send' cmd */ + +/* sample U (Cr) */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_g<1>UW /* readback */ + null + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +/* sample V (Cb) */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_b<1>UW /* readback */ + null + sampler (5,4,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* 
here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +jmpi SAMPLE_Y; + +CONSTANT_UV: +mov (16) src_sample_g<1>f 0.5f { compr align1 mask_disable }; +mov (16) src_sample_b<1>f 0.5f { compr align1 mask_disable }; + +jmpi SAMPLE_Y; + +INTERLEAVED_UV: +mov (1) g0.8<1>UD 0x0000c000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample UV (CrCb) */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_g<1>UW /* readback */ + null + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 4 { align1 }; /* required message len 5, readback len 8 */ + + +SAMPLE_Y: +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample Y */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_r<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8b b/src/shaders/render/exa_wm_src_sample_planar.g8b new file mode 100644 index 00000000..f29cfe4b --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_planar.g8b @@ -0,0 +1,20 @@ + { 0x01000010, 0x200012e0, 0x160000c0, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x01000010, 0x200012e0, 0x160000c0, 0x00020002 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22000a48, 0x0e000820, 0x0a2c0203 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 
0x02800031, 0x22400a48, 0x0e000820, 0x0a2c0405 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x00800001, 0x22003eec, 0x38000000, 0x3f000000 }, + { 0x00800001, 0x22403eec, 0x38000000, 0x3f000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000c000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22000a48, 0x0e000820, 0x0a4c0203 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a2c0001 }, diff --git a/src/shaders/render/exa_wm_write.g8a b/src/shaders/render/exa_wm_write.g8a new file mode 100644 index 00000000..58347b3f --- /dev/null +++ b/src/shaders/render/exa_wm_write.g8a @@ -0,0 +1,83 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +include(`exa_wm.g4i') + +/* header */ +define(`data_port_msg_2_0', `g64') +define(`data_port_msg_2_1', `g65') +define(`data_port_msg_2_ind', `64') + +mov (8) data_port_msg_2_0<1>UD g0<8,8,1>UD {align1 mask_disable}; +mov (8) data_port_msg_2_1<1>UD g1<8,8,1>UD {align1 mask_disable}; + +/* + * Prepare data in g66-g67 for Red channel, g68-g69 for Green channel, + * g70-g71 for Blue and g72-g73 for Alpha channel + */ +define(`slot_r_00', `g66') +define(`slot_r_01', `g67') +define(`slot_g_00', `g68') +define(`slot_g_01', `g69') +define(`slot_b_00', `g70') +define(`slot_b_01', `g71') +define(`slot_a_00', `g72') +define(`slot_a_01', `g73') + +mov (8) slot_r_00<1>F src_sample_r_01<1>F { align1 mask_disable }; +mov (8) slot_r_01<1>F src_sample_r_23<1>F { align1 mask_disable }; + +mov (8) slot_g_00<1>F src_sample_g_01<1>F { align1 mask_disable }; +mov (8) slot_g_01<1>F src_sample_g_23<1>F { align1 mask_disable }; + +mov (8) slot_b_00<1>F src_sample_b_01<1>F { align1 mask_disable }; +mov (8) slot_b_01<1>F src_sample_b_23<1>F { align1 mask_disable }; + +mov (8) slot_a_00<1>F src_sample_a_01<1>F { align1 mask_disable }; +mov (8) slot_a_01<1>F src_sample_a_23<1>F { align1 mask_disable }; + +send (16) + data_port_msg_2_ind + null<1>UW + null + write ( + 0, /* binding table index */ + 16, /* last render target(1) + slots 15:0(0) + msg type simd16 single source(000) */ + 12, /* render target write */ + 0, /* ignore for Ivybridge */ + 1 /* header present */ + ) + mlen 10 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; + diff --git a/src/shaders/render/exa_wm_write.g8b b/src/shaders/render/exa_wm_write.g8b new file mode 100644 index 00000000..2f237de1 --- /dev/null +++ b/src/shaders/render/exa_wm_write.g8b @@ -0,0 +1,19 @@ + { 0x00600001, 0x2800020c, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x2820020c, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28403aec, 0x002001c0, 0x00000000 }, + { 0x00600001, 0x28603aec, 0x002001e0, 0x00000000 }, + { 
0x00600001, 0x28803aec, 0x00200200, 0x00000000 }, + { 0x00600001, 0x28a03aec, 0x00200220, 0x00000000 }, + { 0x00600001, 0x28c03aec, 0x00200240, 0x00000000 }, + { 0x00600001, 0x28e03aec, 0x00200260, 0x00000000 }, + { 0x00600001, 0x29003aec, 0x00200280, 0x00000000 }, + { 0x00600001, 0x29203aec, 0x002002a0, 0x00000000 }, + { 0x05800031, 0x20000a40, 0x0e000800, 0x940b1000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8a b/src/shaders/render/exa_wm_yuv_rgb.g8a new file mode 100644 index 00000000..62669c80 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_rgb.g8a @@ -0,0 +1,106 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard + * Eric Anholt + * Zhao Yakui + * + */ + +include(`exa_wm.g4i') + +define(`YCbCr_base', `src_sample_base') + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `mask_sample_g') +define(`Crn_01', `mask_sample_g_01') +define(`Crn_23', `mask_sample_g_23') + +define(`Yn', `mask_sample_r') +define(`Yn_01', `mask_sample_r_01') +define(`Yn_23', `mask_sample_r_23') + +define(`Cbn', `mask_sample_b') +define(`Cbn_01', `mask_sample_b_01') +define(`Cbn_23', `mask_sample_b_23') + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + */ + + /* Normalize Y, Cb and Cr: + * + * Yn = (Y - 16/255) * 1.164 + * Crn = Cr - 128 / 255 + * Cbn = Cb - 128 / 255 + */ +add (16) Yn<1>F Y<8;8,1>F -0.0627451F { compr align1 }; +mul (16) Yn<1>F Yn<8;8,1>F 1.164F { compr align1 }; + +add (16) Crn<1>F Cr<8;8,1>F -0.501961F { compr align1 }; + +add (16) Cbn<1>F Cb<8;8,1>F -0.501961F { compr align1 }; + + /* + * R = Y + Cr * 1.596 + */ +mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 }; +mac.sat(8) src_sample_r_01<1>F Crn_01<8;8,1>F 1.596F { align1 }; +mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; +mac.sat(8) src_sample_r_23<1>F Crn_23<8;8,1>F 1.596F { align1 }; + + /* + * G = Crn * -0.813 + Cbn * -0.392 + Y + */ +mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 
}; +mac (8) acc0<1>F Crn_01.0<8;8,1>F -0.813F { align1 }; +mac.sat(8) src_sample_g_01<1>F Cbn_01.0<8;8,1>F -0.392F { align1 }; +mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; +mac (8) acc0<1>F Crn_23.0<8;8,1>F -0.813F { align1 }; +mac.sat(8) src_sample_g_23<1>F Cbn_23.0<8;8,1>F -0.392F { align1 }; + + /* + * B = Cbn * 2.017 + Y + */ +mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 }; +mac.sat(8) src_sample_b_01<1>F Cbn_01.0<8;8,1>F 2.017F { align1 }; + +mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; +mac.sat(8) src_sample_b_23<1>F Cbn_23.0<8;8,1>F 2.017F { align1 }; + /* + * A = 1.0 + */ +mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8b b/src/shaders/render/exa_wm_yuv_rgb.g8b new file mode 100644 index 00000000..8898a395 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_rgb.g8b @@ -0,0 +1,19 @@ + { 0x00800040, 0x22c03ae8, 0x3e8d01c0, 0xbd808081 }, + { 0x00800041, 0x22c03ae8, 0x3e8d02c0, 0x3f94fdf4 }, + { 0x00800040, 0x23003ae8, 0x3e8d0240, 0xbf008084 }, + { 0x00800040, 0x23403ae8, 0x3e8d0200, 0xbf008084 }, + { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, + { 0x80600048, 0x21c03ae8, 0x3e8d0300, 0x3fcc49ba }, + { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, + { 0x80600048, 0x21e03ae8, 0x3e8d0320, 0x3fcc49ba }, + { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d0300, 0xbf5020c5 }, + { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 }, + { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d0320, 0xbf5020c5 }, + { 0x80600048, 0x22203ae8, 0x3e8d0360, 0xbec8b439 }, + { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, + { 0x80600048, 0x22403ae8, 0x3e8d0340, 0x40011687 }, + { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, + { 0x80600048, 0x22603ae8, 0x3e8d0360, 0x40011687 }, + { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 }, -- cgit v1.2.1 From 8ed48ca2b0d8dc68a7c503b4b8c17f8ea1d16c20 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 
10:38:13 +0800 Subject: Update the pixel shader of subpic function for BDW Signed-off-by: Zhao Yakui --- src/i965_render.c | 6 +-- src/shaders/render/Makefile.am | 2 + src/shaders/render/exa_wm_src_sample_argb.g8a | 59 +++++++++++++++++++++++++++ src/shaders/render/exa_wm_src_sample_argb.g8b | 5 +++ 4 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 src/shaders/render/exa_wm_src_sample_argb.g8a create mode 100644 src/shaders/render/exa_wm_src_sample_argb.g8b diff --git a/src/i965_render.c b/src/i965_render.c index 36eca9dc..c3867215 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -162,9 +162,9 @@ static const uint32_t ps_kernel_static_gen8[][4] = { }; static const uint32_t ps_subpic_kernel_static_gen8[][4] = { -#include "shaders/render/exa_wm_src_affine.g7b" -#include "shaders/render/exa_wm_src_sample_argb.g7b" -#include "shaders/render/exa_wm_write.g7b" +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_argb.g8b" +#include "shaders/render/exa_wm_write.g8b" }; diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index 47d5a6b1..33aa367c 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -88,6 +88,7 @@ INTEL_G7B_HASWELL = \ INTEL_G8A = \ exa_wm_src_affine.g8a \ exa_wm_src_sample_planar.g8a \ + exa_wm_src_sample_argb.g8a \ exa_wm_write.g8a \ exa_wm_yuv_rgb.g8a @@ -96,6 +97,7 @@ INTEL_G8S = $(INTEL_G8A:%.g8a=%.g8s) INTEL_G8B = \ exa_wm_src_affine.g8b \ exa_wm_src_sample_planar.g8b \ + exa_wm_src_sample_argb.g8b \ exa_wm_yuv_rgb.g8b \ exa_wm_write.g8b diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8a b/src/shaders/render/exa_wm_src_sample_argb.g8a new file mode 100644 index 00000000..662ef22f --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_argb.g8a @@ -0,0 +1,59 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu + * Keith Packard + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* Ivybridge uses GRFs in SEND instruction */ +define(`src_msg_gen8', `g65') +define(`src_msg_ind_gen8',`65') + +/* subpicture global alpha */ +define(`global_alpha', `r6.0<0,1,0>f') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_base<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ + +mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 mask_disable }; + diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8b b/src/shaders/render/exa_wm_src_sample_argb.g8b new file mode 100644 index 00000000..3c86fb8b --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_argb.g8b @@ -0,0 +1,5 @@ + { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a8c0001 }, + { 0x00600041, 0x22803aec, 0x3a200280, 0x000000c0 }, + { 0x00600041, 0x22a03aec, 0x3a2002a0, 0x000000c0 }, -- cgit v1.2.1 From b3cb310bb47dbd30c3f07ff573c9c6afa4d8abeb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:13 +0800 Subject: Set the force bits to read URB offset/length for SF stage on BDW Otherwise it can't fill the thread payload correctly for pixel shader. 
Signed-off-by: Zhao Yakui --- src/i965_defines.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_render.c | 2 ++ 2 files changed, 55 insertions(+) diff --git a/src/i965_defines.h b/src/i965_defines.h index 3090ce86..52ae85f1 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -106,6 +106,11 @@ # define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 # define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 +#define GEN8_3DSTATE_RASTER CMD(3, 0, 0x50) +# define GEN8_3DSTATE_RASTER_CULL_BOTH (0 << 16) +# define GEN8_3DSTATE_RASTER_CULL_NONE (1 << 16) +# define GEN8_3DSTATE_RASTER_CULL_FRONT (2 << 16) +# define GEN8_3DSTATE_RASTER_CULL_BACK (3 << 16) #define GEN6_3DSTATE_WM CMD(3, 0, 0x14) /* DW2 */ @@ -188,9 +193,13 @@ # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) +#define GEN8_3DSTATE_MULTISAMPLE CMD(3, 0, 0x0d) +#define GEN8_3DSTATE_SAMPLE_PATTERN CMD(3, 1, 0x1C) + /* GEN7 */ #define GEN7_3DSTATE_CLEAR_PARAMS CMD(3, 0, 0x04) #define GEN7_3DSTATE_DEPTH_BUFFER CMD(3, 0, 0x05) +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER CMD(3, 0, 0x07) #define GEN7_3DSTATE_URB_VS CMD(3, 0, 0x30) #define GEN7_3DSTATE_URB_HS CMD(3, 0, 0x31) @@ -205,6 +214,8 @@ #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16) /* DW1 */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +# define GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +# define GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0 #define GEN7_3DSTATE_CONSTANT_HS CMD(3, 0, 0x19) #define GEN7_3DSTATE_CONSTANT_DS CMD(3, 0, 0x1a) @@ -222,6 +233,11 @@ # define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) # define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29) +# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28) + +# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 +#define GEN8_3DSTATE_SBE_SWIZ CMD(3, 0, 0x51) #define GEN7_3DSTATE_PS CMD(3, 0, 0x20) /* DW1: kernel pointer */ @@ 
-254,6 +270,37 @@ /* DW6: kernel 1 pointer */ /* DW7: kernel 2 pointer */ +# define GEN8_PS_MAX_THREADS_SHIFT 23 + +#define GEN8_3DSTATE_PSEXTRA CMD(3, 0, 0x4f) +/* DW1 */ +# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31) +# define GEN8_PSX_PSCDEPTH_OFF (0 << 26) +# define GEN8_PSX_PSCDEPTH_ON (1 << 26) +# define GEN8_PSX_PSCDEPTH_ON_GE (2 << 26) +# define GEN8_PSX_PSCDEPTH_ON_LE (3 << 26) +# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8) + +#define GEN8_3DSTATE_PSBLEND CMD(3, 0, 0x4d) +/* DW1 */ +# define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) +# define GEN8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30) +# define GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29) +# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24) +# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24 +# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19) +# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19 +# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14) +# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14 +# define GEN8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9) +# define GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9 +# define GEN8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) +# define GEN8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) + + +#define GEN7_3DSTATE_STENCIL_BUFFER CMD(3, 0, 0x06) +#define GEN8_3DSTATE_WM_DEPTH_STENCIL CMD(3, 0, 0x4e) + #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL CMD(3, 0, 0x21) #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC CMD(3, 0, 0x23) @@ -614,6 +661,8 @@ #define VE1_VFCOMPONENT_2_SHIFT 20 #define VE1_VFCOMPONENT_3_SHIFT 16 #define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 +#define GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN8 */ +#define GEN8_VE0_VALID (1 << 25) /* for GEN8 */ #define VB0_BUFFER_INDEX_SHIFT 27 #define GEN6_VB0_BUFFER_INDEX_SHIFT 26 @@ -623,6 +672,8 @@ #define GEN6_VB0_INSTANCEDATA (1 << 20) #define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) #define VB0_BUFFER_PITCH_SHIFT 0 +#define 
GEN8_VB0_BUFFER_INDEX_SHIFT 26 +#define GEN8_VB0_MOCS_SHIFT 16 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) #define _3DPRIMITIVE_VERTEX_RANDOM (1 << 15) @@ -653,6 +704,8 @@ #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 +#define GEN8_3DSTATE_VF_TOPOLOGY CMD(3, 0, 0x4b) + #define I965_TILEWALK_XMAJOR 0 #define I965_TILEWALK_YMAJOR 1 diff --git a/src/i965_render.c b/src/i965_render.c index c3867215..02356591 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3742,6 +3742,8 @@ gen8_emit_sf_state(VADriverContextP ctx) BEGIN_BATCH(batch, 4); OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2)); OUT_BATCH(batch, + (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) | + (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) | (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | (0 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); -- cgit v1.2.1 From 088e84c25c74b587ace06bf67cd8f8ac84f433c0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 1 Mar 2013 10:38:14 +0800 Subject: Set render surface alignment on BDW This is the requirement per B-spec. 
Signed-off-by: Zhao Yakui --- src/i965_render.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/i965_render.c b/src/i965_render.c index 02356591..b1714a69 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -916,6 +916,10 @@ gen8_render_set_surface_state( ss->ss3.pitch = pitch - 1; + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + dri_bo_get_tiling(bo, &tiling, &swizzle); gen8_render_set_surface_tiling(ss, tiling); } -- cgit v1.2.1 From d1ce64926f3c0fedcfe4bab86e51ed406b5aa96f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 27 Mar 2013 09:24:15 +0800 Subject: Rewrite the VME shader for encoding on BDW Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 2 +- src/gen8_vme.c | 35 +- src/shaders/vme/Makefile.am | 26 +- src/shaders/vme/inter_bframe_gen8.asm | 875 +++++++++++++++++++++++++++++ src/shaders/vme/inter_bframe_gen8.g8a | 2 + src/shaders/vme/inter_bframe_gen8.g8b | 423 ++++++++++++++ src/shaders/vme/inter_frame_gen8.asm | 727 ++++++++++++++++++++++++ src/shaders/vme/inter_frame_gen8.g8a | 2 + src/shaders/vme/inter_frame_gen8.g8b | 300 ++++++++++ src/shaders/vme/intra_frame_gen8.asm | 185 ++++++ src/shaders/vme/intra_frame_gen8.g8a | 2 + src/shaders/vme/intra_frame_gen8.g8b | 72 +++ src/shaders/vme/mpeg2_inter_frame_gen8.g8a | 3 + src/shaders/vme/mpeg2_inter_frame_gen8.g8b | 300 ++++++++++ src/shaders/vme/vme8.inc | 341 +++++++++++ src/shaders/vme/vme8_mpeg2.inc | 18 + 16 files changed, 3281 insertions(+), 32 deletions(-) create mode 100644 src/shaders/vme/inter_bframe_gen8.asm create mode 100644 src/shaders/vme/inter_bframe_gen8.g8a create mode 100644 src/shaders/vme/inter_bframe_gen8.g8b create mode 100644 src/shaders/vme/inter_frame_gen8.asm create mode 100644 src/shaders/vme/inter_frame_gen8.g8a create mode 100644 src/shaders/vme/inter_frame_gen8.g8b create mode 100644 src/shaders/vme/intra_frame_gen8.asm create mode 100644 src/shaders/vme/intra_frame_gen8.g8a create mode 
100644 src/shaders/vme/intra_frame_gen8.g8b create mode 100644 src/shaders/vme/mpeg2_inter_frame_gen8.g8a create mode 100644 src/shaders/vme/mpeg2_inter_frame_gen8.g8b create mode 100644 src/shaders/vme/vme8.inc create mode 100644 src/shaders/vme/vme8_mpeg2.inc diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index fb8b0d7b..1deaae38 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -2463,7 +2463,7 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e mfc_context->avc_qm_state = gen8_mfc_avc_qm_state; mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state; mfc_context->insert_object = gen8_mfc_avc_insert_object; - mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup; + mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup; encoder_context->mfc_context = mfc_context; encoder_context->mfc_context_destroy = gen8_mfc_context_destroy; diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 389ca8e8..65d27c1a 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -51,8 +51,7 @@ #define VME_INTRA_SHADER 0 #define VME_INTER_SHADER 1 -#define VME_BINTER_SHADER 3 -#define VME_BATCHBUFFER 2 +#define VME_BINTER_SHADER 2 #define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */ #define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */ @@ -61,19 +60,15 @@ #define VME_MSG_LENGTH 32 static const uint32_t gen8_vme_intra_frame[][4] = { -#include "shaders/vme/intra_frame_haswell.g75b" +#include "shaders/vme/intra_frame_gen8.g8b" }; static const uint32_t gen8_vme_inter_frame[][4] = { -#include "shaders/vme/inter_frame_haswell.g75b" +#include "shaders/vme/inter_frame_gen8.g8b" }; static const uint32_t gen8_vme_inter_bframe[][4] = { -#include "shaders/vme/inter_bframe_haswell.g75b" -}; - -static const uint32_t gen8_vme_batchbuffer[][4] = { -#include "shaders/vme/batchbuffer.g75b" +#include "shaders/vme/inter_bframe_gen8.g8b" }; static struct i965_kernel gen8_vme_kernels[] = { @@ -91,13 +86,6 @@ 
static struct i965_kernel gen8_vme_kernels[] = { sizeof(gen8_vme_inter_frame), NULL }, - { - "VME BATCHBUFFER", - VME_BATCHBUFFER, - gen8_vme_batchbuffer, - sizeof(gen8_vme_batchbuffer), - NULL - }, { "VME inter BFrame", VME_BINTER_SHADER, @@ -108,15 +96,11 @@ static struct i965_kernel gen8_vme_kernels[] = { }; static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = { -#include "shaders/vme/intra_frame_haswell.g75b" +#include "shaders/vme/intra_frame_gen8.g8b" }; static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_haswell.g75b" -}; - -static const uint32_t gen8_vme_mpeg2_batchbuffer[][4] = { -#include "shaders/vme/batchbuffer.g75b" +#include "shaders/vme/mpeg2_inter_frame_gen8.g8b" }; static struct i965_kernel gen8_vme_mpeg2_kernels[] = { @@ -134,13 +118,6 @@ static struct i965_kernel gen8_vme_mpeg2_kernels[] = { sizeof(gen8_vme_mpeg2_inter_frame), NULL }, - { - "VME BATCHBUFFER", - VME_BATCHBUFFER, - gen8_vme_mpeg2_batchbuffer, - sizeof(gen8_vme_mpeg2_batchbuffer), - NULL - }, }; /* only used for VME source surface state */ diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 634e6d4a..d84f7952 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,6 +1,7 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm +VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a @@ -17,16 +18,24 @@ INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) + +INTEL_G8B = 
intra_frame_gen8.g8b inter_frame_gen8.g8b mpeg2_inter_frame_gen8.g8b inter_bframe_gen8.g8b +INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a mpeg2_inter_frame_gen8.g8a inter_bframe_gen8.g8a +INTEL_GEN8_INC = vme8.inc vme8_mpeg2.inc +INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) + + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) TARGETS += $(INTEL_G75B) +TARGETS += $(INTEL_G8B) endif all-local: $(TARGETS) -SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm +SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm .g8a .g8b .gen8.asm if HAVE_GEN4ASM $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC) @@ -53,9 +62,18 @@ $(INTEL_GEN75_ASM): $(VME75_CORE) $(INTEL_GEN75_INC) rm _vme0.$@ .gen75.asm.g75b: $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< + +$(INTEL_GEN8_ASM): $(VME8_CORE) $(INTEL_GEN8_INC) +.g8a.gen8.asm: + $(AM_V_GEN)cpp -P $< > _vme0.$@ && \ + m4 _vme0.$@ > $@ && \ + rm _vme0.$@ +.gen8.asm.g8b: + $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $< + endif -CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) +CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) $(INTEL_GEN8_ASM) EXTRA_DIST = \ $(INTEL_G6A) \ @@ -64,11 +82,15 @@ EXTRA_DIST = \ $(INTEL_G75B) \ $(INTEL_G7A) \ $(INTEL_G7B) \ + $(INTEL_G8A) \ + $(INTEL_G8B) \ $(INTEL_GEN6_INC) \ $(INTEL_GEN75_INC) \ $(INTEL_GEN7_INC) \ + $(INTEL_GEN8_INC) \ $(VME75_CORE) \ $(VME7_CORE) \ + $(VME8_CORE) \ $(VME_CORE) \ $(NULL) diff --git a/src/shaders/vme/inter_bframe_gen8.asm b/src/shaders/vme/inter_bframe_gen8.asm new file mode 100644 index 00000000..d8de5882 --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.asm @@ -0,0 +1,875 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * Authors: Zhao Yakui + */ +// Modual name: Inter_bframe_haswell.asm +// +// Make inter predition estimation for Inter frame for B-frame +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* 
+ * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +mov (8) vme_m1.0<1>:ud 0:ud {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) +*/ +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK1:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mba_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) 
mba_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mba_result.4<1>:ud mb_mv1.8<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbb_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mba_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mba_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mba_result.8<1>:ud mb_mv1.12<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbb_start; +mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1}; +mov (2) mba_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mba_result.20<1>:w 0:w {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 8 oword 
(128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mbb_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbb_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbc_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbb_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbb_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbb_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbc_start; +mov (2) mbb_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbb_result.20<1>:w 0:w {align1}; +mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov 
(1) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbc_result.20<1>:w 0:w {align1}; +mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + 
OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK3:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mb_mv3.24<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.8<1>:ud mb_mv3.28<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbc_result.20<1>:w 0:w {align1}; +mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are inavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +(f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; +(f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; +(f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; +(f0.0) mov (2) mb_mvp_ref.0<1>:ud mba_result.4<2,2,1>:ud {align1}; +(-f0.0) mov (2) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + 
+mb_median_start: +/* forward_MVP */ +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mvp_backward; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + + +mvp_backward: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.8<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.8<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d 
mbc_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.8<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.4<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.8<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.8<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.8<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.4<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.10<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.10<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.10<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: +asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1}; +add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1}; +and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1}; +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* m3 cost center */ +mov (8) vme_m3.0<1>:ud 0x0:ud {align1}; +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* m4. 
skip center */ +mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * SIC VME message + */ +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; 
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_DUAL_REFERENCE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW DREF_REGION_SIZE {align1}; +/* Dual Reference Width&Height,32x32 */ + +mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; + +/* Reference = (x-8,y-8)-(x+8,y+8) */ +add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1}; +add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1}; + +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1}; + +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number 
is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD DSTART_CENTER + DSEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mb_mvp_ref.4<0,1,0>:ud {align1}; + +/* M4/M5 search path */ + +mov (1) vme_msg_4.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + + /* 16x16 Source, 1/4 pixel, harr, BME ENABLE */ +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_ENABLE:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) tmp_reg0.0<1>:uw BI_WEIGHT {align1}; +mov (1) vme_m1.6<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD 
vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_8, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 5 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + 
OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + obw_wb + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; +/* word_imin: writes to RET_ARG the smallest of the three signed words held in INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Uses TEMP_VAR0.0 as scratch and overwrites flag f0.0. It ends with RETURN, so it is meant to be entered with SAVE_RET followed by jmpi, the way word_imedian is called. */ +word_imin: + cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* f0.0 set when arg0 <= arg1 */ + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* TEMP_VAR0.0 = min(arg0, arg1) */ + cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +/* word_imax: same structure as word_imin but with cmp.ge, so RET_ARG receives the largest of the three signed words in INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Uses TEMP_VAR0.0 as scratch and overwrites flag f0.0. */ +word_imax: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* f0.0 set when arg0 >= arg1 */ + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* TEMP_VAR0.0 = max(arg0, arg1) */ + cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; +/* word_imedian: writes to RET_ARG the median of the three signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Overwrites flag f0.0. The kernel calls it once per motion-vector component with the neighbour results from mba_result, mbb_result and mbc_result and stores RET_ARG into mb_mvp_ref. */ +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; /* a >= b is handled at cmp_a_ge_b; below here a < b */ + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */ + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c */ + (f0.0) mov (1) RET_ARG<1>:w
INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */ + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */ + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */ + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b and b < c */ + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */ + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */ +cmp_end: + RETURN {align1}; +/* mb_pred_func: writes to RET_ARG the prediction-mode bits that apply to one block of a macroblock. Inputs as read below: the byte at INPUT_ARG0.2 carries the packed prediction bits, the word at INPUT_ARG0.4 is masked with INTER_MASK to get the inter partition mode, and the word at INPUT_ARG0.8 is the block number. The result is always masked with PRED_MASK. Clears all of TEMP_VAR0 and overwrites flag f0.0. */ +mb_pred_func: + mov (8) TEMP_VAR0.0<1>:ud 0:ud {align1}; + mov (1) TEMP_VAR0.0<1>:ub INPUT_ARG0.2<0,1,0>:ub {align1}; + and (1) TEMP_VAR0.4<1>:uw INPUT_ARG0.4<0,1,0>:uw INTER_MASK:uw {align1}; + /* INTER16x16 mode. The bit1-0 is the prediction mode */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X16MODE:uw {align1}; + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + /* Check whether it is INTER8x8 mode. */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_8X8MODE:uw {align1}; + (f0.0) jmpi (1) mb_pred_func_8; + + /* Check whether it is INTER16x8 mode. */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X8MODE:uw {align1}; + (f0.0) jmpi (1) mb_pred_func_168; /* any other mode falls through to mb_pred_func_816 */ +mb_pred_func_816: + /* Block 0/2 uses the bit1-0. Block 1/3 uses the bit3-2 */ + mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; + and.z.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK1:uw {align1}; /* f0.0 set when (block number & INTER_BLOCK1) is zero */ + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; + jmpi (1) end_mb_pred; + +mb_pred_func_168: + /* Block 0/1 uses the bit1-0.
Block 2/3 uses the bit3-2 */ + mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; + cmp.l.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK2:uw {align1}; /* f0.0 set when block number < INTER_BLOCK2 */ + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; + jmpi (1) end_mb_pred; + +mb_pred_func_8: + /* 8X8 mode. Every block uses two bits as the prediction mode. */ + mul (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw 2:uw {align1}; /* shift count = 2 * block number */ + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw TEMP_VAR0.8<0,1,0>:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; +end_mb_pred: + RETURN {align1}; + diff --git a/src/shaders/vme/inter_bframe_gen8.g8a b/src/shaders/vme/inter_bframe_gen8.g8a new file mode 100644 index 00000000..8aff32ed --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "inter_bframe_gen8.asm" diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b new file mode 100644 index 00000000..b3d74cce --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.g8b @@ -0,0 +1,423 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, +
{ 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000d60 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 
0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000001 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00001490 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2af01e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2af41e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2ae40208, 0x00000bc8, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2af21e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2af61e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2ae80208, 0x00000bcc, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, + { 0x00200001, 0x2af01e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2af41e68, 0x18000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000006c0 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 
0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00001230 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b101e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b141e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b040208, 0x00000bf0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b121e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b161e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b080208, 0x00000bf4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2b101e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b141e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000230 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 
0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000003c0 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000fe0 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000bf0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000002e0 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b280208, 0x00000bf4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000290 }, + { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000250 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 
0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000003 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000d80 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000c18, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b280208, 0x00000c1c, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x00210001, 0x2b040208, 0x00450ae4, 0x00000000 }, + { 0x00210001, 0x2b240208, 0x00450ae4, 0x00000000 }, + { 
0x00210001, 0x2b141248, 0x00450af4, 0x00000000 }, + { 0x00210001, 0x2b341248, 0x00450af4, 0x00000000 }, + { 0x00210001, 0x2ac00208, 0x00450ae4, 0x00000000 }, + { 0x00310001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000320 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000009d0 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000970 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af6, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae8, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b16, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b08, 0x00000000 }, + { 
0x01000010, 0x20001a20, 0x1e000b36, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b28, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac40208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae8, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b08, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b28, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000840 }, + { 0x00000001, 0x2ac41a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000aea, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b0a, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b2a, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000007e0 }, + { 0x00000001, 0x2ac61a68, 0x00000fe4, 0x00000000 }, + { 0x0040000c, 0x2a801a68, 0x1e690ac0, 0x00020002 }, + { 0x00400040, 0x2a881a68, 0x1e690a80, 0x00030003 }, + { 0x00400005, 0x2a901248, 0x16690a88, 0xfffcfffc }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x25800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 
0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200700 }, + { 0x00000001, 0x24561648, 0x10000000, 0x20202020 }, + { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, + { 0x00000040, 0x24401a68, 0x1e000440, 0xfff8fff8 }, + { 0x00000040, 0x24421a68, 0x1e000442, 0xfff8fff8 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 }, + { 0x00010040, 0x24441a68, 0x1e000444, 0x00040004 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 }, + { 0x00010040, 0x24461a68, 0x1e000446, 0x00040004 }, + { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, + { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a94 }, + { 
0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x00001212 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000ac4, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28880608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00203000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x24662288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 
0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x20000a60, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 
0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x00600001, 0x2f600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2f602288, 0x00000fa2, 0x00000000 }, + { 0x00000005, 0x2f641248, 0x16000fa4, 0x00030003 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00000000 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000150 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000100 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, + { 0x01000005, 0x20001240, 0x16000f68, 0x00010001 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 }, + { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, + { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, + { 0x05000010, 0x20001240, 0x16000f68, 0x00020002 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, + { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000041, 0x2f681248, 0x16000fa8, 0x00020002 }, + { 0x00000008, 0x2f701248, 0x12000f60, 0x00000f68 }, + { 
0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm new file mode 100644 index 00000000..aa9fb80c --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.asm @@ -0,0 +1,727 @@ +/* + * Copyright © <2013>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * + */ +// Module name: Inter_frame_gen8.asm +// +// Make inter prediction estimation for Inter-frame on gen8 +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB 
{align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8 here; x is doubled by the next mul, giving x * 16, y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8 here; x is doubled by the next mul, giving x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rX.0(Available), 
rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) +*/ +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud 
mb_mv1.8<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) 
mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub 
INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/* TODO: Add the skip prediction */ +/* Check whether both MB B and C are unavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) 
mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; +/* m2, 
get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* m3 FWD/BWD cost center*/ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 skip center*/ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + + +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * SIC VME message + */ + +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + 
VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; + +add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */ +add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1}; + +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; + +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w 
mb_ref_win.16<2,2,1>:w {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; + +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud 
vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/4 pixel, harr, BME disable */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_8, + OBW_BIND_IDX, + 
OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 5 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; +/* Compare three word data to get the min value */ +word_imin: + cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +/* Compare three word data to get the max value */ +word_imax: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) 
mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + diff --git a/src/shaders/vme/inter_frame_gen8.g8a b/src/shaders/vme/inter_frame_gen8.g8a new file mode 100644 index 00000000..f514dd37 --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "inter_frame_gen8.asm" diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b new file mode 100644 index 00000000..c4e2c972 --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.g8b @@ -0,0 +1,300 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 
0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2ae00e28, 
0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, + { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 
0x0e001400, 0x00000110 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000040, 0x24000a28, 
0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000870 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000810 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, + { 0x00200040, 0x2a881a68, 
0x1e450a80, 0x00030003 }, + { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 
0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, + { 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 }, + { 0x00000040, 0x24421a68, 0x1e000442, 0xfff4fff4 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 }, + { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, + { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000ac0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 
0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 
0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm new file mode 100644 index 00000000..41cdb3a5 --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.asm @@ -0,0 +1,185 @@ +/* + * Copyright © <2010>, Intel Corporation. 
+ * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * + */ +// Modual name: IntraFrame_gen8.asm +// +// Make intra predition estimation for Intra frame on Gen8 +// + +// +// Now, begin source code.... +// + +/* + * __START + */ +__INTRA_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind 
INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* m2, get the MV/Mb cost passed by constant buffer +when creating EU thread by MEDIA_OBJECT */ +mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; + +/* m3. 
This is changed for FWD/BWD cost center */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4.*/ +mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + +mov (1) tmp_reg0.0<1>:UW LUMA_CHROMA_MODE:UW {align1}; +/* Use the Luma mode */ +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * VME message + */ + +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; +/* m0 */ +/* 16x16 Source, Intra_harr */ +add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) 
msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; diff --git a/src/shaders/vme/intra_frame_gen8.g8a b/src/shaders/vme/intra_frame_gen8.g8a new file mode 100644 index 00000000..859c72cf --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "intra_frame_gen8.asm" diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b new file mode 100644 index 00000000..43c904a5 --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.g8b @@ -0,0 +1,72 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 
0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 
0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, diff --git a/src/shaders/vme/mpeg2_inter_frame_gen8.g8a b/src/shaders/vme/mpeg2_inter_frame_gen8.g8a new file mode 100644 index 00000000..9925c25b --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_frame_gen8.g8a @@ -0,0 +1,3 @@ +#include "vme8.inc" +#include "vme75_mpeg2.inc" +#include "inter_frame_gen8.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame_gen8.g8b b/src/shaders/vme/mpeg2_inter_frame_gen8.g8b new file mode 100644 index 00000000..7e52539e --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_frame_gen8.g8b @@ -0,0 +1,300 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 
0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 }, + { 
0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, + { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 
0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 
0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000870 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 
0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000810 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, + { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, + { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 
0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, + { 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 }, + { 0x00000040, 0x24421a68, 0x1e000442, 0xfff4fff4 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 }, + { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, + { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000ac0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 
0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, + { 
0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 
0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc new file mode 100644 index 00000000..bf60ad51 --- /dev/null +++ b/src/shaders/vme/vme8.inc @@ -0,0 +1,341 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * + */ +// Modual name: ME_header.inc for Gen8 +// +// Global symbols define +// + +/* + * Constant + */ +define(`VME_MESSAGE_TYPE_INTER', `1') +define(`VME_MESSAGE_TYPE_INTRA', `2') +define(`VME_MESSAGE_TYPE_MIXED', `3') + +define(`VME_SIC_MESSAGE_TYPE', `1') +define(`VME_IME_MESSAGE_TYPE', `2') +define(`VME_FBR_MESSAGE_TYPE', `3') + +define(`BLOCK_32X1', `0x0000001F') +define(`BLOCK_4X16', `0x000F0003') +define(`BLOCK_8X4', `0x00070003') + +define(`LUMA_INTRA_16x16_DISABLE', `0x1') +define(`LUMA_INTRA_8x8_DISABLE', `0x2') +define(`LUMA_INTRA_4x4_DISABLE', `0x4') + +define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60') +define(`INTRA_PRED_AVAIL_FLAG_B', `0x10') +define(`INTRA_PRED_AVAIL_FLAG_C', `0x8') +define(`INTRA_PRED_AVAIL_FLAG_D', `0x4') + +define(`BIND_IDX_VME', `0') +define(`BIND_IDX_VME_REF0', `1') +define(`BIND_IDX_VME_REF1', `2') +define(`BIND_IDX_OUTPUT', `3') +define(`BIND_IDX_INEP', `4') + +define(`SUB_PEL_MODE_INTEGER', `0x00000000') +define(`SUB_PEL_MODE_HALF', `0x00001000') +define(`SUB_PEL_MODE_QUARTER', `0x00003000') + +define(`INTER_SAD_NONE', `0x00000000') +define(`INTER_SAD_HAAR', `0x00200000') + +define(`INTRA_SAD_NONE', `0x00000000') +define(`INTRA_SAD_HAAR', `0x00800000') + +define(`INTER_PART_MASK', `0x00000000') + +define(`SEARCH_CTRL_SINGLE', `0x00000000') +define(`SEARCH_CTRL_DUAL_START', `0x00000100') +define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') +define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') + 
+define(`REF_REGION_SIZE', `0x2830:UW') + +define(`BI_SUB_MB_PART_MASK', `0x0c000000') +define(`MAX_NUM_MV', `0x00000020') +define(`FB_PRUNING_ENABLE', `0x40000000') + +define(`SEARCH_PATH_LEN', `0x00003030') +define(`START_CENTER', `0x30000000') + +define(`ADAPTIVE_SEARCH_ENABLE', `0x00000002') +define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') + +define(`INTER_VME_OUTPUT_IN_OWS', `10') +define(`INTER_VME_OUTPUT_MV_IN_OWS', `8') + +define(`INTRAMBFLAG_MASK', `0x00002000') +define(`MVSIZE_UW_BASE', `0x0040') +define(`MFC_MV32_BIT_SHIFT', `5') +define(`CBP_DC_YUV_UW', `0x000E') + +define(`DC_HARR_ENABLE', `0x0000') +define(`DC_HARR_DISABLE', `0x0020') + +define(`MV32_BIT_MASK', `0x0020') +define(`MV32_BIT_SHIFT', `5') + +define(`OBW_CACHE_TYPE', `10') + + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_3', `3') /* 4 OWords */ +define(`OBW_CONTROL_8', `4') /* 8 OWords */ + +define(`FBR_BME_ENABLE', `0x00000000') +define(`FBR_BME_DISABLE', `0x00040000') + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + + +define(`OBW_HEADER_PRESENT', `1') + +/* GRF registers + * r0 header + * r1~r4 constant buffer (reserved) + * r5 inline data + * r6~r11 reserved + * r12 write back of VME message + * r13 write back of Oword Block Write + */ +/* + * GRF 0 -- header + */ +define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ + +/* + * GRF 1~4 -- Constant Buffer (reserved) + */ + +/* + * GRF 5 -- inline data + */ +define(`inline_reg0', `r5') +define(`w_in_mb_uw', `inline_reg0.2') +define(`orig_xy_ub', `inline_reg0.0') +define(`orig_x_ub', `inline_reg0.0') /* in macroblock */ +define(`orig_y_ub', `inline_reg0.1') +define(`transform_8x8_ub', `inline_reg0.4') +define(`input_mb_intra_ub', `inline_reg0.5') +define(`num_macroblocks', 
`inline_reg0.6') + +/* + * GRF 6~11 -- reserved + */ + +/* + * GRF 12~15 -- write back for VME message + */ +define(`vme_wb', `r12') +define(`vme_wb0', `r12') +define(`vme_wb1', `r13') +define(`vme_wb2', `r14') +define(`vme_wb3', `r15') +define(`vme_wb4', `r16') +define(`vme_wb5', `r17') +define(`vme_wb6', `r18') +define(`vme_ime_wb7', `r19') +define(`vme_ime_wb8', `r20') +define(`vme_ime_wb9', `r21') +define(`vme_ime_wb10', `r22') + + +/* + * GRF 24 -- write for VME output message + */ +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') + + +/* + * GRF 28~30 -- Intra Neighbor Edge Pixels + */ +define(`INEP_ROW', `r28') +define(`INEP_COL0', `r29') +define(`INEP_COL1', `r30') + +/* + * GRF 48~50 -- Chroma Neighbor Edge Pixels + */ +define(`CHROMA_ROW', `r48') +define(`CHROMA_COL', `r49') + +/* + * temporary registers + */ +define(`tmp_reg0', `r32') +define(`read0_header', `tmp_reg0') +define(`tmp_reg1', `r33') +define(`read1_header', `tmp_reg1') +define(`tmp_reg2', `r34') +define(`vme_m0', `tmp_reg2') +define(`tmp_reg3', `r35') +define(`vme_m1', `tmp_reg3') +define(`intra_flag', `vme_m1.28') +define(`intra_part_mask_ub', `vme_m1.28') +define(`mb_intra_struct_ub', `vme_m1.29') +define(`tmp_reg4', `r36') +define(`obw_m0', `tmp_reg4') +define(`tmp_reg5', `r37') +define(`obw_m1', `tmp_reg5') +define(`tmp_reg6', `r38') +define(`obw_m2', `tmp_reg6') +define(`tmp_reg7', `r39') +define(`obw_m3', `tmp_reg7') +define(`tmp_reg8', `r40') +define(`obw_m4', `tmp_reg8') +define(`tmp_reg9', `r41') +define(`tmp_x_w', `tmp_reg9.0') +define(`tmp_rega', `r42') +define(`tmp_ud0', `tmp_rega.0') +define(`tmp_ud1', `tmp_rega.4') +define(`tmp_ud2', `tmp_rega.8') +define(`tmp_ud3', `tmp_rega.12') +define(`tmp_uw0', `tmp_rega.0') +define(`tmp_uw1', `tmp_rega.2') +define(`tmp_uw2', `tmp_rega.4') +define(`tmp_uw3', `tmp_rega.6') +define(`tmp_uw4', `tmp_rega.8') +define(`tmp_uw5', `tmp_rega.10') +define(`tmp_uw6', `tmp_rega.12') +define(`tmp_uw7', `tmp_rega.14') + +define(`vme_m2', 
`r43') +define(`vme_m3', `r44') +/* + * MRF registers + */ + +define(`msg_ind', `64') +define(`msg_reg0', `r64') +define(`msg_reg1', `r65') +define(`msg_reg2', `r66') +define(`msg_reg3', `r67') +define(`msg_reg4', `r68') +define(`msg_reg5', `r69') +define(`msg_reg6', `r70') +define(`msg_reg7', `r71') +define(`msg_reg8', `r72') +define(`msg_reg9', `r73') + +define(`ts_msg_ind', `112') +define(`ts_msg_reg0', `r112') +/* + * VME message payload + */ + +define(`vme_intra_wb_length', `1') +define(`vme_wb_length', `7') +define(`sic_vme_msg_length', `8') +define(`fbr_vme_msg_length', `8') +define(`ime_vme_msg_length', `6') + +define(`vme_msg_ind', `msg_ind') +define(`vme_msg_0', `msg_reg0') +define(`vme_msg_1', `msg_reg1') +define(`vme_msg_2', `msg_reg2') + +define(`vme_msg_3', `msg_reg3') +define(`vme_msg_4', `msg_reg4') + + +define(`vme_msg_5', `msg_reg5') +define(`vme_msg_6', `msg_reg6') +define(`vme_msg_7', `msg_reg7') +define(`vme_msg_8', `msg_reg8') +define(`vme_msg_9', `msg_reg9') + +define(`BIND_IDX_CBCR', `6') + + +define(`LUMA_CHROMA_MODE', `0x0') +define(`LUMA_INTRA_MODE', `0x1') +define(`LUMA_INTRA_DISABLE', `0x2') + +define(`RETURN_REG', `r127.0') +define(`RET_ARG', `r127.4') + +/* Now at most two registers are used for input parameters */ +define(`INPUT_ARG0', `r125') +define(`INPUT_ARG1', `r126') + +/* Two temporary registers are used in the function */ +define(`TEMP_VAR0', `r123') +define(`TEMP_VAR1', `r124') + + +define(`OBR_MESSAGE_TYPE', `0') +define(`OBR_CACHE_TYPE', `10') +define(`OBR_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBR_CONTROL_2', `2') /* 2 OWords */ +define(`OBR_CONTROL_4', `3') /* 4 OWords */ +define(`OBR_CONTROL_8', `4') /* 8 OWords */ +define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on SNB+ for Data port */ +define(`OBR_HEADER_PRESENT', `1') + +define(`mb_hwdep', `r5.6') +define(`MB_AVAIL', `1:d')
+define(`MB_PRED_FLAG', `1:w') + +define(`mb_pred_mode', `r85') +define(`mb_mvp_ref', `r86') +define(`mba_result', `r87') +define(`mbb_result', `r88') +define(`mbc_result', `r89') +define(`mb_ind', `90') +define(`mb_msg0', `r90') +define(`mb_wb', `r91') +define(`mb_intra_wb', `r91') +define(`mb_inter_wb', `r92') +define(`mb_mv0', `r93') +define(`mb_mv1', `r94') +define(`mb_mv2', `r95') +define(`mb_mv3', `r96') +define(`mb_ref', `r97') +define(`mb_ref_win', `r84') + +define(`DREF_REGION_SIZE', `0x2020:UW') +define(`PRED_L0', `0x0':uw) +define(`PRED_L1', `0x1':uw) +define(`PRED_BI', `0x2':uw) +define(`PRED_DIRECT', `0x3':uw) +define(`PRED_MASK', `0x3':uw) + +/* The MAX search len per reference is 16 */ +define(`DSEARCH_PATH_LEN', `0x00001212') +define(`BI_WEIGHT', `0x20':uw) +define(`DSTART_CENTER', `0x00000000') +define(`INTER_MASK', `0x03') +define(`INTER_16X16MODE', `0x0') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') +define(`INTER_8X8MODE', `0x03') +define(`INTER_BLOCK0', `0x0') +define(`INTER_BLOCK1', `0x1') +define(`INTER_BLOCK2', `0x2') +define(`INTER_BLOCK3', `0x3') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') + + +define(`OBR_MESSAGE_FENCE', `7') +define(`OBR_MF_NOCOMMIT', `0') +define(`OBR_MF_COMMIT', `0x20') diff --git a/src/shaders/vme/vme8_mpeg2.inc b/src/shaders/vme/vme8_mpeg2.inc new file mode 100644 index 00000000..9b877acf --- /dev/null +++ b/src/shaders/vme/vme8_mpeg2.inc @@ -0,0 +1,18 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * + */ +// Module name: ME_header.inc +// +// Global symbols define +// + +/* + * Constant + */ + +define(`INTER_PART_MASK', `0x7e000000') -- cgit v1.2.1 From 39a9b6bc31b0e5280b94a1f822f46736a7cbe571 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 27 Mar 2013 09:26:32 +0800 Subject: Handle the bit length of last dword for INSERT_OBJECT on BDW Otherwise it can't insert the content of INSERT_OBJECT command during encoding, which causes that the encoded clip can't be parsed by player. Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 1deaae38..b3c1b021 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -367,6 +367,9 @@ gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *e if (batch == NULL) batch = encoder_context->base.batch; + if (data_bits_in_last_dw == 0) + data_bits_in_last_dw = 32; + BEGIN_BCS_BATCH(batch, lenght_in_dws + 2); OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2)); -- cgit v1.2.1 From e933dd81f282c81cf9b670e9198e2e56b7f5f066 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 27 Mar 2013 12:42:04 +0800 Subject: Fix the MV offset for MPEG2 on BDW Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index b3c1b021..0bf452b4 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1868,7 +1868,8 @@ gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, return len_in_dwords; } -#define MPEG2_INTER_MV_OFFSET 12 +/* Byte offset */ +#define MPEG2_INTER_MV_OFFSET 48 static struct _mv_ranges { @@ -1928,11 +1929,11 @@ gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, if (batch == NULL) batch = encoder_context->base.batch; - mvptr = (short *)msg; - mvx0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]); - mvy0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 1] / 2,
y, height_in_mbs * 16, pic_param->f_code[0][0]); - mvx1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]); - mvy1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]); + mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);; + mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]); + mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]); + mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]); + mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]); BEGIN_BCS_BATCH(batch, len_in_dwords); -- cgit v1.2.1 From 3a2e4b45c0937561b1e142c9eb0e7a0dfd61d487 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 11 Apr 2013 13:09:21 +0800 Subject: Redefine the VPP vfe_state on Gen6+ Otherwise the VFE_STATE programmed on Gen6+ is not reasonable and difficult to understand. 
Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 54 +++++++++++++++++++++++++++------------------- src/i965_post_processing.h | 9 ++++++++ 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 51bacc2c..a95d4a44 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5270,12 +5270,14 @@ gen6_pp_vfe_state(VADriverContextP ctx, OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2)); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.num_vfe_entries - 1) << 16 | - pp_context->urb.num_vfe_entries << 8); + (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | + pp_context->vfe_gpu_state.num_urb_entries << 8); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */ - (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.urb_entry_size) << 16 | + /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.curbe_allocation_size)); + /* CURBE Allocation Size, in 256 bits unit */ OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -5293,12 +5295,14 @@ gen8_pp_vfe_state(VADriverContextP ctx, OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.num_vfe_entries - 1) << 16 | - pp_context->urb.num_vfe_entries << 8); + (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | + pp_context->vfe_gpu_state.num_urb_entries << 8); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */ - (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.urb_entry_size) << 16 | + /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.curbe_allocation_size)); + /* CURBE Allocation 
Size, in 256 bits unit */ OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -6281,6 +6285,8 @@ i965_post_processing_terminate(VADriverContextP ctx) i965->pp_context = NULL; } +#define VPP_CURBE_ALLOCATION_SIZE 32 + static void i965_post_processing_context_init(VADriverContextP ctx, struct i965_post_processing_context *pp_context, @@ -6289,21 +6295,25 @@ i965_post_processing_context_init(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int i; - pp_context->urb.size = URB_SIZE((&i965->intel)); - pp_context->urb.num_vfe_entries = 32; - pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ - pp_context->urb.num_cs_entries = 1; - - if (IS_GEN7(i965->intel.device_id)) - pp_context->urb.size_cs_entry = 4; /* in 512 bits unit */ - else + if (IS_IRONLAKE(i965->intel.device_id)) { + pp_context->urb.size = URB_SIZE((&i965->intel)); + pp_context->urb.num_vfe_entries = 32; + pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ + pp_context->urb.num_cs_entries = 1; pp_context->urb.size_cs_entry = 2; - - pp_context->urb.vfe_start = 0; - pp_context->urb.cs_start = pp_context->urb.vfe_start + - pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; - assert(pp_context->urb.cs_start + - pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + pp_context->urb.vfe_start = 0; + pp_context->urb.cs_start = pp_context->urb.vfe_start + + pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; + assert(pp_context->urb.cs_start + + pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + } else { + pp_context->vfe_gpu_state.max_num_threads = 60; + pp_context->vfe_gpu_state.num_urb_entries = 59; + pp_context->vfe_gpu_state.gpgpu_mode = 0; + pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; + pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + } + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5)); 
assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 9b2d7c63..f399cbb0 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -477,6 +477,15 @@ struct i965_post_processing_context unsigned int size_cs_entry; } urb; + struct { + unsigned int gpgpu_mode : 1; + unsigned int pad0 : 7; + unsigned int max_num_threads : 16; + unsigned int num_urb_entries : 8; + unsigned int urb_entry_size : 16; + unsigned int curbe_allocation_size : 16; + } vfe_gpu_state; + struct pp_load_save_context pp_load_save_context; struct pp_scaling_context pp_scaling_context; struct pp_avs_context pp_avs_context; -- cgit v1.2.1 From 016a43518435fbefdc34187c915318cb82766aa6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 12 Apr 2013 14:49:10 +0800 Subject: Update states for VP8 decoding on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 135 +++++++++++++++++++++++++++++++++-------------- src/i965_decoder.h | 1 + src/i965_decoder_utils.c | 46 ++++++++++++++++ 3 files changed, 142 insertions(+), 40 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 87bfd7bd..4df155e5 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -2717,10 +2718,66 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx, struct gen7_mfd_context *gen7_mfd_context) { struct object_surface *obj_surface; + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + int width_in_mbs = (pic_param->frame_width + 15) / 16; + int height_in_mbs = (pic_param->frame_height + 15) / 16; + + assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ + assert(height_in_mbs > 0 && height_in_mbs <= 256); /* Current decoded picture */ obj_surface = decode_state->render_object; i965_check_alloc_surface_bo(ctx, obj_surface, 1, 
VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = NULL; + gen7_mfd_context->post_deblocking_output.valid = 0; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = 1; + + /* The same as AVC */ + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + width_in_mbs * 64, + 0x1000); + assert(bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "deblocking filter row store", + width_in_mbs * 64 * 4, + 0x1000); + assert(bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->bitplane_read_buffer.valid = 0; } static void @@ -2848,56 +2905,41 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, struct gen7_mfd_context *gen7_mfd_context) { struct intel_batchbuffer *batch = 
gen7_mfd_context->base.batch; + int i, log2num; + unsigned int offset = slice_param->slice_data_offset; + + assert(slice_param->num_of_partitions >= 2); + assert(slice_param->num_of_partitions <= 9); + + log2num = (int)log2(slice_param->num_of_partitions - 1); BEGIN_BCS_BATCH(batch, 22); OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2)); OUT_BCS_BATCH(batch, - 0 << 16 | /* Partition 0 CPBAC Entropy Count */ - 0 << 8 | /* Partition 0 Count Entropy Range */ - slice_param->num_of_partitions << 4 | + pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */ + pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */ + log2num << 4 | (slice_param->macroblock_offset & 0x7)); OUT_BCS_BATCH(batch, - 0 << 24 | /* Partition 0 Count Entropy Value */ + pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */ 0); - OUT_BCS_BATCH(batch, - 0); /* Partition 0 Data length, DW3 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 0 Data offset, DW4 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 1 Data length, DW5 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 1 Data offset, DW6 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 2 Data length, DW7 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 2 Data offset, DW8 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 3 Data length, DW9 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 3 Data offset, DW10 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 4 Data length, DW11 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 4 Data offset, DW12 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 5 Data length, DW13 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 5 Data offset, DW14 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 6 Data length, DW15 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 6 Data offset, DW16 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 7 Data length, DW17 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 7 Data offset, DW18 */ - OUT_BCS_BATCH(batch, - 0); /* Partition 8 Data length, DW19 */ - OUT_BCS_BATCH(batch, - 
0); /* Partition 8 Data offset, DW20 */ + + for (i = 0; i < 9; i++) { + if (i < slice_param->num_of_partitions) { + OUT_BCS_BATCH(batch, slice_param->partition_size[i]); + OUT_BCS_BATCH(batch, offset); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + offset += slice_param->partition_size[i]; + } + OUT_BCS_BATCH(batch, 1 << 31 | /* concealment method */ 0); + ADVANCE_BCS_BATCH(batch); } @@ -2908,17 +2950,30 @@ gen8_mfd_vp8_decode_picture(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; VAPictureParameterBufferVP8 *pic_param; + VASliceParameterBufferVP8 *slice_param; + dri_bo *slice_data_bo; assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + /* one slice per frame */ + assert(decode_state->num_slice_params == 1); + assert(decode_state->slice_params[0]->num_elements == 1); + assert(decode_state->slice_params && decode_state->slice_params[0]->buffer); + assert(decode_state->slice_datas[0]->bo); + + slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; + slice_data_bo = decode_state->slice_datas[0]->bo; + gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context); intel_batchbuffer_start_atomic_bcs(batch, 0x1000); intel_batchbuffer_emit_mi_flush(batch); gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context); intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } diff --git a/src/i965_decoder.h b/src/i965_decoder.h index c7d49d7a..01c093fc 100644 --- 
a/src/i965_decoder.h +++ b/src/i965_decoder.h @@ -29,6 +29,7 @@ #include #include +#include #include #define MAX_GEN_REFERENCE_FRAMES 16 diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index b3aba3d1..51d38a6b 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -654,6 +654,48 @@ error: return VA_STATUS_ERROR_INVALID_PARAMETER; } +static VAStatus +intel_decoder_check_vp8_parameter(VADriverContextP ctx, + struct decode_state *decode_state) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + struct object_surface *obj_surface; + int i = 0; + + if (pic_param->last_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->last_ref_frame); + + if (obj_surface && obj_surface->bo) + decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + if (pic_param->golden_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->golden_ref_frame); + + if (obj_surface && obj_surface->bo) + decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + if (pic_param->alt_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->alt_ref_frame); + + if (obj_surface && obj_surface->bo) + decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + for ( ; i < 16; i++) + decode_state->reference_objects[i] = NULL; + + return VA_STATUS_SUCCESS; +} + VAStatus intel_decoder_sanity_check_input(VADriverContextP ctx, VAProfile profile, @@ -695,6 +737,10 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileVP8Version0_3: + vaStatus = intel_decoder_check_vp8_parameter(ctx, decode_state); + break; + default: vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; break; -- cgit v1.2.1 From 
ab920e6c54f990b08e48b9c94272ec591704d692 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:14 +0800 Subject: Upload the constant buffer on Gen6+ Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index a95d4a44..317221cd 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5314,14 +5314,20 @@ gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { struct intel_batchbuffer *batch = pp_context->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int param_size; - assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size); + if (IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_static_parameter); + else + param_size = sizeof(struct pp_static_parameter); BEGIN_BATCH(batch, 4); OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); OUT_BATCH(batch, 0); OUT_BATCH(batch, - pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32); + param_size); OUT_RELOC(batch, pp_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, -- cgit v1.2.1 From 9e87fa80e0e501dfcde6e701640810b0e937de20 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:19 +0800 Subject: Add the 8x8 sampler for BDW Signed-off-by: Zhao Yakui --- src/i965_structs.h | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/src/i965_structs.h b/src/i965_structs.h index dab4678b..682dc2d5 100644 --- a/src/i965_structs.h +++ b/src/i965_structs.h @@ -1280,6 +1280,48 @@ struct gen8_sampler_state unsigned int nonsep_filter_foot_lowmask:8; } ss3; }; + +struct gen8_global_blend_state +{ + unsigned int pad0:19; + unsigned int ydither_offset:2; + unsigned int xdither_offset:2; + unsigned int color_dither_enable:1; +
unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int ia_blend_enable:1; + unsigned int alpha_to_coverage:1; +}; + +struct gen8_blend_state_rt { + struct { + unsigned int blue_write_dis:1; + unsigned int green_write_dis:1; + unsigned int red_write_dis:1; + unsigned int alpha_write_dis:1; + unsigned int pad0:1; + unsigned int alpha_blend_func:3; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int color_blend_func:3; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int colorbuf_blend:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pre_blend_src_clamp:1; + unsigned int pad0:22; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + } blend1; +}; + /* TODO: Add the sampler_8x8 for Gen8+. * AVS/Convolve is 256DWs. 
* MinMaxfilter/Erode/Dilate: 8DWs*/ @@ -1672,6 +1714,7 @@ struct gen7_sampler_8x8 } dw3; }; +/* This can also be used for BDW+ */ struct gen7_sampler_dndi { struct { @@ -1757,6 +1800,151 @@ struct gen7_sampler_dndi } dw7; }; +struct gen8_sampler_8x8_avs { + struct { + unsigned int gain_factor:6; + unsigned int weak_edge_threshold:6; + unsigned int strong_edge_threshold:6; + unsigned int r3x_coefficient:5; + unsigned int r3c_coefficient:5; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int pad1:1; + } dw0; + + struct { + unsigned int pad0; + } dw1; + + struct { + unsigned int global_noise_estimation:8; + unsigned int non_edge_weight:3; + unsigned int regular_weight:3; + unsigned int strong_edge_weight:3; + unsigned int r5x_coefficient:5; + unsigned int r5cx_coefficient:5; + unsigned int r5c_coefficient:5; + } dw2; + + struct { + unsigned int sin_alpha:8; /* S0.7 */ + unsigned int cos_alpha:8; /* S0.7 */ + unsigned int sat_max:6; + unsigned int hue_max:6; + unsigned int enable_8tap_filter:2; + unsigned int ief4_smooth_enable:1; + unsigned int skin_ief_enable:1; + } dw3; + + struct { + unsigned int s3u:11; /* S2.8 */ + unsigned int pad0:1; + unsigned int diamond_margin:3; + unsigned int vy_std_enable:1; + unsigned int umid:8; + unsigned int vmid:8; + } dw4; + + struct { + unsigned int diamond_dv:7; + unsigned int diamond_th:6; + unsigned int diamond_alpha:8; + unsigned int hs_margin:3; + unsigned int diamond_du:7; + unsigned int skin_detailfilter:1; + } dw5; + + struct { + unsigned int y_point1:8; + unsigned int y_point2:8; + unsigned int y_point3:8; + unsigned int y_point4:8; + } dw6; + + struct { + unsigned int inv_margin_vyl:16; + unsigned int pad0:16; + } dw7; + + struct { + unsigned int inv_margin_vyu:16; + unsigned int p0l:8; + unsigned int p1l:8; + } dw8; + + struct { + unsigned int p2l:8; + unsigned int p3l:8; + unsigned int b0l:8; + unsigned int b1l:8; + } dw9; + + struct { + unsigned int b2l:8; + unsigned int b3l:8; + 
unsigned int s0l:11; + unsigned int y_slope2:5; + } dw10; + + struct { + unsigned int s1l:11; + unsigned int s2l:11; + unsigned int pad0:10; + } dw11; + + struct { + unsigned int s3l:11; + unsigned int p0u:8; + unsigned int p1u:8; + unsigned int y_slope1:5; + } dw12; + + struct { + unsigned int p2u:8; + unsigned int p3u:8; + unsigned int b0u:8; + unsigned int b1u:8; + } dw13; + + struct { + unsigned int b2u:8; + unsigned int b3u:8; + unsigned int s0u:11; + unsigned int pad0:5; + } dw14; + + struct { + unsigned int s1u:11; + unsigned int s2u:11; + unsigned int pad0:10; + } dw15; + + /* DW16-DW151 */ + struct i965_sampler_8x8_coefficient coefficients[17]; + + struct { + unsigned int transition_area_with_8_pixels:3; + unsigned int pad0:1; + unsigned int transition_area_with_4_pixels:3; + unsigned int pad1:1; + unsigned int max_derivative_8_pixels:8; + unsigned int max_derivative_4_pixels:8; + unsigned int default_sharpness_level:8; + } dw152; + + struct { + unsigned int rgb_adaptive:1; + unsigned int adaptive_filter_for_all_channel:1; + unsigned int pad0:19; + unsigned int bypass_y_adaptive_filtering:1; + unsigned int bypass_x_adaptive_filtering:1; + unsigned int pad1:9; + } dw153; + + /* Reserved to 256DW */ + unsigned int reserved[102]; +}; + #define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) #define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) #define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) -- cgit v1.2.1 From 7d20fd94f82777b5de152bb1fd56c314180cf88d Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:23 +0800 Subject: Initialize the 8x8 sampler for AVS on BDW Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 212 ++++++++++++++++++++++++++++++++++----------- src/intel_driver.h | 2 + 2 files changed, 164 insertions(+), 50 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 
317221cd..6db68d82 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3500,69 +3500,180 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* TODO: Add the sampler_8x8 state */ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct i965_sampler_8x8_state *sampler_8x8_state; + struct gen8_sampler_8x8_avs *sampler_8x8; + struct i965_sampler_8x8_coefficient *sampler_8x8_state; int i; int width[3], height[3], pitch[3], offset[3]; int src_width, src_height; + memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter)); + /* source surface */ gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, width, height, pitch, offset); src_height = height[0]; + src_width = width[0]; /* destination surface */ gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, width, height, pitch, offset); /* sampler 8x8 state */ - dri_bo_map(pp_context->sampler_state_table.bo_8x8, True); - assert(pp_context->sampler_state_table.bo_8x8->virtual); - assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); - sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual; - memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + dri_bo_map(pp_context->sampler_state_table.bo, True); + assert(pp_context->sampler_state_table.bo->virtual); - for (i = 0; i < 17; i++) { + /* Currently only one gen8 sampler_8x8 is initialized */ + sampler_8x8 = (struct gen8_sampler_8x8_avs *) + pp_context->sampler_state_table.bo->virtual; + memset(sampler_8x8, 0, sizeof(*sampler_8x8)); + + sampler_8x8->dw0.gain_factor = 44; + sampler_8x8->dw0.weak_edge_threshold = 1; + sampler_8x8->dw0.strong_edge_threshold = 8; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw0.r3x_coefficient = 5; + */ + sampler_8x8->dw0.r3x_coefficient = 27; + 
sampler_8x8->dw0.r3c_coefficient = 5; + + sampler_8x8->dw2.global_noise_estimation = 255; + sampler_8x8->dw2.non_edge_weight = 1; + sampler_8x8->dw2.regular_weight = 2; + sampler_8x8->dw2.strong_edge_weight = 7; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw2.r5x_coefficient = 7; + * sampler_8x8->dw2.r5cx_coefficient = 7; + * sampler_8x8->dw2.r5c_coefficient = 7; + */ + sampler_8x8->dw2.r5x_coefficient = 9; + sampler_8x8->dw2.r5cx_coefficient = 8; + sampler_8x8->dw2.r5c_coefficient = 3; + + sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */ + sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */ + sampler_8x8->dw3.sat_max = 0x1f; + sampler_8x8->dw3.hue_max = 14; + /* The 8tap filter will determine whether the adaptive Filter is + * applied for all channels(dw153). + * If the 8tap filter is disabled, the adaptive filter should be disabled. + * Only when 8tap filter is enabled, it can be enabled or not + */ + sampler_8x8->dw3.enable_8tap_filter = 3; + sampler_8x8->dw3.ief4_smooth_enable = 0; + + sampler_8x8->dw4.s3u = 0; + sampler_8x8->dw4.diamond_margin = 4; + sampler_8x8->dw4.vy_std_enable = 0; + sampler_8x8->dw4.umid = 110; + sampler_8x8->dw4.vmid = 154; + + sampler_8x8->dw5.diamond_dv = 0; + sampler_8x8->dw5.diamond_th = 35; + sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */ + sampler_8x8->dw5.hs_margin = 3; + sampler_8x8->dw5.diamond_du = 2; + + sampler_8x8->dw6.y_point1 = 46; + sampler_8x8->dw6.y_point2 = 47; + sampler_8x8->dw6.y_point3 = 254; + sampler_8x8->dw6.y_point4 = 255; + + sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */ + + sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */ + sampler_8x8->dw8.p0l = 46; + sampler_8x8->dw8.p1l = 216; + + sampler_8x8->dw9.p2l = 236; + sampler_8x8->dw9.p3l = 236; + sampler_8x8->dw9.b0l = 133; + sampler_8x8->dw9.b1l = 130; + + sampler_8x8->dw10.b2l = 130; + sampler_8x8->dw10.b3l = 130; + /* s0l = -5 / 256. 
s2.8 */ + sampler_8x8->dw10.s0l = 1029; /* s0l = 0 */ + sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */ + + sampler_8x8->dw11.s1l = 0; + sampler_8x8->dw11.s2l = 0; + + sampler_8x8->dw12.s3l = 0; + sampler_8x8->dw12.p0u = 46; + sampler_8x8->dw12.p1u = 66; + sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */ + + sampler_8x8->dw13.p2u = 130; + sampler_8x8->dw13.p3u = 236; + sampler_8x8->dw13.b0u = 143; + sampler_8x8->dw13.b1u = 163; + + sampler_8x8->dw14.b2u = 200; + sampler_8x8->dw14.b3u = 140; + sampler_8x8->dw14.s0u = 256; /* s0u = 0 */ + + sampler_8x8->dw15.s1u = 113; /* s1u = 0 */ + sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */ + + sampler_8x8_state = sampler_8x8->coefficients; + + for (i = 0; i < 17; i++) { + float coff; + coff = i; + coff = coff / 16; + + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); /* for Y channel, currently ignore */ - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0; + 
sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0; /* for U/V channel, 0.25 */ - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0; - 
sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00; + sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0; + sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw7.table_1y_filter_c4 = + intel_format_convert(coff, 1, 6,0); + sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0; + sampler_8x8_state++; } - sampler_8x8_state->dw136.default_sharpness_level = 0; - sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1; - sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; - sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; - dri_bo_unmap(pp_context->sampler_state_table.bo_8x8); + sampler_8x8->dw152.default_sharpness_level = 0; + sampler_8x8->dw153.adaptive_filter_for_all_channel = 1; + sampler_8x8->dw153.bypass_y_adaptive_filtering = 1; + sampler_8x8->dw153.bypass_x_adaptive_filtering = 1; + + dri_bo_unmap(pp_context->sampler_state_table.bo); /* private function & data */ @@ -3577,15 +3688,16 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); pp_avs_context->src_w = src_rect->width; pp_avs_context->src_h = src_rect->height; + pp_avs_context->horiz_range = (float)src_rect->width / src_width; int dw = (pp_avs_context->src_w - 1) / 16 + 
1; dw = MAX(dw, dst_rect->width); pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ - pp_static_parameter->grf2.avs_wa_width = dw; - pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); + pp_static_parameter->grf2.avs_wa_width = src_width; + pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); + pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; @@ -5025,13 +5137,13 @@ gen8_pp_initialize( pp_context->idrt.bo = bo; pp_context->idrt.num_interface_descriptors = 0; - dri_bo_unreference(pp_context->sampler_state_table.bo_8x8); + dri_bo_unreference(pp_context->sampler_state_table.bo); bo = dri_bo_alloc(i965->intel.bufmgr, "sampler 8x8 state ", - 4096, + 4096 * 2, 4096); assert(bo); - pp_context->sampler_state_table.bo_8x8 = bo; + pp_context->sampler_state_table.bo = bo; dri_bo_unreference(pp_context->vfe_state.bo); diff --git a/src/intel_driver.h b/src/intel_driver.h index 77ac815b..7580006a 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -38,6 +38,8 @@ #define XY_COLOR_BLT_WRITE_RGB (1 << 20) #define XY_COLOR_BLT_DST_TILED (1 << 11) +#define GEN8_XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x05) + /* BR13 */ #define BR13_8 (0x0 << 24) #define BR13_565 (0x1 << 24) -- cgit v1.2.1 From a580edc0c53c01326e4a1337a5ae4fe43e0ee6de Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:38 +0800 Subject: Add the NV12 scaling shader for BDW This is the first VPP shader for BDW, which is used to do the NV12 scaling conversion. 
Signed-off-by: Zhao Yakui --- configure.ac | 1 + src/i965_post_processing.c | 2 +- src/shaders/post_processing/Makefile.am | 2 +- src/shaders/post_processing/gen8/EOT.g8a | 166 ++++++ src/shaders/post_processing/gen8/Makefile.am | 43 ++ src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a | 462 +++++++++++++++ src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a | 458 +++++++++++++++ src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a | 458 +++++++++++++++ src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a | 460 +++++++++++++++ src/shaders/post_processing/gen8/Save_AVS_NV12.g8a | 621 +++++++++++++++++++++ .../post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a | 361 ++++++++++++ src/shaders/post_processing/gen8/Set_Layer_0.g8a | 483 ++++++++++++++++ src/shaders/post_processing/gen8/VP_Setup.g8a | 440 +++++++++++++++ src/shaders/post_processing/gen8/pl2_to_pl2.asm | 17 + src/shaders/post_processing/gen8/pl2_to_pl2.g8b | 244 ++++++++ 15 files changed, 4216 insertions(+), 2 deletions(-) create mode 100644 src/shaders/post_processing/gen8/EOT.g8a create mode 100644 src/shaders/post_processing/gen8/Makefile.am create mode 100644 src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a create mode 100644 src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a create mode 100644 src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a create mode 100644 src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a create mode 100644 src/shaders/post_processing/gen8/Save_AVS_NV12.g8a create mode 100644 src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a create mode 100644 src/shaders/post_processing/gen8/Set_Layer_0.g8a create mode 100644 src/shaders/post_processing/gen8/VP_Setup.g8a create mode 100644 src/shaders/post_processing/gen8/pl2_to_pl2.asm create mode 100644 src/shaders/post_processing/gen8/pl2_to_pl2.g8b diff --git a/configure.ac b/configure.ac index 7bd39edb..5f1f64a2 100644 --- a/configure.ac +++ b/configure.ac @@ -179,6 +179,7 @@ AC_OUTPUT([ src/shaders/post_processing/gen5_6/Makefile 
src/shaders/post_processing/gen7/Makefile src/shaders/post_processing/gen75/Makefile + src/shaders/post_processing/gen8/Makefile src/shaders/render/Makefile src/shaders/utils/Makefile src/shaders/vme/Makefile diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 6db68d82..d0e4789a 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1168,7 +1168,7 @@ static const uint32_t pp_null_gen8[][4] = { }; static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen7/pl2_to_pl2.g75b" +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" }; static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am index a0022ca9..0f5c2bfe 100644 --- a/src/shaders/post_processing/Makefile.am +++ b/src/shaders/post_processing/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = gen5_6 gen7 gen75 +SUBDIRS = gen5_6 gen7 gen75 gen8 # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen8/EOT.g8a b/src/shaders/post_processing/gen8/EOT.g8a new file mode 100644 index 00000000..72c3da32 --- /dev/null +++ b/src/shaders/post_processing/gen8/EOT.g8a @@ -0,0 +1,166 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 2 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//End of Thread message + +mov (8) r127<1>:ud r0.0<8;8,1>:ud + send (1) null<1>:d r127 0x27 0x02000010 diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am new file mode 100644 index 00000000..512d1cfd --- /dev/null +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -0,0 +1,43 @@ +INTEL_PP_G8B = \ + pl2_to_pl2.g8b \ + $(NULL) + +INTEL_PP_G8A = \ + EOT.g8a \ + PL2_AVS_Buf_0.g8a \ + PL2_AVS_Buf_1.g8a \ + PL2_AVS_Buf_2.g8a \ + PL2_AVS_Buf_3.g8a \ + Save_AVS_NV12.g8a \ + Set_AVS_Buf_0123_PL2.g8a \ + Set_Layer_0.g8a \ + VP_Setup.g8a \ + $(NULL) + +INTEL_PP_ASM = $(INTEL_PP_G8B:%.g8b=%.asm) 
+INTEL_PP_GEN8_ASM = $(INTEL_PP_G8B:%.g8b=%.g8s) + +TARGETS = +if HAVE_GEN4ASM +TARGETS += $(INTEL_PP_G8B) +endif + +all-local: $(TARGETS) + +SUFFIXES = .g8b .g8s .asm + +$(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) +.asm.g8s: + $(AM_V_GEN)cpp $< > _pp0.$@; \ + ../../gpp.py _pp0.$@ $@; \ + rm _pp0.$@ +.g8s.g8b: + $(AM_V_GEN)intel-gen4asm -a -o $@ -g 8 $< + +CLEANFILES = $(INTEL_PP_GEN7_ASM) + +EXTRA_DIST = \ + $(INTEL_PP_G8B) + +# Extra clean files so that maintainer-clean removes *everything* +MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a new file mode 100644 index 00000000..bbff22c5 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a @@ -0,0 +1,462 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: msg len 2, resp len 4 GRFs, header present, sample_8x8 + + mov (1) r16.2<1>:ud 0x0000D000:ud // Mask bits 15/14/12 set (A, B, R not written back): only Green (Y) is enabled + + + + // set the vertical block number (no write is emitted for buffer 0; presumably r25.1 already holds 0 - TODO confirm) + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 0x0000A000:ud // Enable Red+Blue channel + + send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a new file mode 100644 index 00000000..e9165760 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a @@ -0,0 +1,458 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x0C098700 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_1.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 1 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time.
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: msg len 2, resp len 4 GRFs, header present, sample_8x8 + + mov (1) r16.2<1>:ud 0x0000D000:ud // Mask bits 15/14/12 set (A, B, R not written back): only Green (Y) is enabled + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 0x0000A000:ud // Enable Red+Blue channel + + send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_1_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a new file mode 100644 index 00000000..ed51a197 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a @@
-0,0 +1,458 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x0C098700 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_2.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 2 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: msg len 2, resp len 4 GRFs, header present, sample_8x8 + + mov (1) r16.2<1>:ud 0x0000D000:ud // Mask bits 15/14/12 set (A, B, R not written back): only Green (Y) is enabled + + + // set the vertical block number + + + mov (1) r25.1<1>:ud 2:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 0x0000A000:ud
// Enable Red+Blue channel + + send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_2_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a new file mode 100644 index 00000000..5b46bf77 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a @@ -0,0 +1,460 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x0C098700 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_3.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 3 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only + + + // set the vertical block number + + + mov (1) r25.1<1>:ud 3:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 
0x0000A000:ud // Enable Red+Blue channel + + send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_3_: + nop + + diff --git a/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a new file mode 100644 index 00000000..dcb7ce0c --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a @@ -0,0 +1,621 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 131 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x0C098700 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_NV12.asm +// +// Save NV12 420 frame data block of size 16x16 +// +// To save a 16x16 block (16x16 bytes of Y and 16x8 bytes of interleaved UV), we need 2 send instructions, of size 16x16 and 16x8 respectively. +// --------------- +// | 16x16 | +// | Y | +// --------------- +// | 16x8 UV | +// --------------- + +//----------------------------------------------------------------- +//The layout of data is as follows: +//mMSGHDR0 : Y data header (16x16) +//mubMSGPAYLOAD0 : Y data payload (8 GRFs) +//mMSGHDR1 : UV data header (16x8) +//mubMSGPAYLOAD1 : UV data payload (4 GRFs) +//------------------------------------------------------------------ + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; upto 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. 
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + + //Set up header for Y,U and V data + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + + mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI + mov (1) r37.0<1>:d r7.0<0;1,0>:w { NoDDClr } //H ORI (CHROMA) = H ORI + shr (1) r37.1<1>:d r7.1<0;1,0>:w 1:w { NoDDClr, NoDDChk } //V ORI (CHROMA) = V ORI/2 + + mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16) + mov (1) r37.2<1>:ud 0x7000F:ud { NoDDChk } // UV Block width and height(16x8) + +// Unscramble, and pack data directly to MRFs + +// Data 16x16 block is divided as - +// --------- +// | 0 | +// --------- +// | 1 | +// --------- +// | 2 | +// --------- +// | 3 | +// --------- +// All sub-blocks are of size 16x4 +// 0: ubBUFFER_0 +// 1: ubBUFFER_1, ubBUFFER_0+16 +// 2: ubBUFFER_2 +// 3: ubBUFFER_3, ubBUFFER_2+16 + + //Y Rounding 16x4 top part + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> 
r[a0.0,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, 16x4 bottom part + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + //Y Rounding 16x4 top part + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw 
r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, 16x4 bottom part + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) 
r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + // restore pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4 registers + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + +//Buffer 0 +//Move Y to msg payload + mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + +//Move U to msg payload + mov (8) mubMSGPAYLOAD1(0,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(0,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Move V to msg payload + mov (8) mubMSGPAYLOAD1(0,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(0,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + +//Buffer 1 + mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk 
} + + mov (8) mubMSGPAYLOAD1(1,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(1,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(1,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + +//Buffer 2 + mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(2,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(2,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(2,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(2,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk } + +//Buffer 3 + mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(3,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(3,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(3,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(3,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk } +//=========================================================================== + +send (1) null<1>:d r28 0xc 0x120A8018:ud +send (1) null<1>:d r37 0xc 0xA0A8019:ud diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a new file mode 100644 index 00000000..1d38ae2a --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a @@ -0,0 +1,361 @@ +/* + * 
Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_PL2.asm + + + +//Module Name: Set_Buf_0123_PL2 + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT: (YYUUVVAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + //For PL2-AVS: V = 8, Y= 0, U = 4, A = 12. + mov (4) acc0.0<1>:w 0x6EA2:v //Offsets 8,0,4,12 (V,Y,U,A) minus 6, packed as nibbles 2,-6,-2,6 (presumably because 8 and 12 do not fit a signed 4-bit element) + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 64 + 6: add the 6 back plus base 64 (BUFFER_0 is declared at Base=r64.0) + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (<<5 = x32). + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + //SU LAYOUT:(YUVAYUVA) + //V = 4, Y = 0, U = 2, A = 6 + mov (4) acc0.0<1>:w 0x6204:v //Nibbles 4,0,2,6 = V,Y,U,A offsets listed above (no bias needed) + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add base 64 + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address (<<5 = x32). + + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/Set_Layer_0.g8a b/src/shaders/post_processing/gen8/Set_Layer_0.g8a new file mode 100644 index 00000000..b1b574ec --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_Layer_0.g8a @@ -0,0 +1,483 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 18 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + +#define MSG_AVS_SAMPLE 0x00000000 +#define MSG_CONVOLE_SAMPLE 0x10000000 +#define MSG_MINMAX_SAMPLE 0x20000000 +#define MSG_MINMAXF_SAMPLE 0x30000000 +#define MSG_ERODE_SAMPLE 0x40000000 +#define MSG_DILATE_SAMPLE 0x50000000 +#define MSG_BOOLCENT_SAMPLE 0x60000000 +#define MSG_CENTROID_SAMPLE 0x70000000 + +#define MSG_IEF_BYPASS 0x08000000 +#define MSG_IEF_ENABLE 0x00000000 + +//16x4 or 8x4 or 16x8 or 4x4 +#define MSG_AVS_164 0x00000000 +#define MSG_AVS_84 0x02000000 +#define 
MSG_AVS_168 0x04000000 +#define MSG_AVS_44 0x06000000 + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + + + + + +//Module name: Set_Layer_N.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Used to generate LABELS at compile time. + + +//definitions for Expand Mask +.declare uwMask_Temp1 Base=r17.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp1 Base=r17.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF +.declare udMask_Temp1 Base=r17.0 ElementSize=4 Type=ud // 1 GRF +.declare uwMask_Temp2 Base=r16.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp2 Base=r16.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF +.declare udMask_Temp2 Base=r16.0 ElementSize=4 Type=ud // 1 GRF + +.declare uwMask_Temp3 Base=r15.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp3 Base=r15.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF + +.declare udALPHA_MASK_REG Base=r21.0 ElementSize=4 Type=ud // 1 GRF +.declare udALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=4 Type=ud // 1 GRF + + +//Initialize mask reg to FFFF + + mov (16) uwALPHA_MASK_REG(0)<1> 0xFFFF:uw + + +//Fast jump for - +//LAYER0: we determine whether layer 0 is to be loaded and processed or not based +// on block mask in module "Set_Layer_0" and store result in f0.1. +// This flag is then directly used while loading buf0-3 and colorfill. 
+// (So flag f0.1 should not be changed from Set_Layer_0 till Colorfill) +// +//LAYER1-7: For all other layers, we compute whether layer is to be loaded and processed +// based on block mask in module "Set_Layer_1-7" and store result in SKIP_LAYER +// variable. +// While Loading buf 4 and 5, we move SKIP_LAYER to f0.0 every time and use it +// for Loading. +// For processing though, we move SKIP_LAYER only once to f0.1 in module +// "Set_Buf0_Buf4" and use f0.1 for deciding whether layer 1-7 (all 4 sub blocks) +// is to be processed or not. +// (So flag f0.1 should not be modified from module "Set_Buf0_Buf4" till module +// that processes sub-block 3). +// +//None of the above fast jumps apply to CSC modules. We always perform CSC irrespective of mask. +// +//Example: (Without going into finer details) +// Typical Combined kernel: +// +// (let var = decision whether to load/process that layer) +// +// Set_Layer_0 //f0.1 <- var +// .. +// Set_Layer_1 //f0.1 <- var, SKIP_LAYER <- var +// .. +// Load buf 0 //use f0.1 +// Load buf 4 //f0.0 <- SKIP_LAYER +// Load buf 1 //use f0.1 +// Load buf 5 //f0.0 <- SKIP_LAYER +// Load buf 2 //use f0.1 +// Load buf 3 //use f0.1 +// .. +// .. +// Colorfill +// .. +// Set_Buf0_Buf4 //f0.1 <- SKIP_LAYER +// process0-4 //Use f0.1 +// Load buf 4 +// Set_Buf1_Buf5 +// process1-5 +// Load buf 5 +// .. +// Set_Layer_2 //f0.1 <-var, SKIP_LAYER <- var +// .. +// Set_Buf2_Buf4 +// process2-4 +// Load buf 4 +// Set_Buf3_Buf5 +// process3-5 +// Load buf 5 +// .. 
+ + + and (1) r24.2<1>:ub r2.2<0;1,0>:uw 3:uw //Keep the low 2 bits of r2.2:uw in r24.2 (presumably the layer 0 rotation selector, 0-3 - TODO confirm) + + + //Copy all AVS Payload data + // Setup Message Payload Header for 1st block of Media Sampler 8x8 (16x4 for IVB+) + //Currently dx and dy are passed via the constant buffer (zero) + mov (1) r25.0<1>:f r7.6<0;1,0>:f //NLAS dy + mov (1) r25.6<1>:f r7.5<0;1,0>:f //NLAS dx (NOTE: r7.5 is the term multiplied as ddx in the X(8)/dx(8) calculations below) + mov (1) r25.4<1>:f r3.0<0;1,0>:f //Step X + mov (1) r25.5<1>:f r4.0<0;1,0>:f //Step Y + + + mov (1) r25.2<1>:f r6.0<0;1,0>:f //Orig X + mov (1) r25.3<1>:f r5.0<0;1,0>:f //Orig Y + + mov (1) r25.7<1>:ud 0:ud //Clear r25.7 before adding the message control bits + add (1) r25.7<1>:ud r25.7<0;1,0>:ud MSG_AVS_SAMPLE + MSG_AVS_164 + MSG_IEF_BYPASS:ud //Sum of the three MSG_* #defines (0 + 0 + 0x08000000) + + //NLAS calculations for 2nd half of blocks of Media Sampler 8x8: + // X(i) = X0 + dx*i + ddx*i*(i-1)/2 ==> X(8) = X0 + dx*8 + ddx*28 + // dx(i)= dx(0) + ddx*i ==> dx(8)= dx + ddx*8 + + //OPTIMIZATION: fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY - are sub registers of same GRF. Use NODDCLR NODDCHK. -rT + + // Calculating X(8) + mov (1) acc0.2<1>:f r6.0<0;1,0>:f + mac (1) acc0.2<1>:f r3.0<0;1,0>:f 8.0:f + mac (1) r23.2<1>:f r7.5<0;1,0>:f 28.0:f { NoDDClr } + + // Calculating Y(4) + mul (1) r23.1<1>:f r4.0<0;1,0>:f 4.0:f { NoDDClr, NoDDChk } //dY*4 + + // Calculating dx(8) + mov (1) acc0.4<1>:f r3.0<0;1,0>:f + mac (1) r23.4<1>:f r7.5<0;1,0>:f 8.0:f { NoDDClr, NoDDChk } + + // Binding Index + mov (1) r23.5<1>:ud 0:ud { NoDDChk } + + +SKIP_LAYER_L0: + nop + + diff --git a/src/shaders/post_processing/gen8/VP_Setup.g8a b/src/shaders/post_processing/gen8/VP_Setup.g8a new file mode 100644 index 00000000..95f5fe2a --- /dev/null +++ b/src/shaders/post_processing/gen8/VP_Setup.g8a @@ -0,0 +1,440 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: Zhao Yakui + */ + +// 326 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: VP_Setup.asm +// Author: Vivek Kumar +// Description: Sets up all parameters for the Video Processing Kernel + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Setup pointer to the inline parameter + +// Copy MSG HDR + mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0 + +// Only one layer is enough + +//temp; remove it once unread msg warnings are resolved -vK +mov (8) r25<1>:ud r0.0<8;8,1>:ud +mov (8) r26<1>:ud r0.0<8;8,1>:ud + +// Calculate StepX for all layers and overwrite it on the ratio + mul (8) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f //StepX_ratio = StepX / VideoStepX + + //Normalised Ratio of Horizontal step size with main video for all layers now becomes + //Normalised Horizontal step size for all layers + +// Calculate block origin for all layers and overwrite it on the frame origin + mov (2) r8.5<1>:f r7.0<2;2,1>:w //Convert origin from word to float + + cmp.e.f0.0 (1) null<1>:d r2.26<0;1,0>:ub 1:uw + + + shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 0:uw + and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw + (f0.1) jmpi (1) ROTATE_90_L0 + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw + (f0.1) jmpi (1) ROTATE_180_L0 + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw + (f0.1) jmpi (1) ROTATE_270_L0 + + // rotate 0 degree +ROTATE_0_L0: + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f 
r3.0<0;1,0>:f r8.5<0;1,0>:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.6<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 90 degree +ROTATE_90_L0: + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.6<0;1,0>:f + + mov (1) r16.0<1>:f r2.0<0;1,0>:uw + add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f + add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 180 degree +ROTATE_180_L0: + (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw + (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f + (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f + + mov (1) r16.0<1>:f r2.1<0;1,0>:uw + add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f + add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 270 degree +ROTATE_270_L0: + (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw + (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f + (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.5<0;1,0>:f + +END_SRC_BLOCK_ORIG_COMP_L0: + nop diff --git a/src/shaders/post_processing/gen8/pl2_to_pl2.asm b/src/shaders/post_processing/gen8/pl2_to_pl2.asm new file mode 100644 index 00000000..0281854d --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include 
"PL2_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_pl2.g8b b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b new file mode 100644 index 00000000..fa728828 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b @@ -0,0 +1,244 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 
0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 
0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 
0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 
0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 
0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 
0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From f26114aef700d28730f6586a71d9f2526f5083f5 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:42 +0800 Subject: Create the image with aligned width/height on BDW The hardware requires that the surface pitch should be 64 alignment. Otherwise the data port can't be accessed correctly. 
Signed-off-by: Zhao Yakui --- src/i965_drv_video.c | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 997edfa7..58b67bbf 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2671,7 +2671,7 @@ i965_CreateImage(VADriverContextP ctx, struct object_image *obj_image; VAStatus va_status = VA_STATUS_ERROR_OPERATION_FAILED; VAImageID image_id; - unsigned int width2, height2, size2, size; + unsigned int size2, size, awidth, aheight; out_image->image_id = VA_INVALID_ID; out_image->buf = VA_INVALID_ID; @@ -2691,10 +2691,18 @@ i965_CreateImage(VADriverContextP ctx, image->image_id = image_id; image->buf = VA_INVALID_ID; - size = width * height; - width2 = (width + 1) / 2; - height2 = (height + 1) / 2; - size2 = width2 * height2; + awidth = ALIGN(width, 64); + + if ((format->fourcc == VA_FOURCC('Y','V','1','2')) || + (format->fourcc == VA_FOURCC('I','4','2','0'))) { + if (awidth % 128 != 0) { + awidth = ALIGN(width, 128); + } + } + + aheight = ALIGN(height, 16); + size = awidth * aheight; + size2 = (awidth / 2) * (aheight / 2); image->num_palette_entries = 0; image->entry_bytes = 0; @@ -2704,9 +2712,9 @@ i965_CreateImage(VADriverContextP ctx, case VA_FOURCC('I','A','4','4'): case VA_FOURCC('A','I','4','4'): image->num_planes = 1; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + image->data_size = image->offsets[0] + image->pitches[0] * aheight; image->num_palette_entries = 16; image->entry_bytes = 3; image->component_order[0] = 'R'; @@ -2716,9 +2724,9 @@ i965_CreateImage(VADriverContextP ctx, case VA_FOURCC('I','A','8','8'): case VA_FOURCC('A','I','8','8'): image->num_planes = 1; - image->pitches[0] = width * 2; + image->pitches[0] = awidth * 2; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + 
image->data_size = image->offsets[0] + image->pitches[0] * aheight; image->num_palette_entries = 256; image->entry_bytes = 3; image->component_order[0] = 'R'; @@ -2732,42 +2740,42 @@ i965_CreateImage(VADriverContextP ctx, case VA_FOURCC('B','G','R','X'): case VA_FOURCC('R','G','B','X'): image->num_planes = 1; - image->pitches[0] = width * 4; + image->pitches[0] = awidth * 4; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + image->data_size = image->offsets[0] + image->pitches[0] * aheight; break; case VA_FOURCC('Y','V','1','2'): image->num_planes = 3; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width2; - image->offsets[1] = size + size2; - image->pitches[2] = width2; - image->offsets[2] = size; + image->pitches[1] = awidth / 2; + image->offsets[1] = size; + image->pitches[2] = awidth / 2; + image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; case VA_FOURCC('I','4','2','0'): image->num_planes = 3; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width2; + image->pitches[1] = awidth / 2; image->offsets[1] = size; - image->pitches[2] = width2; + image->pitches[2] = awidth / 2; image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; case VA_FOURCC('N','V','1','2'): image->num_planes = 2; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width; + image->pitches[1] = awidth; image->offsets[1] = size; image->data_size = size + 2 * size2; break; case VA_FOURCC('Y','U','Y','2'): case VA_FOURCC('U','Y','V','Y'): image->num_planes = 1; - image->pitches[0] = width * 2; + image->pitches[0] = awidth * 2; image->offsets[0] = 0; image->data_size = size * 2; break; -- cgit v1.2.1 From 37f9b9adf845d50e2f1c272b6c8eb44040432e65 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 16 Apr 2013 13:57:51 +0800 Subject: Use the 
horizontal/vertical alignment for VPP surface on BDW This is hardware requirement. Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index d0e4789a..c329600d 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -2011,6 +2011,11 @@ gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont ss->ss2.width = width - 1; ss->ss2.height = height - 1; ss->ss3.pitch = pitch - 1; + + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + gen8_pp_set_surface_tiling(ss, tiling); gen8_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, -- cgit v1.2.1 From 821b3128a1670be45a7c62f91f20a6cdf45a9795 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 21 May 2013 15:12:25 +0800 Subject: Set BSP buffer for VP8 decoding Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 4df155e5..8d60be48 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2971,6 +2971,7 @@ gen8_mfd_vp8_decode_picture(VADriverContextP ctx, gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context); gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context); gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context); -- cgit v1.2.1 From 7dfc815c89ad595e8985e671e5bbb602fe74a2b9 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Handle the pitch when using RGBX surface in VPP for BDW 
Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c329600d..7704f2d9 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -2393,6 +2393,7 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc } else if (rgbx_format) { if (is_target) width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ + pitch[0] = obj_surface->width * 4; } width[1] = obj_surface->cb_cr_width; -- cgit v1.2.1 From fcbc0b80b7895cde336d70009a3ed55293295950 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Add the VPP shader of NV12->YV12/I420 conversion Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- src/shaders/post_processing/gen8/Makefile.am | 2 + src/shaders/post_processing/gen8/Save_AVS_PL3.g8a | 565 ++++++++++++++++++++++ src/shaders/post_processing/gen8/pl2_to_pl3.asm | 17 + src/shaders/post_processing/gen8/pl2_to_pl3.g8b | 197 ++++++++ 5 files changed, 782 insertions(+), 1 deletion(-) create mode 100644 src/shaders/post_processing/gen8/Save_AVS_PL3.g8a create mode 100644 src/shaders/post_processing/gen8/pl2_to_pl3.asm create mode 100644 src/shaders/post_processing/gen8/pl2_to_pl3.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 7704f2d9..c60c705b 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1172,7 +1172,7 @@ static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { }; static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen7/pl2_to_pl3.g75b" +#include "shaders/post_processing/gen8/pl2_to_pl3.g8b" }; static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 512d1cfd..7a4860b9 100644 --- 
a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -1,5 +1,6 @@ INTEL_PP_G8B = \ pl2_to_pl2.g8b \ + pl2_to_pl3.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -9,6 +10,7 @@ INTEL_PP_G8A = \ PL2_AVS_Buf_2.g8a \ PL2_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ + Save_AVS_PL3.g8a \ Set_AVS_Buf_0123_PL2.g8a \ Set_Layer_0.g8a \ VP_Setup.g8a \ diff --git a/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a new file mode 100644 index 00000000..417fd4f1 --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a @@ -0,0 +1,565 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * Author: Zhao Yakui + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 84 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_PL3.asm +// +// Save PL3 420 frame data block of size 16x16 +// +// To save 16x16 block (16x16 byte of Y and 8x8 byte of U and V each) we need 3 send instructions with one of size 16x16 and two of size 8x8. +// ----------------- +// | 16x16 Y | +// | | +// ----------------- +// | 8x8 U | +// --------- +// | 8x8 V | +// --------- + +//----------------------------------------------------------------- +//The layout of data is as follows: +//mMSGHDR0 : Y data header (16x16) +//mubMSGPAYLOAD0 : Y data payload (8 GRFs) +//mMSGHDR1 : U data header (8x8) +//mubMSGPAYLOAD1 : U data payload (2 GRFs) +//mMSGHDR2 : V data header (8x8) +//mubMSGPAYLOAD2 : V data payload (2 GRFs) +//------------------------------------------------------------------ + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time.
+ + +//Msg payload buffers; up to 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + + //Set up header for Y,U and V data + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + + mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI + shr (2) r37.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI U = H/V ORI/2 + shr (2) r46.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI V = H/V ORI/2 + + mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16) + mov (1) r37.2<1>:ud 0x70007:ud { NoDDChk } // U Block width and height (8x8) + mov (1) r46.2<1>:ud 0x70007:ud { NoDDChk } // V Block width and height (8x8) + +// Unscramble, and pack data directly to MRFs + +// Data 16x16 block is divided as - +// --------- +// | 0 | +// --------- +// | 1 | +// --------- +// | 2 | +// --------- +// | 3 | +// --------- +// All sub-blocks are of size 16x4 +// 0: ubBUFFER_0 +// 1: ubBUFFER_1, ubBUFFER_0+16 +// 2: ubBUFFER_2 +// 3: ubBUFFER_3, ubBUFFER_2+16 + + //Y Rounding, first + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, second + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.6,0]<2>:uw 
r[a0.6,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + + //Y Rounding, third + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw + + + //Y Rounding, fourth + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw + + // restore the TOP and BOT pointers + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + +//Buffer 0 +//Move Y to msg payload + mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + +//Move U to msg payload + mov (8) mubMSGPAYLOAD1(0,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) 
mubMSGPAYLOAD1(0,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Move V to msg payload + mov (8) mubMSGPAYLOAD2(0,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD2(0,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + +//Buffer 1 + mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(0,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(0,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD2(0,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD2(0,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + +//Buffer 2 + mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(1,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD2(1,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD2(1,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Buffer 3 + mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(1,24)<1> r[a0.6, 65]<32;8,4>:ub { 
NoDDChk } + + mov (8) mubMSGPAYLOAD2(1,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD2(1,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + +//=========================================================================== + +send (1) null<1>:d r28 0xc 0x120A8018:ud +send (1) null<1>:d r37 0xc 0x60A8019:ud +send (1) null<1>:d r46 0xc 0x60A801A:ud diff --git a/src/shaders/post_processing/gen8/pl2_to_pl3.asm b/src/shaders/post_processing/gen8/pl2_to_pl3.asm new file mode 100644 index 00000000..042a8347 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_pl3.g8b b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b new file mode 100644 index 00000000..28a951c7 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b @@ -0,0 +1,197 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + 
{ 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 
0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 
0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 
0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 
0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 75f5fccd9c57e70955162e1c91fd649e4b93ea38 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Add the VPP 
shader of YV12/I420->NV12 conversion Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- src/shaders/post_processing/gen8/Makefile.am | 6 + src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a | 470 +++++++++++++++++++++ src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a | 470 +++++++++++++++++++++ src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a | 470 +++++++++++++++++++++ src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a | 470 +++++++++++++++++++++ .../post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a | 362 ++++++++++++++++ src/shaders/post_processing/gen8/pl3_to_pl2.asm | 17 + src/shaders/post_processing/gen8/pl3_to_pl2.g8b | 260 ++++++++++++ 9 files changed, 2526 insertions(+), 1 deletion(-) create mode 100644 src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a create mode 100644 src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a create mode 100644 src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a create mode 100644 src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a create mode 100644 src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a create mode 100644 src/shaders/post_processing/gen8/pl3_to_pl2.asm create mode 100644 src/shaders/post_processing/gen8/pl3_to_pl2.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c60c705b..3d3de871 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1176,7 +1176,7 @@ static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { }; static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen7/pl3_to_pl2.g75b" +#include "shaders/post_processing/gen8/pl3_to_pl2.g8b" }; static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 7a4860b9..4f28e7fd 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -1,6 +1,7 @@ INTEL_PP_G8B = \ pl2_to_pl2.g8b \ pl2_to_pl3.g8b \ + 
pl3_to_pl2.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -9,9 +10,14 @@ INTEL_PP_G8A = \ PL2_AVS_Buf_1.g8a \ PL2_AVS_Buf_2.g8a \ PL2_AVS_Buf_3.g8a \ + PL3_AVS_Buf_0.g8a \ + PL3_AVS_Buf_1.g8a \ + PL3_AVS_Buf_2.g8a \ + PL3_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ Set_AVS_Buf_0123_PL2.g8a \ + Set_AVS_Buf_0123_PL3.g8a \ Set_Layer_0.g8a \ VP_Setup.g8a \ $(NULL) diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a new file mode 100644 index 00000000..b5b85d56 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x0C098700 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point; the three sample_8x8 sends below load the Y, U and V planes of vertical block 0 into BUFFER_0 + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: msg len 2, resp len 4 GRFs, header present, SIMD32/64, sample_8x8, binding index offset 0 (Y plane) + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write-mask bit 13 is clear, A/B/R are masked off + + + + // set the vertical block number (m1.1 of the AVS payload mirror) + + mov (1) r25.1<1>:ud 0:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the AVS payload mirror (r25) into r17, the second register of the message + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding table index to select the U plane + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (write-mask bit 12 is clear) + + send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding table index to select the V plane + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (write-mask bit 12 is clear) + + send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a new file mode 100644 index 00000000..8457ae17 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point; the three sample_8x8 sends below load the Y, U and V planes of vertical block 1 into BUFFER_1 + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: msg len 2, resp len 4 GRFs, header present, SIMD32/64, sample_8x8, binding index offset 0 (Y plane) + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write-mask bit 13 is clear, A/B/R are masked off + + + + // set the vertical block number (m1.1 of the AVS payload mirror) + + mov (1) r25.1<1>:ud 1:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy the AVS payload mirror (r25) into r17, the second register of the message + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding table index to select the U plane + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (write-mask bit 12 is clear) + + send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding table index to select the V plane + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (write-mask bit 12 is clear) + + send (1) uwBUFFER_1(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a new file mode 100644 index 00000000..99b40fe9 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Mask A, B and R writes (bits 15, 14, 12); only the Green (Y) channel is written back + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 2:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_2(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a new file mode 100644 index 00000000..8659876b --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time.
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Mask A, B and R writes (bits 15, 14, 12); only the Green (Y) channel is written back + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 3:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a new file mode 100644 index 00000000..05336661 --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a @@ -0,0 +1,362 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_PL3.asm + + + +//Module Name: Set_Buf_0123_PL3 + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT: (YYUUVVAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12. + mov (4) acc0.0<1>:w 0x6EA2:v + //Subtract 6 from 0,4,8,12 + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address. + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + //SU LAYOUT:(YUVAYUVA) + //V = 4, Y = 0, U = 2, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.asm b/src/shaders/post_processing/gen8/pl3_to_pl2.asm new file mode 100644 index 00000000..713cb979 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.g8b b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b new file mode 100644 index 00000000..9a141e72 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b @@ -0,0 +1,260 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 
0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 
0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 
0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 
0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 
0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 
0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 
0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 5a779ff22fe428c80187a2fa68398a536c0cbd9d Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Add the VPP shader of PL3 AVS conversion between YV12 and I420 Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 6 +- src/shaders/post_processing/gen8/Makefile.am | 1 + src/shaders/post_processing/gen8/pl3_to_pl3.asm | 17 ++ src/shaders/post_processing/gen8/pl3_to_pl3.g8b | 213 ++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 src/shaders/post_processing/gen8/pl3_to_pl3.asm create mode 100644 src/shaders/post_processing/gen8/pl3_to_pl3.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 3d3de871..59ba8ea8 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1180,15 +1180,15 @@ static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { }; static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen7/pl3_to_pl3.g75b" +#include 
"shaders/post_processing/gen8/pl3_to_pl3.g8b" }; static const uint32_t pp_nv12_scaling_gen8[][4] = { -#include "shaders/post_processing/gen7/avs.g75b" +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" }; static const uint32_t pp_nv12_avs_gen8[][4] = { -#include "shaders/post_processing/gen7/avs.g75b" +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" }; static const uint32_t pp_nv12_dndi_gen8[][4] = { diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 4f28e7fd..461cb56c 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -2,6 +2,7 @@ INTEL_PP_G8B = \ pl2_to_pl2.g8b \ pl2_to_pl3.g8b \ pl3_to_pl2.g8b \ + pl3_to_pl3.g8b \ $(NULL) INTEL_PP_G8A = \ diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.asm b/src/shaders/post_processing/gen8/pl3_to_pl3.asm new file mode 100644 index 00000000..f6a2a760 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.g8b b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b new file mode 100644 index 00000000..67ac99c7 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b @@ -0,0 +1,213 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 
0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 
0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 
0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 
0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 
0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 
0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 69a1c54dc8156946b9eec835164e1501d1676d42 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Add the VPP shader of NV12->RGBX conversion Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- src/shaders/post_processing/gen8/Makefile.am | 3 + src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a | 641 ++++++++++++++ src/shaders/post_processing/gen8/YUV_to_RGB.g8a | 955 +++++++++++++++++++++ src/shaders/post_processing/gen8/pl2_to_rgbx.asm | 18 + src/shaders/post_processing/gen8/pl2_to_rgbx.g8b | 722 ++++++++++++++++ 6 files changed, 2340 insertions(+), 1 deletion(-) create mode 100644 src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a create mode 100644 src/shaders/post_processing/gen8/YUV_to_RGB.g8a create mode 100644 src/shaders/post_processing/gen8/pl2_to_rgbx.asm create mode 100644 
src/shaders/post_processing/gen8/pl2_to_rgbx.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 59ba8ea8..e7ad0d09 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1214,7 +1214,7 @@ static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b" }; static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { -#include "shaders/post_processing/gen7/pl2_to_rgbx.g75b" +#include "shaders/post_processing/gen8/pl2_to_rgbx.g8b" }; diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 461cb56c..7f5b7eec 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -3,6 +3,7 @@ INTEL_PP_G8B = \ pl2_to_pl3.g8b \ pl3_to_pl2.g8b \ pl3_to_pl3.g8b \ + pl2_to_rgbx.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -17,8 +18,10 @@ INTEL_PP_G8A = \ PL3_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ + Save_AVS_RGBX.g8a \ Set_AVS_Buf_0123_PL2.g8a \ Set_AVS_Buf_0123_PL3.g8a \ + YUV_to_RGB.g8a \ Set_Layer_0.g8a \ VP_Setup.g8a \ $(NULL) diff --git a/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a new file mode 100644 index 00000000..d2df8e4e --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a @@ -0,0 +1,641 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: + * Zhao Yakui + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size 
defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_RGBX.asm +// +// Save packed ARGB 444 frame data block of size 16x16 +// +// To save 16x16 block (64x16 byte layout for ARGB8888) we need 8 send instructions with 32x4 in each +// -------- +// | 0 | 1 | +// | 2 | 3 | +// | 4 | 5 | +// | 6 | 7 | +// --------- +// the 8 32x4 block send is used + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; up to 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + +// At the save module we have all 8 address sub-registers available. +// So we will use PING-PONG type of scheme to save the data using +// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help +// reduce dependency. - rT + + //Internal LAYOUT:(RRGGBBAA) + //Assign buffer channel order for Buffer 0123 in the order RGBA a0.3>A, a0.2>B, a0.1>G, a0.0>R + // R = 0, G= 4, B = 8, A = 12. 
+ mov (4) acc0.0<1>:w 0x62EA:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw + + // if channel swap? + // This means that it should be BGRA(B is the LSB) or RGBA + // the internal format is always RGBA(MSB-A-B-G-R). + and.nz.f0.0 null<1>:w r2.3<0;1,0>:uw 0x01:w + +//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. +//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). +//Offsets are zero for buffer 0 and buffer 4. + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + // pointer swap + (f0.0) mov (1) uwTemp0<1> a0.0<0;1,0>:uw + (f0.0) mov (1) a0.0<1>:uw a0.2<0;1,0>:uw + (f0.0) mov (1) a0.2<1>:uw uwTemp0<0;1,0> + + shl (1) r27.0<1>:d r7.0<0;1,0>:w 2:w { NoDDClr } // H. block origin need to be quadrupled + mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant) + mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4) + + mov (4) a0.4<1>:uw a0.0<4;4,1>:uw + + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + mov (8) r55<1>:ud r27<8;8,1>:ud + + mov (8) r31<1>:ud r27<8;8,1>:ud + mov (8) r40<1>:ud r27<8;8,1>:ud + mov (8) r49<1>:ud r27<8;8,1>:ud + mov (8) r58<1>:ud r27<8;8,1>:ud + +//Buffer 0/1 are written by using 4 32x4. + + add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d + + add (1) r46.1<1>:d r27.1<0;1,0>:d 4:d + + add (1) r55.1<1>:d r27.1<0;1,0>:d 4:d + add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d + + // write Buf_0 to 1st quarter of four horizontal output blocks + +// Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers +// of destination reg are not updated at one place and hence even flags are scattered. 
-rT + +/* for block 0 the left part of buffer 0 and 1 */ + mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub + +/* For Buffer 0 */ + send (16) null<1>:d r28 0xc 0x0A0A8018:ud + send (16) null<1>:d r37 0xc 0x0A0A8018:ud + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 512:uw + mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov 
(8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub + // send Buffer 1 + send (16) null<1>:d r46 0xc 0x0A0A8018:ud + send (16) null<1>:d r55 0xc 0x0A0A8018:ud + + +/* for Buffer 2/3 */ + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + mov (8) r55<1>:ud r27<8;8,1>:ud + + add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d + + add (1) r37.0<1>:d 
r27.0<0;1,0>:d 32:d + add (1) r37.1<1>:d r27.1<0;1,0>:d 8:d + + add (1) r46.1<1>:d r27.1<0;1,0>:d 12:d + + add (1) r55.1<1>:d r27.1<0;1,0>:d 12:d + add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1024:uw + + mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub + +// Send Buffer 2 + send (16) 
null<1>:d r28 0xc 0x0A0A8018:ud + send (16) null<1>:d r37 0xc 0x0A0A8018:ud + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1536:uw + mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub + // send buffer 3 + send (16) null<1>:d r46 0xc 0x0A0A8018:ud + send (16) null<1>:d r55 0xc 0x0A0A8018:ud + + + diff --git 
a/src/shaders/post_processing/gen8/YUV_to_RGB.g8a b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a new file mode 100644 index 00000000..9f3fcba8 --- /dev/null +++ b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a @@ -0,0 +1,955 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: + * Zhao Yakui + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: YUV_to_RGB.asm +// +// Convert YUV to RGB, handle it by 16x4 block +// + + +// Description: Includes all definitions explicit to Fast Composite. + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Msg payload buffers; up to 4 full-size messages can be written + +//Unnecessary to use the MSGPayLoad, So it is temporarily used for conversion of YUV->RGB + +.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + + +.declare wTempR Base=r42.0 ElementSize=2 Type=w +.declare wTempG Base=r44.0 ElementSize=2 Type=w +.declare wTempB Base=r46.0 ElementSize=2 Type=w + +.declare ubTempR Base=r42.0 ElementSize=1 Type=ub +.declare ubTempG Base=r44.0 ElementSize=1 Type=ub +.declare ubTempB Base=r46.0 ElementSize=1 Type=ub + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare wTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + // NTSC standard + // R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255)) + // G = Clamp ( 1.164(Y-16/255) - 
0.813(Cr-128/255) - 0.392(Cb-128/255)) + // B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255)) + // ITU-R conversion, Now we are using ITU-R conversion + // R = clip( Y + 1.402*(Cr-128)) // ITU-R + // G = clip( Y - 0.344*(Cb-128) - 0.714*(Cr-128)) + // B = clip( Y + 1.772*(Cb-128)) + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. + //Y/U/V is also stored as R/G/B for the internal purpose +//for BUFFER_0 + mov (4) a0.0<1>:uw r22.0<4;4,1>:uw +//the first line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) 
fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 0 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) 
fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> 
fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_1 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 512:uw +//the first line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 
0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 1 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) 
r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 
0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_2 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1024:uw +//the first line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> 
fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 2 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 
0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 
97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_3 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1536:uw +//the first line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 
-128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 3 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f 
+ mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> 
fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.asm b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm new file mode 100644 index 00000000..58a5204d --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm @@ -0,0 +1,18 @@ +// Module name: AVS +.kernel 
PL2_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "YUV_to_RGB.g8a" +#include "Save_AVS_RGBX.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b new file mode 100644 index 00000000..fe1c49c1 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b @@ -0,0 +1,722 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 
0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 
0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 
0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 
0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 
0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 
0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 
0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 
0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 
0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 0x04000400 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 
0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 
0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 
0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 0x06000600 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 
0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 
0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 
0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x02800005, 0x20001260, 0x1e000046, 0x00010001 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00010001, 0x22201048, 0x00000200, 0x00000000 }, + { 0x00010001, 0x22001040, 0x00000204, 0x00000000 }, + { 0x00010001, 0x22041240, 0x00000220, 0x00000000 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00020002 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0003001f }, + { 0x00400001, 0x22081040, 0x00690200, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x23e00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25000208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26200208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x27400208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 }, + { 0x00000040, 0x25c40a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x26e40a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 }, + { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x63a32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x64c12288, 0x00ae8211, 0x00000000 }, + { 
0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 }, + { 0x00400040, 0x22001040, 0x16690208, 0x02000200 }, + { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x66012288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x66022288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x67032288, 0x0000005f, 0x00000000 }, + { 
0x00600001, 0x67202288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x25c40a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x26e40a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 }, + { 0x00400040, 0x22001040, 0x16690208, 0x04000400 }, + { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x63a32288, 0x0000005f, 0x00000000 }, + { 
0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x64c12288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 }, + { 0x00400040, 0x22001040, 0x16690208, 0x06000600 }, + { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x66012288, 0x00ae8221, 0x00000000 }, + { 
0x00600001, 0x66022288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x67032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67202288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 0598c2f00a591b9a11abd590c353a91d1aaebbab Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Follow the bspec to workaround the NV12->RGBX conversion issue on BDW Signed-off-by: Zhao Yakui --- src/shaders/post_processing/gen8/YUV_to_RGB.g8a | 48 ++++++++++++++++-------- src/shaders/post_processing/gen8/pl2_to_rgbx.g8b | 48 
++++++++++++++++-------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/shaders/post_processing/gen8/YUV_to_RGB.g8a b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a index 9f3fcba8..2b968d88 100644 --- a/src/shaders/post_processing/gen8/YUV_to_RGB.g8a +++ b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a @@ -371,7 +371,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -409,7 +410,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -446,7 +448,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -482,7 +485,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -521,7 +525,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) 
acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -559,7 +564,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -596,7 +602,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -632,7 +639,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -671,7 +679,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -709,7 +718,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f 
fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -746,7 +756,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -782,7 +793,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -821,7 +833,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -859,7 +872,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -896,7 +910,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 
0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> @@ -932,7 +947,8 @@ mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> - mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b index fe1c49c1..9ee29c23 100644 --- a/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b +++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b @@ -121,7 +121,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -148,7 +149,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -175,7 +177,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 
0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -202,7 +205,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -230,7 +234,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -257,7 +262,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -284,7 +290,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 
0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -311,7 +318,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -339,7 +347,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -366,7 +375,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -393,7 +403,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 
0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -420,7 +431,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -448,7 +460,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -475,7 +488,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -502,7 +516,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 
0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, @@ -529,7 +544,8 @@ { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, - { 0x00800048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, -- cgit v1.2.1 From fbb04ae7cadf008236b3ca1c42db4d4ae75b94d2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 23 May 2013 10:22:09 +0800 Subject: Add the VPP shader of RGBX->NV12 conversion Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 4 +- src/shaders/post_processing/gen8/Makefile.am | 7 + src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a | 457 +++++++++++ src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a | 457 +++++++++++ src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a | 457 +++++++++++ src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a | 457 +++++++++++ src/shaders/post_processing/gen8/RGB_to_YUV.g8a | 910 +++++++++++++++++++++ .../post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a | 368 +++++++++ src/shaders/post_processing/gen8/rgbx_to_nv12.asm | 18 + src/shaders/post_processing/gen8/rgbx_to_nv12.g8b | 661 +++++++++++++++ 10 files changed, 3794 insertions(+), 2 deletions(-) create mode 100644 src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a create mode 100644 src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a create mode 100644 src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a create mode 100644 src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a create mode 100644 src/shaders/post_processing/gen8/RGB_to_YUV.g8a create mode 100644 
src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a create mode 100644 src/shaders/post_processing/gen8/rgbx_to_nv12.asm create mode 100644 src/shaders/post_processing/gen8/rgbx_to_nv12.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e7ad0d09..2ae2e0bf 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1211,7 +1211,7 @@ static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { #include "shaders/post_processing/gen7/pa_to_pl3.g75b" }; static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen7/rgbx_to_nv12.g75b" +#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" }; static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { #include "shaders/post_processing/gen8/pl2_to_rgbx.g8b" @@ -1383,7 +1383,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen7_pp_rgbx_avs_initialize, + gen8_pp_plx_avs_initialize, }, { diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 7f5b7eec..e2f586b6 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -4,6 +4,7 @@ INTEL_PP_G8B = \ pl3_to_pl2.g8b \ pl3_to_pl3.g8b \ pl2_to_rgbx.g8b \ + rgbx_to_nv12.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -16,12 +17,18 @@ INTEL_PP_G8A = \ PL3_AVS_Buf_1.g8a \ PL3_AVS_Buf_2.g8a \ PL3_AVS_Buf_3.g8a \ + PA_AVS_Buf_0.g8a \ + PA_AVS_Buf_1.g8a \ + PA_AVS_Buf_2.g8a \ + PA_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ Save_AVS_RGBX.g8a \ Set_AVS_Buf_0123_PL2.g8a \ Set_AVS_Buf_0123_PL3.g8a \ + Set_AVS_Buf_0123_BGRA.g8a \ YUV_to_RGB.g8a \ + RGB_to_YUV.g8a \ Set_Layer_0.g8a \ VP_Setup.g8a \ $(NULL) diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a new file mode 100644 index 00000000..228b2564 --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel 
Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 0:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a new file mode 100644 index 00000000..c93806dd --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a new file mode 100644 index 00000000..2cfc90c0 --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 2:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a new file mode 100644 index 00000000..0cbc4ba0 --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 3:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/RGB_to_YUV.g8a b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a new file mode 100644 index 00000000..2cda31eb --- /dev/null +++ b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a @@ -0,0 +1,910 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: + * Zhao Yakui + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: YUV_to_RGB.asm +// +// Convert YUV to RGB, handle it by 16x4 block +// + + +// Description: Includes all definitions explicit to Fast Composite. + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare 
uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +//Pointer to mask reg + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; upto 4 full-size messages can be written + +//Unnecessary to use the MSGPayLoad, So it is temporiarily used for conversion of YUV->RGB + +.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare wTempY Base=r42.0 ElementSize=2 Type=w +.declare wTempU Base=r44.0 ElementSize=2 Type=w +.declare wTempV Base=r46.0 ElementSize=2 Type=w + +.declare ubTempY Base=r42.0 ElementSize=1 Type=ub +.declare ubTempU Base=r44.0 ElementSize=1 Type=ub +.declare ubTempV Base=r46.0 ElementSize=1 Type=ub + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + // ITU-R conversion, Now we are using ITU-R conversion + // Y = 0.299R + 0.587G + 0.114B + // U = -0.169R - 0.331G + 0.499B + 128 + // V = 0.499R - 0.418G - 0.0813B+ 128 + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. + //It always uses the YUVA layout. +//for BUFFER_0 + mov (4) a0.0<1>:uw r22.0<4;4,1>:uw + mov (4) a0.4<1>:uw r22.0<4;4,1>:uw + // YUV uses the a0.5,a0.6 and a0.4 as the indirect-register + // Y = a0.5, U=a0.6, V=a0.4 + // if channel swap? 
+ // This means that it should be BGRX(B is the LSB) or RGBX + // 1 means that it is BGRX. + and.nz.f0.0 null<1>:w r2.0<0;1,0>:uw 0x01:w + // pointer swap + (f0.0) mov (1) uwTemp0<1> a0.0:uw + (f0.0) mov (1) a0.0:uw a0.1:uw + (f0.0) mov (1) a0.1:uw uwTemp0<0;1,0> + +//the first line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) 
acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 
64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_1 + + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 
0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + 
mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f 
fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_2 + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> 
fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + +//the second line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac 
(16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) 
r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_3 + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 
0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> 
+ + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a new file mode 100644 index 00000000..798564f8 --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a @@ -0,0 +1,368 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Authors: Zhao Yakui + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_BGRA.asm + + + +//Module Name: Set_Buf_0123_BGRA + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT:(UUYYVVAA) + //AVS RGBX LAYOUT (RRGGBBAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + // V = 8, Y= 0, U = 4, A = 12. + // And a0.x is used as indirect-register for RGBX. R=a0.1, G=a0.2, B=a0.0 + // B = 8, R= 0, G = 4, A = 12 + mov (4) acc0.0<1>:w 0x6EA2:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + + //SU LAYOUT:(VYUAVYUA) + //V = 4, Y = 2, U = 0, A = 6 + //B = 4, G = 2, R = 0, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.asm b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm new file mode 100644 index 00000000..14baafeb --- /dev/null +++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm @@ -0,0 +1,18 @@ +// Module name: AVS +.kernel RGBX_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_BGRA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "RGB_to_YUV.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b new file mode 100644 index 00000000..4cc113b5 --- /dev/null +++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b @@ -0,0 +1,661 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 
0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 
0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 
0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 }, + { 0x00400001, 0x22081240, 0x006902c0, 0x00000000 }, + { 0x02800005, 0x20001260, 0x1e000040, 0x00010001 }, + { 0x00010001, 0x22201048, 0x00000200, 0x00000000 }, + { 0x00010001, 0x22001040, 0x00000202, 0x00000000 }, + { 0x00010001, 0x22021240, 0x00000220, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 
0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 
0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 
0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 
0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 
0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 
0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 
0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 
0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 
0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 
0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 
0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 
0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 
0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 
0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 
0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 49f3654d0e0ece530a7d2a4dd413195e48ce85ed Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Sat, 8 Jun 2013 09:49:18 +0800 Subject: Fix a vp8 decoder picture parameter error (1) log2(num_of_partition - 1) should be set to picture state as BSpec (2) add an assert about probability buffer Signed-off-by: Zhong Li --- src/gen8_mfd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 8d60be48..4997c20f 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2790,7 +2790,9 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer; VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */ dri_bo *probs_bo = decode_state->probability_data->bo; - int i, j; + int i, j,log2num; + + log2num = (int)log2(slice_param->num_of_partitions - 1); BEGIN_BCS_BATCH(batch, 38); OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2)); @@ -2798,7 +2800,7 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 | (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0); OUT_BCS_BATCH(batch, - slice_param->num_of_partitions << 24 | + log2num << 24 | 
pic_param->pic_fields.bits.sharpness_level << 16 | pic_param->pic_fields.bits.sign_bias_alternate << 13 | pic_param->pic_fields.bits.sign_bias_golden << 12 | @@ -2962,6 +2964,8 @@ gen8_mfd_vp8_decode_picture(VADriverContextP ctx, assert(decode_state->slice_params && decode_state->slice_params[0]->buffer); assert(decode_state->slice_datas[0]->bo); + assert(decode_state->probability_data); + slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; slice_data_bo = decode_state->slice_datas[0]->bo; -- cgit v1.2.1 From c889b94cdefe3e68f536405fdac6e2b77c1d2f89 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Sat, 8 Jun 2013 09:49:19 +0800 Subject: Remove unnecessary asserts I think these two asserts are not necessary, and they will cause assert failure when probability buffer created. Signed-off-by: Zhong Li --- src/i965_drv_video.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 58b67bbf..86476b6e 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2051,8 +2051,6 @@ i965_BeginPicture(VADriverContextP ctx, struct object_buffer *obj_buffer) \ { \ struct category##_state *category = &obj_context->codec_state.category; \ - assert(obj_buffer->buffer_store->bo == NULL); \ - assert(obj_buffer->buffer_store->buffer); \ i965_release_buffer_store(&category->member); \ i965_reference_buffer_store(&category->member, obj_buffer->buffer_store); \ return VA_STATUS_SUCCESS; \ -- cgit v1.2.1 From ae9fbed402dabfc7d7201217f704dfb03f1e09a9 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Sat, 8 Jun 2013 14:37:24 +0800 Subject: Enable loop-deblock of bdw vp8 decoder When deblock is enable, post-deblocking bo should be used as output buffer. 
Signed-off-by: Zhong Li --- src/gen8_mfd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 4997c20f..c60e333f 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2732,13 +2732,14 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx, i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); - gen7_mfd_context->post_deblocking_output.bo = NULL; - gen7_mfd_context->post_deblocking_output.valid = 0; + gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable; dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); - gen7_mfd_context->pre_deblocking_output.valid = 1; + gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable; /* The same as AVC */ dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); -- cgit v1.2.1 From d306107f3ee94b2e3eede9074826fd8a79546f21 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Sun, 9 Jun 2013 18:13:38 +0800 Subject: Vp8 quant index converted to quant value on BDW Signed-off-by: Zhong Li --- src/gen8_mfd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index c60e333f..3066eb3f 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2712,6 +2712,38 @@ gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, intel_batchbuffer_flush(batch); } +static const int vp8_dc_qlookup[128] = +{ + 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 
35, 36, 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, +}; + +static const int vp8_ac_qlookup[128] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, +}; + +static inline unsigned int vp8_clip_quantization_index(unsigned int index) +{ + if(index > 127) + return 127; + else if(index <0) + return 0; +} + static void gen8_mfd_vp8_decode_init(VADriverContextP ctx, struct decode_state *decode_state, @@ -2792,6 +2824,7 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */ dri_bo *probs_bo = decode_state->probability_data->bo; int i, j,log2num; + unsigned int quantization_value[4][6]; log2num = (int)log2(slice_param->num_of_partitions - 1); @@ -2824,15 +2857,26 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, /* Quantizer Value for 4 segmetns, DW4-DW15 */ for (i = 0; i < 4; i++) { + quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/ + quantization_value[i][1] = 
vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/ + quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/ + /* 101581>>16 is equivalent to 155/100 */ + quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/ + quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/ + quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/ + + quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8); + quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132); + + OUT_BCS_BATCH(batch, + quantization_value[i][0] << 16 | /* Y1AC */ + quantization_value[i][1] << 0); /* Y1DC */ OUT_BCS_BATCH(batch, - iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */ - iq_matrix->quantization_index[i][1] << 0); /* Y1DC */ - OUT_BCS_BATCH(batch, - iq_matrix->quantization_index[i][5] << 16 | /* UVAC */ - iq_matrix->quantization_index[i][4] << 0); /* UVDC */ + quantization_value[i][5] << 16 | /* UVAC */ + quantization_value[i][4] << 0); /* UVDC */ OUT_BCS_BATCH(batch, - iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */ - iq_matrix->quantization_index[i][2] << 0); /* Y2DC */ + quantization_value[i][3] << 16 | /* Y2AC */ + quantization_value[i][2] << 0); /* Y2DC */ } /* CoeffProbability table for non-key frame, DW16-DW18 */ -- cgit v1.2.1 From 20d5d6e1a9acbaa0c8d537a690e141dd81c17d7f Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 18 Jun 2013 14:16:42 +0800 Subject: New PCI IDs for BDW Signed-off-by: Xiang, Haihao --- src/intel_driver.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/intel_driver.h b/src/intel_driver.h index 7580006a..eae4d12f 100644 --- a/src/intel_driver.h +++ 
b/src/intel_driver.h @@ -248,6 +248,10 @@ struct intel_region #define PCI_CHIP_BAYTRAIL_M_4 0x0157 #define PCI_CHIP_BAYTRAIL_D 0x0155 +#define PCI_CHIP_BROADWELL_MS_GT1 0x1602 +#define PCI_CHIP_BROADWELL_MS_GT2 0x1612 +#define PCI_CHIP_BROADWELL_MS_GT2PLUS 0x1622 + #define PCI_CHIP_BROADWELL_M_GT1_1 0x1606 #define PCI_CHIP_BROADWELL_M_GT2_1 0x1616 #define PCI_CHIP_BROADWELL_M_GT2PLUS_1 0x1626 @@ -392,19 +396,22 @@ struct intel_region devid == PCI_CHIP_BROADWELL_M_GT1_2 || \ devid == PCI_CHIP_BROADWELL_M_GT1_3 || \ devid == PCI_CHIP_BROADWELL_D_GT1_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT1_2) + devid == PCI_CHIP_BROADWELL_D_GT1_2 || \ + devid == PCI_CHIP_BROADWELL_MS_GT1) #define IS_BDW_GT2(devid) (devid == PCI_CHIP_BROADWELL_M_GT2_1 || \ devid == PCI_CHIP_BROADWELL_M_GT2_2 || \ devid == PCI_CHIP_BROADWELL_M_GT2_3 || \ devid == PCI_CHIP_BROADWELL_D_GT2_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT2_2) + devid == PCI_CHIP_BROADWELL_D_GT2_2 || \ + devid == PCI_CHIP_BROADWELL_MS_GT2) #define IS_BDW_GT2PLUS(devid) (devid == PCI_CHIP_BROADWELL_M_GT2PLUS_1 || \ devid == PCI_CHIP_BROADWELL_M_GT2PLUS_2 || \ devid == PCI_CHIP_BROADWELL_M_GT2PLUS_3 || \ devid == PCI_CHIP_BROADWELL_D_GT2PLUS_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT2PLUS_2) + devid == PCI_CHIP_BROADWELL_D_GT2PLUS_2 || \ + devid == PCI_CHIP_BROADWELL_MS_GT2PLUS) #define IS_GEN8(devid) (IS_BDW_GT1(devid) || \ IS_BDW_GT2(devid) || \ -- cgit v1.2.1 From e074814e01a2a138535094985721fba0da2b443a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 21 Jun 2013 10:16:57 +0800 Subject: Add the CSC conversion from NV12/I420 to YUYV Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 4 +- src/shaders/post_processing/gen8/Makefile.am | 5 +- src/shaders/post_processing/gen8/Save_AVS_PA.g8a | 629 +++++++++++++++++++++++ src/shaders/post_processing/gen8/pl2_to_pa.asm | 17 + src/shaders/post_processing/gen8/pl2_to_pa.g8b | 287 +++++++++++ src/shaders/post_processing/gen8/pl3_to_pa.asm | 17 + 
src/shaders/post_processing/gen8/pl3_to_pa.g8b | 303 +++++++++++ 7 files changed, 1259 insertions(+), 3 deletions(-) create mode 100644 src/shaders/post_processing/gen8/Save_AVS_PA.g8a create mode 100644 src/shaders/post_processing/gen8/pl2_to_pa.asm create mode 100644 src/shaders/post_processing/gen8/pl2_to_pa.g8b create mode 100644 src/shaders/post_processing/gen8/pl3_to_pa.asm create mode 100644 src/shaders/post_processing/gen8/pl3_to_pa.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 2ae2e0bf..78405b5d 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1199,10 +1199,10 @@ static const uint32_t pp_nv12_dn_gen8[][4] = { // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b" }; static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { -#include "shaders/post_processing/gen7/pl2_to_pa.g75b" +#include "shaders/post_processing/gen8/pl2_to_pa.g8b" }; static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { -#include "shaders/post_processing/gen7/pl3_to_pa.g75b" +#include "shaders/post_processing/gen8/pl3_to_pa.g8b" }; static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen7/pa_to_pl2.g75b" diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index e2f586b6..776803e1 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -4,7 +4,9 @@ INTEL_PP_G8B = \ pl3_to_pl2.g8b \ pl3_to_pl3.g8b \ pl2_to_rgbx.g8b \ - rgbx_to_nv12.g8b \ + rgbx_to_nv12.g8b \ + pl2_to_pa.g8b \ + pl3_to_pa.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -24,6 +26,7 @@ INTEL_PP_G8A = \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ Save_AVS_RGBX.g8a \ + Save_AVS_PA.g8a \ Set_AVS_Buf_0123_PL2.g8a \ Set_AVS_Buf_0123_PL3.g8a \ Set_AVS_Buf_0123_BGRA.g8a \ diff --git a/src/shaders/post_processing/gen8/Save_AVS_PA.g8a b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a new file mode 100644 index 00000000..1cedac7e --- /dev/null 
+++ b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a @@ -0,0 +1,629 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: Zhao Yakui + */ +// 174 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_PA.asm +// +// Save PA 422 frame data block of size 16x16 +// +// To save 16x16 block (32x16 bytes of YUYV) we need 2 send instructions with of size 16x16 each. +// ------------------------------- +// | 16x16 | 16x16 | +// | YUYV | YUYV | +// ------------------------------- +// these 2 sends are replaced by 8 32x2 sends to improve performance + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Msg payload buffers; upto 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub 
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + //wBUFF_CHNL_PTR points to buffer 0. + //Add appropriate offsets to get pointers for all buffers (1,2,3). + //Offset is zero for buffer 0. + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + //Set DEST pointers according to output packing i.e. YUYV, YVYU, UYVY, VYUY + add (4) a0.4<1>:w r2.28<4;4,1>:ub 928:uw + + /* X block origin. YUY2 or UYUV */ + shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be 2 times + mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant) + mov (1) r27.2<1>:ud 0x1001F:ud { NoDDChk } // Block width and height (32x2) + +// Rounding + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw 
r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 
0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 
0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 2048:uw + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + add (1) r37.1<1>:d r27.1<0;1,0>:d 2:d // Point to 2nd part + + /* a0.2 U, a0.1 Y, a0.0 V */ + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + /* a0.4 + 288 = r38 */ + mov (8) r[a0.6, 288]<4>:ub 
r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 4:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 6:d // Point to 2nd part + + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 10:d // Point to 2nd part + + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) 
r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 12:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 14:d // Point to 2nd part + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.asm b/src/shaders/post_processing/gen8/pl2_to_pa.asm new file mode 100644 index 00000000..55d9cedf --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PA +.code + +#include "VP_Setup.g8a" +#include 
"Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.g8b b/src/shaders/post_processing/gen8/pl2_to_pa.g8b new file mode 100644 index 00000000..0c0cda16 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pa.g8b @@ -0,0 +1,287 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 
}, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 
}, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 
}, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 
}, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 
}, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 
}, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 
}, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 
}, diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.asm b/src/shaders/post_processing/gen8/pl3_to_pa.asm new file mode 100644 index 00000000..acb7670c --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.g8b b/src/shaders/post_processing/gen8/pl3_to_pa.g8b new file mode 100644 index 00000000..d6798c27 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pa.g8b @@ -0,0 +1,303 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 
0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 
0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 
0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 
0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 
0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 
0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 
0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 
0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 1543f2ffa17d6c648181562247439a38f0a399c4 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 21 Jun 2013 10:17:02 +0800 Subject: Add the conversion from YUYV to NV12/I420 Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 4 +- src/shaders/post_processing/gen8/Makefile.am | 3 + .../post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a | 366 +++++++++++++++++++++ src/shaders/post_processing/gen8/pa_to_pl2.asm | 17 + src/shaders/post_processing/gen8/pa_to_pl2.g8b | 236 +++++++++++++ src/shaders/post_processing/gen8/pa_to_pl3.asm | 17 + src/shaders/post_processing/gen8/pa_to_pl3.g8b | 189 +++++++++++ 7 files changed, 830 insertions(+), 2 deletions(-) create mode 100644 src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a create mode 100644 
src/shaders/post_processing/gen8/pa_to_pl2.asm create mode 100644 src/shaders/post_processing/gen8/pa_to_pl2.g8b create mode 100644 src/shaders/post_processing/gen8/pa_to_pl3.asm create mode 100644 src/shaders/post_processing/gen8/pa_to_pl3.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 78405b5d..87c9000d 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1205,10 +1205,10 @@ static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { #include "shaders/post_processing/gen8/pl3_to_pa.g8b" }; static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen7/pa_to_pl2.g75b" +#include "shaders/post_processing/gen8/pa_to_pl2.g8b" }; static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen7/pa_to_pl3.g75b" +#include "shaders/post_processing/gen8/pa_to_pl3.g8b" }; static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 776803e1..ddb53cd3 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -7,6 +7,8 @@ INTEL_PP_G8B = \ rgbx_to_nv12.g8b \ pl2_to_pa.g8b \ pl3_to_pa.g8b \ + pa_to_pl2.g8b \ + pa_to_pl3.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -30,6 +32,7 @@ INTEL_PP_G8A = \ Set_AVS_Buf_0123_PL2.g8a \ Set_AVS_Buf_0123_PL3.g8a \ Set_AVS_Buf_0123_BGRA.g8a \ + Set_AVS_Buf_0123_VYUA.g8a \ YUV_to_RGB.g8a \ RGB_to_YUV.g8a \ Set_Layer_0.g8a \ diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a new file mode 100644 index 00000000..3573e2bf --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a @@ -0,0 +1,366 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_VYUA.asm + + + +//Module Name: Set_Buf_0123_VYUA + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT:(VVYYUUAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + // V = 0, Y= 4, U = 8, A = 12. + //YCrCb or YCrCb_Swap returns the following data: + //Cr is returned on R-channel. 0 + //Y is returned on G channel. 4 + //Cb is returned on B channel. 8 + mov (4) acc0.0<1>:w 0x62EA:v //Subtract 6 from 0,4,8,12 + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address. + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + + //SU LAYOUT:(VYUAVYUA) + //V = 0, Y = 2, U = 4, A = 6 + mov (4) acc0.0<1>:w 0x6420:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.asm b/src/shaders/post_processing/gen8/pa_to_pl2.asm new file mode 100644 index 00000000..adc81fdb --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.g8b b/src/shaders/post_processing/gen8/pa_to_pl2.g8b new file mode 100644 index 00000000..3282c51f --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl2.g8b @@ -0,0 +1,236 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 
0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 
0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 
0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 
0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 
0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 
0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.asm b/src/shaders/post_processing/gen8/pa_to_pl3.asm new file mode 100644 index 00000000..44c7f9e2 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include 
"Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.g8b b/src/shaders/post_processing/gen8/pa_to_pl3.g8b new file mode 100644 index 00000000..3d1d0878 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl3.g8b @@ -0,0 +1,189 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 
0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 
0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 
0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 
0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 
0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From f7cf2fc0a882ee7082530cff0b40adda8efc23b6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 21 Jun 2013 10:26:13 +0800 Subject: VPP: add VPP Filters for BDW Needs to rebuild the shader for VAProcFilterSharpening on BDW Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 86476b6e..bfb1092f 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -343,7 +343,16 @@ static struct hw_codec_info gen8_hw_codec_info = { .has_accelerated_getimage = 1, .has_accelerated_putimage = 1, .has_tiled_surface = 1, + .has_di_motion_adptive = 1, .has_vp8_decoding = 1, + + .num_filters = 4, + .filters = { + VAProcFilterNoiseReduction, + VAProcFilterDeinterlacing, + VAProcFilterSharpening, /* need to rebuild the shader for BDW */ + VAProcFilterColorBalance, + }, }; #define I965_PACKED_HEADER_BASE 0 -- cgit v1.2.1 From 5e88fa8fb1b5009fb69501adce8395d6ea05dba7 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 21 Jun 2013 13:29:15 +0800 Subject: VME uses reference frame parsed from slice_param instead of hacked DPB for Gen8 This is backported from Ivy/Haswell/Sandybridge. 
Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 65d27c1a..4a631ca7 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -240,6 +240,7 @@ gen8_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; + struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -249,17 +250,45 @@ gen8_vme_surface_setup(VADriverContextP ctx, gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); if (!is_intra) { - /* reference 0 */ - obj_surface = encode_state->reference_objects[0]; - - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - - /* reference 1 */ - obj_surface = encode_state->reference_objects[1]; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int slice_type; + struct object_surface *slice_obj_surface; + int ref_surface_id; + + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + /* reference 0 */ + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + } + if (slice_type == SLICE_TYPE_B) { + /* reference 1 */ + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != 
VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + obj_surface = encode_state->reference_objects[1]; + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + } } /* VME output */ -- cgit v1.2.1 From 7e8ce8ce06e6e7efcab69b506758e6628882dd83 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 21 Jun 2013 13:29:24 +0800 Subject: PAK encoding uses the reference list parsed from slice_param instead of hacked DPB This is backported from Sandybridge/Ivybridge/Haswell. Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 0bf452b4..258f94f8 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -636,31 +636,6 @@ gen8_mfc_avc_directmode_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -static void -gen8_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - int i; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 0); //Select L0 - OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); - } - ADVANCE_BCS_BATCH(batch); - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 1); //Select L1 - OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); - } - ADVANCE_BCS_BATCH(batch); -} - static void gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, @@ -707,7 +682,7 @@ static void 
gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, mfc_context->avc_qm_state(ctx, encoder_context); mfc_context->avc_fqm_state(ctx, encoder_context); gen8_mfc_avc_directmode_state(ctx, encoder_context); - gen8_mfc_avc_ref_idx_state(ctx, encoder_context); + intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); } -- cgit v1.2.1 From 46755cb5f1f2cb8e24a740db0e7bfec4f2d0ec10 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Wed, 26 Jun 2013 14:02:53 +0800 Subject: Build BDW vebox pipeline Build gen8 vebox pipeline, and ProcAMP has been enabled and verified on simulator. However, DN/DI need further effort. Signed-off-by: Zhong Li --- src/gen75_picture_process.c | 6 +- src/gen75_vpp_vebox.c | 209 ++++++++++++++++++++++++++++++++++++++++++++ src/gen75_vpp_vebox.h | 3 + 3 files changed, 217 insertions(+), 1 deletion(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index fee378f6..9dd7c192 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -75,6 +75,7 @@ gen75_vpp_vebox(VADriverContextP ctx, { VAStatus va_status = VA_STATUS_SUCCESS; VAProcPipelineParameterBuffer* pipeline_param = proc_ctx->pipeline_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); /* vpp features based on VEBox fixed function */ if(proc_ctx->vpp_vebox_ctx == NULL) { @@ -85,7 +86,10 @@ gen75_vpp_vebox(VADriverContextP ctx, proc_ctx->vpp_vebox_ctx->surface_input_object = proc_ctx->surface_pipeline_input_object; proc_ctx->vpp_vebox_ctx->surface_output_object = proc_ctx->surface_render_output_object; - va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); + if (IS_HASWELL(i965->intel.device_id)) + va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); + else if (IS_GEN8(i965->intel.device_id)) + va_status = gen8_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); return va_status; } diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index b5a88b63..ab9a0146 100644 ---
a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -23,6 +23,7 @@ * * Authors: * Li Xiaowei + * Li Zhong */ #include @@ -1310,3 +1311,211 @@ struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx) return proc_context; } +void bdw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) +{ + struct intel_batchbuffer *batch = proc_ctx->batch; + unsigned int is_dn_enabled = (proc_ctx->filters_mask & 0x01)? 1: 0; + unsigned int is_di_enabled = (proc_ctx->filters_mask & 0x02)? 1: 0; + unsigned int is_iecp_enabled = (proc_ctx->filters_mask & 0xff00)?1:0; + unsigned int is_first_frame = !!((proc_ctx->frame_order == -1) && + (is_di_enabled || + is_dn_enabled)); + unsigned int di_output_frames_flag = 2; /* Output Current Frame Only */ + + if(proc_ctx->fourcc_input != proc_ctx->fourcc_output || + (is_dn_enabled == 0 && is_di_enabled == 0)){ + is_iecp_enabled = 1; + } + + if (is_di_enabled) { + VAProcFilterParameterBufferDeinterlacing *di_param = + (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; + + assert(di_param); + + if (di_param->algorithm == VAProcDeinterlacingBob) + is_first_frame = 1; + + if (di_param->algorithm == VAProcDeinterlacingMotionAdaptive && + proc_ctx->frame_order != -1) + di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */ + } + + BEGIN_VEB_BATCH(batch, 0xc); + OUT_VEB_BATCH(batch, VEB_STATE | (0xc - 2)); + OUT_VEB_BATCH(batch, + 0 << 25 | // state surface control bits + 0 << 23 | // reserved. + 0 << 22 | // gamut expansion position + 0 << 15 | // reserved. 
+ 0 << 14 | // single slice vebox enable + 0 << 13 | // hot pixel filter enable + 0 << 12 | // alpha plane enable + 0 << 11 | // vignette enable + 0 << 10 | // demosaic enable + di_output_frames_flag << 8 | // DI output frame + 0 << 7 | // 444->422 downsample method + 0 << 6 | // 422->420 downsample method + is_first_frame << 5 | // DN/DI first frame + is_di_enabled << 4 | // DI enable + is_dn_enabled << 3 | // DN enable + is_iecp_enabled << 2 | // global IECP enabled + 0 << 1 | // ColorGamutCompressionEnable + 0 ) ; // ColorGamutExpansionEnable. + + OUT_RELOC(batch, + proc_ctx->dndi_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->iecp_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->gamut_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->vertex_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_VEB_BATCH(batch, 0);/*caputre pipe state pointer*/ + OUT_VEB_BATCH(batch, 0); + + ADVANCE_VEB_BATCH(batch); +} + +void bdw_veb_dndi_iecp_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) +{ + struct intel_batchbuffer *batch = proc_ctx->batch; + unsigned char frame_ctrl_bits = 0; + unsigned int startingX = 0; + unsigned int endingX = (proc_ctx->width_input + 63 ) / 64 * 64; + + BEGIN_VEB_BATCH(batch, 0x14); + OUT_VEB_BATCH(batch, VEB_DNDI_IECP_STATE | (0x14 - 2));//DWord 0 + OUT_VEB_BATCH(batch, + startingX << 16 | + endingX -1);//DWord 1 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_IN_CURRENT].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 2 + OUT_VEB_BATCH(batch,0);//DWord 3 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_IN_PREVIOUS].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 4 + OUT_VEB_BATCH(batch,0);//DWord 5 + + OUT_RELOC(batch, + 
proc_ctx->frame_store[FRAME_IN_STMM].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 6 + OUT_VEB_BATCH(batch,0);//DWord 7 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_STMM].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 8 + OUT_VEB_BATCH(batch,0);//DWord 9 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_CURRENT_DN].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 10 + OUT_VEB_BATCH(batch,0);//DWord 11 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_CURRENT].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 12 + OUT_VEB_BATCH(batch,0);//DWord 13 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_PREVIOUS].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 14 + OUT_VEB_BATCH(batch,0);//DWord 15 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_STATISTIC].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 16 + OUT_VEB_BATCH(batch,0);//DWord 17 + + OUT_VEB_BATCH(batch,0);//DWord 18 + OUT_VEB_BATCH(batch,0);//DWord 19 + + ADVANCE_VEB_BATCH(batch); +} + +VAStatus gen8_vebox_process_picture(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param; + VAProcFilterParameterBuffer* filter = NULL; + struct object_buffer *obj_buf = NULL; + unsigned int i; + + for (i = 0; i < pipe->num_filters; i ++) { + obj_buf = BUFFER(pipe->filters[i]); + + assert(obj_buf && obj_buf->buffer_store); + + if (!obj_buf || !obj_buf->buffer_store) + goto error; + + filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; + + if (filter->type == VAProcFilterNoiseReduction) { + proc_ctx->filters_mask |= VPP_DNDI_DN; + proc_ctx->filter_dn = filter; + } else if (filter->type == 
VAProcFilterDeinterlacing) { + proc_ctx->filters_mask |= VPP_DNDI_DI; + proc_ctx->filter_di = filter; + } else if (filter->type == VAProcFilterColorBalance) { + proc_ctx->filters_mask |= VPP_IECP_PRO_AMP; + proc_ctx->filter_iecp_amp = filter; + proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements; + } + } + + hsw_veb_pre_format_convert(ctx, proc_ctx); + hsw_veb_surface_reference(ctx, proc_ctx); + + if (proc_ctx->frame_order == -1) { + hsw_veb_resource_prepare(ctx, proc_ctx); + } + + if (proc_ctx->format_convert_flags & POST_COPY_CONVERT) { + assert(proc_ctx->frame_order == 1); + /* directly copy the saved frame in the second call */ + } else { + intel_batchbuffer_start_atomic_veb(proc_ctx->batch, 0x1000); + intel_batchbuffer_emit_mi_flush(proc_ctx->batch); + hsw_veb_surface_state(ctx, proc_ctx, INPUT_SURFACE); + hsw_veb_surface_state(ctx, proc_ctx, OUTPUT_SURFACE); + hsw_veb_state_table_setup(ctx, proc_ctx); + + bdw_veb_state_command(ctx, proc_ctx); + bdw_veb_dndi_iecp_command(ctx, proc_ctx); + intel_batchbuffer_end_atomic(proc_ctx->batch); + intel_batchbuffer_flush(proc_ctx->batch); + } + + hsw_veb_post_format_convert(ctx, proc_ctx); + // hsw_veb_surface_unreference(ctx, proc_ctx); + + proc_ctx->frame_order = (proc_ctx->frame_order + 1) % 2; + + return VA_STATUS_SUCCESS; + +error: + return VA_STATUS_ERROR_INVALID_PARAMETER; +} + diff --git a/src/gen75_vpp_vebox.h b/src/gen75_vpp_vebox.h index f1061c75..a78a1651 100644 --- a/src/gen75_vpp_vebox.h +++ b/src/gen75_vpp_vebox.h @@ -150,4 +150,7 @@ void gen75_vebox_context_destroy(VADriverContextP ctx, struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx); +VAStatus gen8_vebox_process_picture(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx); + #endif -- cgit v1.2.1 From 4cbcb1621b4c4d320b3072c6b7989584ab9a3e28 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Rewrite the GPU VME shader for MPEG2 encoding on Gen8 This is from that on 
Haswell/Ivybridge. Currently MPEG2 and H264 use the same mode/motion vector prediction shader, but the MV search region of MPEG2 differs from that of H264, so the wrong mode/motion vector prediction is used for MPEG2. Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 29 +- src/shaders/vme/Makefile.am | 4 +- src/shaders/vme/mpeg2_inter_gen8.asm | 722 +++++++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_gen8.g8a | 3 + src/shaders/vme/mpeg2_inter_gen8.g8b | 292 ++++++++++++++ 5 files changed, 1030 insertions(+), 20 deletions(-) create mode 100644 src/shaders/vme/mpeg2_inter_gen8.asm create mode 100644 src/shaders/vme/mpeg2_inter_gen8.g8a create mode 100644 src/shaders/vme/mpeg2_inter_gen8.g8b diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 4a631ca7..dc325df2 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -100,7 +100,7 @@ static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = { }; static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_frame_gen8.g8b" +#include "shaders/vme/mpeg2_inter_gen8.g8b" }; static struct i965_kernel gen8_vme_mpeg2_kernels[] = { @@ -827,7 +827,6 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, int slice_mb_begin = slice_param->macroblock_address; int slice_mb_number = slice_param->num_macroblocks; unsigned int mb_intra_ub; - int slice_mb_x = slice_param->macroblock_address % mb_width; for (i = 0; i < slice_mb_number;) { int mb_count = i + slice_mb_begin; @@ -850,21 +849,6 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; } - if (i < mb_width) { - if (i == 0) - mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE); - - mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK); - - if ((i == (mb_width - 1)) && slice_mb_x) { - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; - } - } - - if ((i == mb_width) && slice_mb_x) { - mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); - } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); *command_ptr++ = kernel;
*command_ptr++ = 0; @@ -928,11 +912,20 @@ gen8_vme_mpeg2_prepare(VADriverContextP ctx, { VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } + /*Setup all the memory object*/ gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); gen8_vme_interface_setup(ctx, encode_state, encoder_context); - gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); gen8_vme_constant_setup(ctx, encode_state, encoder_context); /*Programing media pipeline*/ diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index d84f7952..29e70b8d 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -19,8 +19,8 @@ INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) -INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b mpeg2_inter_frame_gen8.g8b inter_bframe_gen8.g8b -INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a mpeg2_inter_frame_gen8.g8a inter_bframe_gen8.g8a +INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b mpeg2_inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b +INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a mpeg2_inter_frame_gen8.g8a inter_bframe_gen8.g8a 
mpeg2_inter_gen8.g8a INTEL_GEN8_INC = vme8.inc vme8_mpeg2.inc INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm new file mode 100644 index 00000000..aea2cc66 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.asm @@ -0,0 +1,722 @@ +/* + * Copyright © <2013>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * + */ +// Module name: mpeg2_inter_gen8.asm +// +// Make inter prediction estimation for MPEG2 Inter-frame on gen8 +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; + +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available).
rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) +*/ +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 
(1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; 
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are inavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw 
mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w 
RET_ARG<2,2,1>:w {align1}; + +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* m3 FWD/BWD cost center*/ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 skip center*/ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + +/* m5 */ +mov (8) vme_msg_5<1>:UD 0x0:UD {align1}; + + +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_5.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; + + +/* m7 */ + +mov (8) vme_msg_7.0<1>:ud 0x0:ud {align1}; + +/* + * SIC VME message + */ + +/* Disable Intra8x8/Intra4x4 Intra-prediction */ +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Enable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + 
mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud 0x0:ud {align1}; +mov (4) vme_m3.4<2>:ud 0x0:ud {align1}; + +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov 
(1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen 
fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + 
mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +nop; +nop; +ref_boundary_check: + +/* The left/up coordinate of reference window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; +add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; + +cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 
TEMP_VAR1.0<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + +x_left_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; + jmpi (1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; + RETURN {align1}; +nop; +nop; + diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8a b/src/shaders/vme/mpeg2_inter_gen8.g8a new file mode 100644 index 00000000..26f94a75 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.g8a @@ -0,0 +1,3 @@ +#include "vme8.inc" +#include "vme75_mpeg2.inc" +#include "mpeg2_inter_gen8.asm" diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b new file mode 100644 index 00000000..8bdabf00 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.g8b @@ -0,0 +1,292 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 
0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x2a401248, 0x16450098, 0x00040004 }, + { 0x00200001, 0x2a501248, 0x00450094, 0x00000000 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000720 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2ae40208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, + { 0x00600001, 
0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2b040208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 
0x2b240208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2b240208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 
0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000740 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000006e0 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00208, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000780 }, + { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28a00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00bb00bb }, + { 0x00000001, 
0x28a42288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00600001, 0x28e00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00020002 }, + { 0x00000040, 0x24001248, 0x16000400, 0x00040004 }, + { 0x00000001, 0x247c2288, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800608, 0x00000000, 0x00000000 }, + { 0x00400001, 0x45840608, 0x00000000, 0x00000000 }, + { 0x00600001, 
0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00040004 }, + { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 
0x28280208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 
0x2f601a68, 0x1a450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f701a68, 0x1e000f60, 0x00300030 }, + { 0x00000040, 0x2f721a68, 0x1e000f62, 0x00280028 }, + { 0x00200041, 0x2f901a68, 0x1e450fd0, 0xffffffff }, + { 0x00200040, 0x2f801a68, 0x1a450fa8, 0x00450f90 }, + { 0x00200040, 0x2f841a68, 0x1a450fa8, 0x00450fd0 }, + { 0x05000010, 0x20001a60, 0x1a000f60, 0x00000f80 }, + { 0x00010001, 0x2f601a68, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000f84 }, + { 0x00010040, 0x2f601a68, 0x1e000f84, 0xffd0ffd0 }, + { 0x05000010, 0x20001a60, 0x1a000f62, 0x00000f82 }, + { 0x00010001, 0x2f621a68, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000f86 }, + { 0x00010040, 0x2f621a68, 0x1e000f86, 0xffd8ffd8 }, + { 0x05000010, 0x20001a60, 0x1e000f60, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x00010001, 0x2f601e68, 0x18000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000fc0 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000010 }, + { 0x00010040, 0x2f601a68, 0x1e000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20001a60, 0x1e000f62, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x00010001, 0x2f621e68, 0x18000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000fc2 }, + { 0x00010040, 0x2f621a68, 0x1e000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f801a68, 0x1e450fa8, 0xffffffff }, + { 0x00200040, 0x2fe41a68, 0x1a450f60, 0x00450f80 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, -- cgit v1.2.1 From 66c43962d2e0502b4e80382cda2aeff6b589c801 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Add the MVP in GPU shader to optimize mpeg2 encoding on Gen8 Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 33 
++++++++++++++++++++++++++++++++- src/shaders/vme/mpeg2_inter_gen8.asm | 6 ++++-- src/shaders/vme/mpeg2_inter_gen8.g8b | 9 +++++---- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index dc325df2..a277565f 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -884,8 +884,39 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + bool allow_hwscore = true; + int s; + int kernel_shader; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type == VAEncPictureTypeIntra) { + allow_hwscore = false; + kernel_shader = VME_INTRA_SHADER; + } else { + kernel_shader = VME_INTER_SHADER; + } - gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, + if (allow_hwscore) + gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); + else + gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, is_intra ? 
VME_INTRA_SHADER : VME_INTER_SHADER, diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm index aea2cc66..ace191e5 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.asm +++ b/src/shaders/vme/mpeg2_inter_gen8.asm @@ -334,6 +334,8 @@ mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; __mb_hwdep_end: +mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; + /* Calibrate the ref window for MPEG2 */ mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; @@ -470,8 +472,8 @@ mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* Setup the Cost center */ /* currently four 8x8 share the same cost center */ -mov (4) vme_m3.0<2>:ud 0x0:ud {align1}; -mov (4) vme_m3.4<2>:ud 0x0:ud {align1}; +mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b index 8bdabf00..18158384 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.g8b +++ b/src/shaders/vme/mpeg2_inter_gen8.g8b @@ -123,14 +123,15 @@ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000740 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000750 }, { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x000006e0 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000006f0 }, { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x00200001, 0x2a201a68, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, { 0x00000001, 
0x24421e68, 0x18000000, 0xfff4fff4 }, { 0x00000001, 0x2fa00208, 0x00000440, 0x00000000 }, @@ -182,8 +183,8 @@ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, - { 0x00400001, 0x45800608, 0x00000000, 0x00000000 }, - { 0x00400001, 0x45840608, 0x00000000, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 }, { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, -- cgit v1.2.1 From e0b8189b7527f28079fc00c6647cf100483631b6 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Optimize the VME shader for MPEG2 encoding on Gen8 Signed-off-by: Zhao Yakui --- src/shaders/vme/mpeg2_inter_gen8.asm | 144 +++++++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_inter_gen8.g8b | 84 +++++++++++++++++++- 2 files changed, 225 insertions(+), 3 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm index ace191e5..d7cb52d1 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.asm +++ b/src/shaders/vme/mpeg2_inter_gen8.asm @@ -548,6 +548,11 @@ send (8) rlen vme_wb_length {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(-f0.0) jmpi (1) vme_run_again; +nop; +vme_mv_output: + add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; /* write FME info */ @@ -722,3 +727,142 @@ add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; nop; nop; +vme_run_again: + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; + +cmp.l.f0.0 (1) null:w 
tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; + +cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; + +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov (1) 
vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + 
vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1}; +(f0.0) jmpi (1) vme_done; +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; +nop; +nop; +nop; + diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b index 18158384..7cee07e5 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.g8b +++ b/src/shaders/vme/mpeg2_inter_gen8.g8b @@ -123,13 +123,13 @@ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000750 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000780 }, { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x000006f0 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000720 }, { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, { 0x00200001, 0x2a201a68, 0x00450ae4, 0x00000000 }, { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, @@ -138,7 +138,7 @@ { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 }, { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000780 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000007b0 }, { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 }, { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, @@ -218,6 +218,9 @@ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600208, 0x008d0580, 
0x00000000 }, { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x000004a0 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, @@ -291,3 +294,78 @@ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, + { 0x00200001, 0x24001a68, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, + { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, + { 0x05000010, 0x20001a60, 0x1e000400, 0x00000000 }, + { 0x00010041, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x05000010, 0x20001a60, 0x1e000402, 0x00000000 }, + { 0x00010041, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x04000010, 0x20001a60, 0x1e000400, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1e000402, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000010 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000003a0 }, + { 0x00600001, 0x2c800208, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00208, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 }, + { 0x00200040, 0x2fa01a68, 0x1a450440, 0x00450a90 }, + { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0xfffffca0 }, + { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000208, 
0x008d0440, 0x00000000 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00040004 }, + { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 
0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x05000010, 0x20001240, 0x12000188, 0x00000c88 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x00600001, 0x21800208, 0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00208, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0xfffff6f0 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, -- cgit v1.2.1 From 3a91deda6c36582515c08992531b60fe2a65194e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Optimize quantization rounding precision of MPEG2 encoding on Gen8 This is from that on Ivy/Haswell. Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 258f94f8..598563dd 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1636,9 +1636,11 @@ gen8_mfc_mpeg2_pic_state(VADriverContextP ctx, VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -1663,7 +1665,12 @@ gen8_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, 
(3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ -- cgit v1.2.1 From 3f222c31b2f22b8389c2e81e6fc1fe7f23835806 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Code cleanup about the media encoding on Gen8 Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 110 --------------------------------------------------------- 1 file changed, 110 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index a277565f..45c01f42 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -379,116 +379,6 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -static const unsigned int intra_mb_mode_cost_table[] = { - 0x31110001, // for qp0 - 0x09110001, // for qp1 - 0x15030001, // for qp2 - 0x0b030001, // for qp3 - 0x0d030011, // for qp4 - 0x17210011, // for qp5 - 0x41210011, // for qp6 - 0x19210011, // for qp7 - 0x25050003, // for qp8 - 0x1b130003, // for qp9 - 0x1d130003, // for qp10 - 0x27070021, // for qp11 - 0x51310021, // for qp12 - 0x29090021, // for qp13 - 0x35150005, // for qp14 - 0x2b0b0013, // for qp15 - 0x2d0d0013, // for qp16 - 0x37170007, // for qp17 - 0x61410031, // for qp18 - 0x39190009, // for qp19 - 0x45250015, // for qp20 - 0x3b1b000b, // for qp21 - 0x3d1d000d, // for qp22 - 0x47270017, // for qp23 - 0x71510041, // for qp24 ! center for qp=0..30 - 0x49290019, // for qp25 - 0x55350025, // for qp26 - 0x4b2b001b, // for qp27 - 0x4d2d001d, // for qp28 - 0x57370027, // for qp29 - 0x81610051, // for qp30 - 0x57270017, // for qp31 - 0x81510041, // for qp32 ! 
center for qp=31..51 - 0x59290019, // for qp33 - 0x65350025, // for qp34 - 0x5b2b001b, // for qp35 - 0x5d2d001d, // for qp36 - 0x67370027, // for qp37 - 0x91610051, // for qp38 - 0x69390029, // for qp39 - 0x75450035, // for qp40 - 0x6b3b002b, // for qp41 - 0x6d3d002d, // for qp42 - 0x77470037, // for qp43 - 0xa1710061, // for qp44 - 0x79490039, // for qp45 - 0x85550045, // for qp46 - 0x7b4b003b, // for qp47 - 0x7d4d003d, // for qp48 - 0x87570047, // for qp49 - 0xb1810071, // for qp50 - 0x89590049 // for qp51 -}; - -static void gen8_vme_state_setup_fixup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *vme_state_message) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - - if (slice_param->slice_type != SLICE_TYPE_I && - slice_param->slice_type != SLICE_TYPE_SI) - return; - if (encoder_context->rate_control_mode == VA_RC_CQP) - vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; - else - vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY]; -} - -static VAStatus gen8_vme_vme_state_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) -{ - struct gen6_vme_context *vme_context = encoder_context->vme_context; - unsigned int *vme_state_message; - int i; - - //pass the MV/Mb cost into VME message on HASWell - assert(vme_context->vme_state_message); - vme_state_message = (unsigned int *)vme_context->vme_state_message; - - vme_state_message[0] = 0x4a4a4a4a; - vme_state_message[1] = 0x4a4a4a4a; - vme_state_message[2] = 0x4a4a4a4a; - 
vme_state_message[3] = 0x22120200; - vme_state_message[4] = 0x62524232; - - for (i=5; i < 8; i++) { - vme_state_message[i] = 0; - } - - switch (encoder_context->codec) { - case CODEC_H264: - gen8_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); - - break; - - default: - /* no fixup */ - break; - } - - return VA_STATUS_SUCCESS; -} - static void gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, -- cgit v1.2.1 From 1adec2d28867b9661de974de82d9e15ae36faea0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Remove unnecessary GPU binary shader of mpeg2 encoding on Gen8 Signed-off-by: Zhao Yakui --- src/shaders/vme/Makefile.am | 4 +- src/shaders/vme/mpeg2_inter_frame_gen8.g8a | 3 - src/shaders/vme/mpeg2_inter_frame_gen8.g8b | 300 ----------------------------- 3 files changed, 2 insertions(+), 305 deletions(-) delete mode 100644 src/shaders/vme/mpeg2_inter_frame_gen8.g8a delete mode 100644 src/shaders/vme/mpeg2_inter_frame_gen8.g8b diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 29e70b8d..270a0dee 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -19,8 +19,8 @@ INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) -INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b mpeg2_inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b -INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a mpeg2_inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a +INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b +INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a INTEL_GEN8_INC = vme8.inc vme8_mpeg2.inc INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) diff --git a/src/shaders/vme/mpeg2_inter_frame_gen8.g8a b/src/shaders/vme/mpeg2_inter_frame_gen8.g8a deleted file mode 100644 index 9925c25b..00000000 --- 
a/src/shaders/vme/mpeg2_inter_frame_gen8.g8a +++ /dev/null @@ -1,3 +0,0 @@ -#include "vme8.inc" -#include "vme75_mpeg2.inc" -#include "inter_frame_gen8.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame_gen8.g8b b/src/shaders/vme/mpeg2_inter_frame_gen8.g8b deleted file mode 100644 index 7e52539e..00000000 --- a/src/shaders/vme/mpeg2_inter_frame_gen8.g8b +++ /dev/null @@ -1,300 +0,0 @@ - { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, - { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, - { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, - { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, - { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, - { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, - { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, - { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, - { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, - { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, - { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, - { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, - { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, - { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, - { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, - { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, - { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, - { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, - { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, - { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, - { 0x00200009, 0x24202228, 0x164500a0, 
0x00030003 }, - { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, - { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, - { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, - { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, - { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 }, - { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, - { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, - { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, - { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, - { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, - { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, - { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, - { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, - { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, - { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, - { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, - { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, - { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, - { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, - { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, - { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, - { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, - { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, - { 0x00000001, 0x2b000e28, 0x08000000, 
0x00000001 }, - { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, - { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, - { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, - { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, - { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, - { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, - { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, - { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, - { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, - { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, - { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, - { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 }, - { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, - { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, - { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, - { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, - { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, - { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, - { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, - { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, - { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, - { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, - { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, - { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, - { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, - { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, - { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, - { 0x00210001, 0x2b341e68, 0x18000000, 
0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, - { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, - { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, - { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, - { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, - { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, - { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, - { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, - { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, - { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, - { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, - { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, - { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, - { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, - { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, - { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, - { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, - { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, - { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, - { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, - { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, - { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, - { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, - { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, - { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, - { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, - { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, - { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, - { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, - { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, - { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, - { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, - { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, - { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, - { 0x00010001, 0x24040208, 0x00000b24, 
0x00000000 }, - { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, - { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, - { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, - { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, - { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, - { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000870 }, - { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, - { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, - { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, - { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, - { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000810 }, - { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, - { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, - { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, - { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, - { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, - { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, - { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, - { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, - { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, - { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, - { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, - { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, - { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, - { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, - { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, - { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, - { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, - { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, - { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, - { 0x00010001, 0x247c0e88, 0x08000000, 
0x00000002 }, - { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, - { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, - { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, - { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, - { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, - { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, - { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, - { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, - { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, - { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, - { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, - { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, - { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, - { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, - { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, - { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, - { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, - { 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 }, - { 0x00000040, 0x24421a68, 0x1e000442, 0xfff4fff4 }, - { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, - { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, - { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, - { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c }, - { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, - { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 }, - { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, - { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 }, - { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, - { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, - { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, - { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, - { 0x00400001, 0x45800208, 0x00000ac0, 
0x00000000 }, - { 0x00400001, 0x45840208, 0x00000ac0, 0x00000000 }, - { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, - { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, - { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, - { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, - { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, - { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, - { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, - { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, - { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, - { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, - { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 }, - { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, - { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, - { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, - { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, - { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, - { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, - { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, - { 0x00000001, 0x28280208, 0x00000188, 
0x00000000 }, - { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, - { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, - { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, - { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, - { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, - { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, - { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, - { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, - { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, - { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, - { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, - { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, - { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, - { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, - { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, - { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, - { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, - { 0x00010020, 0x34000000, 0x0e001400, 
0x000000a0 }, - { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, - { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, - { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, - { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, - { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, - { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, - { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, -- cgit v1.2.1 From b9fd352caa09a36729bb0c4bed3b5544a7a6a93b Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 15 Aug 2013 15:18:39 +0800 Subject: Remove the duplicated header file for mpeg2 encoding on Gen8 The vme75_mepg2.inc is also enough for the mpeg2 encoding on Gen8. And vme8_mpeg2.inc is redundant. Signed-off-by: Zhao Yakui --- src/shaders/vme/Makefile.am | 2 +- src/shaders/vme/vme8_mpeg2.inc | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) delete mode 100644 src/shaders/vme/vme8_mpeg2.inc diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 270a0dee..adea78bb 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -21,7 +21,7 @@ INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a -INTEL_GEN8_INC = vme8.inc vme8_mpeg2.inc +INTEL_GEN8_INC = vme8.inc vme75_mpeg2.inc INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) diff --git a/src/shaders/vme/vme8_mpeg2.inc b/src/shaders/vme/vme8_mpeg2.inc deleted file mode 100644 index 9b877acf..00000000 --- a/src/shaders/vme/vme8_mpeg2.inc +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright © <2010>, Intel Corporation. 
- * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ -// Modual name: ME_header.inc -// -// Global symbols define -// - -/* - * Constant - */ - -define(`INTER_PART_MASK', `0x7e000000') -- cgit v1.2.1 From a633f4e78224d228a0f7211fc0ad4d4637afc32c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 10 Sep 2013 10:24:14 +0800 Subject: VPP/bdw: a NULL shader for packed 4:2:2 to packed 4:2:2 !!! Develop the shader later Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 87c9000d..ddafba42 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1210,6 +1210,8 @@ static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { #include "shaders/post_processing/gen8/pa_to_pl3.g8b" }; +static const uint32_t pp_pa_load_save_pa_gen8[][4] = { +}; static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" }; @@ -1374,6 +1376,18 @@ static struct pp_module pp_modules_gen8[] = { gen8_pp_plx_avs_initialize, }, + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen8, + sizeof(pp_pa_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + { { "RGBX_NV12 module", -- cgit v1.2.1 From 32b1bb71322a471917b0650e3e746f4e084dedad Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 11:02:56 +0800 Subject: Indent the code of encoding on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfc.c | 318 ++++++++++++++++++++++++++++----------------------------- src/gen8_vme.c | 232 ++++++++++++++++++++--------------------- 2 files changed, 275 insertions(+), 275 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 
598563dd..14374246 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -157,7 +157,7 @@ gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encod static void gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -166,11 +166,11 @@ gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 26); OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); - /* the DW1-3 is for the MFX indirect bistream offset */ + /* the DW1-3 is for the MFX indirect bistream offset */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-5 is the MFX upper bound */ + /* the DW4-5 is the MFX upper bound */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -181,14 +181,14 @@ gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ OUT_BCS_BATCH(batch, 0); - /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ + /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ + /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -226,13 +226,13 @@ gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); - /*DW1. MB setting of frame */ + /*DW1. 
MB setting of frame */ OUT_BCS_BATCH(batch, ((width_in_mbs * height_in_mbs) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); - /* DW3 QP setting */ + /* DW3 QP setting */ OUT_BCS_BATCH(batch, (0 << 24) | /* Second Chroma QP Offset */ (0 << 16) | /* Chroma QP Offset */ @@ -258,22 +258,22 @@ gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /* DW5 Trellis quantization */ + /* DW5 Trellis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ OUT_BCS_BATCH(batch, 0); /* Reserved */ - /* DW8. QP delta */ + /* DW8. QP delta */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10. Bit setting for MB */ + /* DW10. Bit setting for MB */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); - /* DW12. */ + /* DW12. */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0x02010100); - /* DW14. For short format */ + /* DW14. 
For short format */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -388,8 +388,8 @@ gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *e static void gen8_mfc_init(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -489,7 +489,7 @@ static void gen8_mfc_init(VADriverContextP ctx, static void gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -509,7 +509,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for the post_deblocking */ + /* the DW4-6 is for the post_deblocking */ if (mfc_context->post_deblocking_output.bo) OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, @@ -521,7 +521,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for the uncompressed_picture */ + /* the DW7-9 is for the uncompressed_picture */ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* uncompressed data */ @@ -529,7 +529,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW10-12 is for the mb status */ + /* the DW10-12 is for the mb status */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* StreamOut data*/ @@ -537,7 +537,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, 
OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW13-15 is for the intra_row_store_scratch */ + /* the DW13-15 is for the intra_row_store_scratch */ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); @@ -545,7 +545,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW16-18 is for the deblocking filter */ + /* the DW16-18 is for the deblocking filter */ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); @@ -568,7 +568,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); - /* The DW 52-54 is for the MB status buffer */ + /* The DW 52-54 is for the MB status buffer */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ @@ -591,7 +591,7 @@ gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, static void gen8_mfc_avc_directmode_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -639,7 +639,7 @@ gen8_mfc_avc_directmode_state(VADriverContextP ctx, static void gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -653,12 +653,12 @@ gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address 
*/ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for Bitplane Read Buffer Base Address */ + /* the DW7-9 is for Bitplane Read Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -668,8 +668,8 @@ gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -895,7 +895,7 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in struct intel_batchbuffer *batch) { int len_in_dwords = 12; - unsigned int inter_msg = 0; + unsigned int inter_msg = 0; if (batch == NULL) batch = encoder_context->base.batch; { @@ -907,30 +907,30 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in * command. 
*/ if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { - /* MV[0] and MV[2] are replicated */ - mv_ptr[4] = mv_ptr[0]; - mv_ptr[5] = mv_ptr[1]; - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[6] = mv_ptr[8]; - mv_ptr[7] = mv_ptr[9]; + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { - /* MV[0] and MV[1] are replicated */ - mv_ptr[2] = mv_ptr[0]; - mv_ptr[3] = mv_ptr[1]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - !(msg[1] & SUBMB_SHAPE_MASK)) { - /* Don't touch MV[0] or MV[1] */ - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } } @@ -938,21 +938,21 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - inter_msg = 32; - /* MV quantity */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { - if (msg[1] & SUBMB_SHAPE_MASK) - inter_msg = 128; - } + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ OUT_BCS_BATCH(batch, offset); - inter_msg = msg[0] & (0x1F00FFFF); - inter_msg |= INTER_MV8; - inter_msg |= ((1 << 19) | (1 << 
18) | (1 << 17)); - if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - (msg[1] & SUBMB_SHAPE_MASK)) { - inter_msg |= INTER_MV32; - } + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } OUT_BCS_BATCH(batch, inter_msg); @@ -968,7 +968,7 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ #endif - inter_msg = msg[1] >> 8; + inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); OUT_BCS_BATCH(batch, 0x0); @@ -1628,8 +1628,8 @@ va_to_gen8_mpeg2_picture_type[3] = { static void gen8_mfc_mpeg2_pic_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - struct encode_state *encode_state) + struct intel_encoder_context *encoder_context, + struct encode_state *encode_state) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -1689,7 +1689,7 @@ static void gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -1718,14 +1718,14 @@ static void gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, 
- 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -1745,14 +1745,14 @@ gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enc static void gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - int x, int y, - int next_x, int next_y, - int is_fisrt_slice_group, - int is_last_slice_group, - int intra_slice, - int qp, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int next_x, int next_y, + int is_fisrt_slice_group, + int is_last_slice_group, + int intra_slice, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -1792,18 +1792,18 @@ gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, static int 
gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int mb_type, - int qp_scale_code, - int coded_block_pattern, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int mb_type, + int qp_scale_code, + int coded_block_pattern, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { int len_in_dwords = 9; @@ -1890,19 +1890,19 @@ mpeg2_motion_vector(int mv, int pos, int display_max, int f_code) static int gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *msg, - int width_in_mbs, int height_in_mbs, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int qp_scale_code, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int width_in_mbs, int height_in_mbs, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int qp_scale_code, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; int len_in_dwords = 9; @@ -2014,11 +2014,11 @@ intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, static void 
gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - VAEncSliceParameterBufferMPEG2 *next_slice_group_param, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + VAEncSliceParameterBufferMPEG2 *next_slice_group_param, + struct intel_batchbuffer *slice_batch) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -2050,16 +2050,16 @@ gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, } gen8_mfc_mpeg2_slicegroup_state(ctx, - encoder_context, - h_start_pos, - v_start_pos, - h_next_start_pos, - v_next_start_pos, - slice_index == 0, - next_slice_group_param == NULL, - slice_param->is_intra_slice, - slice_param->quantiser_scale_code, - slice_batch); + encoder_context, + h_start_pos, + v_start_pos, + h_next_start_pos, + v_next_start_pos, + slice_index == 0, + next_slice_group_param == NULL, + slice_param->is_intra_slice, + slice_param->quantiser_scale_code, + slice_batch); if (slice_index == 0) intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -2091,33 +2091,33 @@ gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, if (slice_param->is_intra_slice) { gen8_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); } else { gen8_mfc_mpeg2_pak_object_inter(ctx, - encode_state, - encoder_context, - msg, - width_in_mbs, height_in_mbs, - h_pos, 
v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - slice_param->quantiser_scale_code, - 0, - 0xff, - slice_batch); + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); } } @@ -2159,8 +2159,8 @@ gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, */ static dri_bo * gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; @@ -2200,8 +2200,8 @@ gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, static void gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -2217,8 +2217,8 @@ gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, static void gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -2323,8 +2323,8 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen8_mfc_init(ctx, 
encode_state, encoder_context); intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context); @@ -2393,9 +2393,9 @@ gen8_mfc_context_destroy(void *context) } static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus; diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 45c01f42..64aebe7a 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -123,9 +123,9 @@ static struct i965_kernel gen8_vme_mpeg2_kernels[] = { /* only used for VME source surface state */ static void gen8_vme_source_surface_state(VADriverContextP ctx, - int index, - struct object_surface *obj_surface, - struct intel_encoder_context *encoder_context) + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -138,9 +138,9 @@ gen8_vme_source_surface_state(VADriverContextP ctx, static void gen8_vme_media_source_surface_state(VADriverContextP ctx, - int index, - struct object_surface *obj_surface, - struct intel_encoder_context *encoder_context) + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -153,9 +153,9 @@ gen8_vme_media_source_surface_state(VADriverContextP ctx, static void gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx, - int index, - struct object_surface *obj_surface, - struct intel_encoder_context *encoder_context) + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -168,9 +168,9 @@ gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx, static void 
gen8_vme_output_buffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -208,9 +208,9 @@ gen8_vme_output_buffer_setup(VADriverContextP ctx, static void gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -235,9 +235,9 @@ gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, static VAStatus gen8_vme_surface_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -258,36 +258,36 @@ gen8_vme_surface_setup(VADriverContextP ctx, slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList0[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + 
slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } + /* reference 0 */ + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); } if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } + /* reference 1 */ + slice_obj_surface = NULL; + ref_surface_id = slice_param->RefPicList1[0].picture_id; + if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + slice_obj_surface = SURFACE(ref_surface_id); + } + if (slice_obj_surface && slice_obj_surface->bo) { + obj_surface = slice_obj_surface; + } else { + obj_surface = encode_state->reference_objects[0]; + } - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + obj_surface = encode_state->reference_objects[1]; + if (obj_surface && obj_surface->bo) + gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } } @@ -299,8 +299,8 @@ gen8_vme_surface_setup(VADriverContextP ctx, } static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen8_interface_descriptor_data *desc; @@ -341,8 +341,8 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, } static VAStatus 
gen8_vme_constant_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned char *constant_buffer; @@ -382,11 +382,11 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, static void gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; int mb_x = 0, mb_y = 0; @@ -469,8 +469,8 @@ static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte } static void gen8_vme_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -486,35 +486,35 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, for (s = 0; s < encode_state->num_slice_params_ext; s++) { pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; + allow_hwscore = false; + break; } } if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = VME_INTRA_SHADER; - } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || - (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + } else if 
((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { kernel_shader = VME_INTER_SHADER; - } else { + } else { kernel_shader = VME_BINTER_SHADER; if (!allow_hwscore) - kernel_shader = VME_INTER_SHADER; - } + kernel_shader = VME_INTER_SHADER; + } if (allow_hwscore) gen7_vme_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); else gen8_vme_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -531,8 +531,8 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, } static VAStatus gen8_vme_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; @@ -560,8 +560,8 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, } static VAStatus gen8_vme_run(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -571,17 +571,17 @@ static VAStatus gen8_vme_run(VADriverContextP ctx, } static 
VAStatus gen8_vme_stop(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { return VA_STATUS_SUCCESS; } static VAStatus gen8_vme_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen8_vme_media_init(ctx, encoder_context); gen8_vme_prepare(ctx, encode_state, encoder_context); @@ -593,10 +593,10 @@ gen8_vme_pipeline(VADriverContextP ctx, static void gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + int is_intra, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -632,9 +632,9 @@ gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, static void gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -659,9 +659,9 @@ gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, static VAStatus gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; @@ -695,11 +695,11 @@ gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, static void gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, - struct 
encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; int mb_x = 0, mb_y = 0; @@ -765,9 +765,9 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, static void gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -801,17 +801,17 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, if (allow_hwscore) gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); else gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, - 0, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + is_intra ? 
VME_INTRA_SHADER : VME_INTER_SHADER, + 0, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); @@ -828,8 +828,8 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, static VAStatus gen8_vme_mpeg2_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; @@ -837,7 +837,7 @@ gen8_vme_mpeg2_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = encoder_context->vme_context; if ((!vme_context->mpeg2_level) || - (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; } @@ -857,9 +857,9 @@ gen8_vme_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen8_vme_mpeg2_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen8_vme_media_init(ctx, encoder_context); gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context); @@ -897,7 +897,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); struct i965_kernel *vme_kernel_list = NULL; - int i965_kernel_num; + int i965_kernel_num; switch (encoder_context->codec) { case CODEC_H264: -- cgit v1.2.1 From 
64f3dd08a5a1756a386b4120da8e0401373d7044 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 11:05:28 +0800 Subject: Check the reference surface id against VA_INVALID_SURFACE on BDW Signed-off-by: Xiang, Haihao --- src/gen8_vme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 64aebe7a..12a31371 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -260,7 +260,7 @@ gen8_vme_surface_setup(VADriverContextP ctx, if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { @@ -276,7 +276,7 @@ gen8_vme_surface_setup(VADriverContextP ctx, /* reference 1 */ slice_obj_surface = NULL; ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { + if (ref_surface_id != VA_INVALID_SURFACE) { slice_obj_surface = SURFACE(ref_surface_id); } if (slice_obj_surface && slice_obj_surface->bo) { -- cgit v1.2.1 From 6ea484e94c5b42c87f8121275264ea62a2ff54d2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 11:22:46 +0800 Subject: Fix reference frame for list1 on BDW Signed-off-by: Xiang, Haihao --- src/gen8_vme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 12a31371..cb6f1439 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -282,10 +282,9 @@ gen8_vme_surface_setup(VADriverContextP ctx, if (slice_obj_surface && slice_obj_surface->bo) { obj_surface = slice_obj_surface; } else { - obj_surface = encode_state->reference_objects[0]; + obj_surface = encode_state->reference_objects[1]; } - obj_surface = encode_state->reference_objects[1]; if (obj_surface && obj_surface->bo)
gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); } -- cgit v1.2.1 From d8f73daade782f3b7e0f5000270df297bf9f7535 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 13:11:18 +0800 Subject: Clean up for setting up reference surface state on BDW Signed-off-by: Xiang, Haihao --- src/gen8_vme.c | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index cb6f1439..a5ca1838 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -240,7 +240,6 @@ gen8_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -252,42 +251,14 @@ gen8_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = 
SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state); - if (obj_surface && obj_surface->bo) - gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state); } /* VME output */ -- cgit v1.2.1 From 65f7e017b70c745ab4233d9f585fa688e90aeb02 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 13:20:39 +0800 Subject: Pass the reference frame index in List0/1 into the PAK command on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 14374246..6979b065 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -894,6 +894,7 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 12; unsigned int inter_msg = 0; if (batch == NULL) @@ -971,8 +972,8 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | -- cgit v1.2.1 From 8a9e85578e2af131f7efbf2e08074fde0cd225a6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 8 Oct 2013 13:27:55 +0800 Subject: Follow the input Picture/Slice parameters for 
SLICE_STATE command on BDW Signed-off-by: Xiang, Haihao --- src/gen8_mfc.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 6979b065..b86f6a80 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -744,16 +744,29 @@ gen8_mfc_avc_slice_state(VADriverContextP ctx, int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; - int bslice = 0; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -778,14 +791,11 @@ gen8_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 
8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | -- cgit v1.2.1 From 801d525194be747cff4bcb07742aa7d1a47970ce Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Sun, 13 Oct 2013 23:11:54 +0800 Subject: VPP: add vebox motion compensation support on BDW Signed-off-by: Zhong Li --- src/gen75_vpp_vebox.c | 3 ++- src/i965_drv_video.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index ab9a0146..8e80474d 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1336,7 +1336,8 @@ void bdw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *pro if (di_param->algorithm == VAProcDeinterlacingBob) is_first_frame = 1; - if (di_param->algorithm == VAProcDeinterlacingMotionAdaptive && + if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated) && proc_ctx->frame_order != -1) di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */ } diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index bfb1092f..6048763d 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -344,6 +344,7 @@ static struct hw_codec_info gen8_hw_codec_info = { .has_accelerated_putimage = 1, .has_tiled_surface = 1, .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, .has_vp8_decoding = 1, .num_filters = 4, -- cgit v1.2.1 From 051b66864974596c509c5cdca34ea9fa17dc7ae7 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 4 Nov 2013 09:43:19 +0800 Subject: Fix one error of VME shader for H264 encoding on BDW Otherwise the h264 encoding will use the incorrect prediction result for the macroblocks in the first row if the MVP is used Signed-off-by: Zhao Yakui --- src/shaders/vme/inter_bframe_gen8.asm | 2 +- 
src/shaders/vme/inter_bframe_gen8.g8b | 2 +- src/shaders/vme/inter_frame_gen8.asm | 2 +- src/shaders/vme/inter_frame_gen8.g8b | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shaders/vme/inter_bframe_gen8.asm b/src/shaders/vme/inter_bframe_gen8.asm index d8de5882..f16a63bb 100644 --- a/src/shaders/vme/inter_bframe_gen8.asm +++ b/src/shaders/vme/inter_bframe_gen8.asm @@ -404,7 +404,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b index b3d74cce..98531a35 100644 --- a/src/shaders/vme/inter_bframe_gen8.g8b +++ b/src/shaders/vme/inter_bframe_gen8.g8b @@ -194,7 +194,7 @@ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, - { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, { 0x00210001, 0x2b040208, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b240208, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b141248, 0x00450af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm index aa9fb80c..7db5fa66 100644 --- a/src/shaders/vme/inter_frame_gen8.asm +++ b/src/shaders/vme/inter_frame_gen8.asm @@ -330,7 +330,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) 
null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b index c4e2c972..63565417 100644 --- a/src/shaders/vme/inter_frame_gen8.g8b +++ b/src/shaders/vme/inter_frame_gen8.g8b @@ -121,7 +121,7 @@ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, - { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, -- cgit v1.2.1 From 76629022f44ff15c546bcdc01e0884bff04cd8d7 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 4 Nov 2013 09:43:23 +0800 Subject: Fix one error of VME shader for MPEG2 encoding on BDW Otherwise the MPEG2 encoding will use the incorrect prediction result for the macroblocks in the first row if the MVP is used Signed-off-by: Zhao Yakui --- src/shaders/vme/mpeg2_inter_gen8.asm | 2 +- src/shaders/vme/mpeg2_inter_gen8.g8b | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm index d7cb52d1..6dd8599c 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.asm +++ b/src/shaders/vme/mpeg2_inter_gen8.asm @@ -294,7 +294,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) 
mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b index 7cee07e5..6686c9f7 100644 --- a/src/shaders/vme/mpeg2_inter_gen8.g8b +++ b/src/shaders/vme/mpeg2_inter_gen8.g8b @@ -98,7 +98,7 @@ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, - { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000001 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, -- cgit v1.2.1 From 209da9550a812f37a7c9d7b91022f2cb61b73baf Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 13 Dec 2013 15:18:56 +0800 Subject: Update the pipe_control command on Gen8 to make media pipeline work Signed-off-by: Zhao Yakui --- src/intel_batchbuffer.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index e1f5a5ed..52bf4430 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -187,7 +187,21 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) IS_GEN7(intel->device_id) || IS_GEN8(intel->device_id)) { if (batch->flag == I915_EXEC_RENDER) { - if (IS_GEN6(intel->device_id)) { + if (IS_GEN8(intel->device_id)) { + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2)); + + OUT_BATCH(batch, + CMD_PIPE_CONTROL_WC_FLUSH | + CMD_PIPE_CONTROL_TC_FLUSH | + CMD_PIPE_CONTROL_DC_FLUSH | + CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* write data */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } else if (IS_GEN6(intel->device_id)) 
{ assert(batch->wa_render_bo); BEGIN_BATCH(batch, 4 * 3); @@ -214,6 +228,9 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); /* write data */ + ADVANCE_BATCH(batch); } else { BEGIN_BATCH(batch, 4); OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); @@ -223,11 +240,11 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_DC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); /* write data */ + ADVANCE_BATCH(batch); } - OUT_BATCH(batch, 0); /* write address */ - OUT_BATCH(batch, 0); /* write data */ - ADVANCE_BATCH(batch); } else { if (batch->flag == I915_EXEC_BLT) { BEGIN_BLT_BATCH(batch, 4); -- cgit v1.2.1 From 06313021fc960e5bf52c9195da70289a94859aeb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 13 Dec 2013 15:18:56 +0800 Subject: Add the missing media pipeline command for encoding on BDW Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 288 insertions(+), 5 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index a5ca1838..095dbd4b 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -226,11 +226,13 @@ gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, "VME batchbuffer", vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, 0x1000); + /* vme_context->vme_buffer_suface_setup(ctx, &vme_context->gpe_context, &vme_context->vme_batchbuffer, BINDING_TABLE_OFFSET(index), SURFACE_STATE_OFFSET(index)); + */ } static VAStatus @@ -295,7 +297,7 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); desc->desc5.constant_urb_entry_read_offset = 0; desc->desc5.constant_urb_entry_read_length = 
CURBE_URB_ENTRY_LENGTH; - + /*kernel start*/ dri_bo_emit_reloc(bo, @@ -349,6 +351,153 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +#define MB_SCOREBOARD_A (1 << 0) +#define MB_SCOREBOARD_B (1 << 1) +#define MB_SCOREBOARD_C (1 << 2) + +/* check whether the mb of (x_index, y_index) is out of bound */ +static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height) +{ + int mb_index; + if (x_index < 0 || x_index >= mb_width) + return -1; + if (y_index < 0 || y_index >= mb_height) + return -1; + + mb_index = y_index * mb_width + x_index; + if (mb_index < first_mb || mb_index > (first_mb + num_mb)) + return -1; + return 0; +} + +static void +gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_row; + int s; + unsigned int *command_ptr; + +#define USE_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + int first_mb = pSliceParameter->macroblock_address; + int num_mb = pSliceParameter->num_macroblocks; + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + + x_outer = first_mb % mb_width; + y_outer = first_mb / mb_width; + mb_row = y_outer; + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep 
= 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != mb_row) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = USE_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; + } + + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = first_mb / mb_width; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != mb_row) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = USE_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline 
data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} static void gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, @@ -410,12 +559,14 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; i += 1; } } - *command_ptr++ = 0; *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; dri_bo_unmap(vme_context->vme_batchbuffer.bo); } @@ -460,6 +611,7 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, break; } } + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = VME_INTRA_SHADER; @@ -472,7 +624,7 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, kernel_shader = VME_INTER_SHADER; } if (allow_hwscore) - gen7_vme_walker_fill_vme_batchbuffer(ctx, + gen8wa_vme_walker_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, kernel_shader, @@ -663,6 +815,135 @@ gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +static void +gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned int *command_ptr; + +#define MPEG2_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = 
vme_context->vme_batchbuffer.bo->virtual; + + { + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + int first_mb = 0; + int num_mb = mb_width * mb_height; + + x_outer = 0; + y_outer = 0; + + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; + } + + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = 0; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= 
MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); + return; +} + static void gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, @@ -720,6 +1001,8 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; i += 1; } @@ -727,8 +1010,8 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, } } - *command_ptr++ = 0; *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; dri_bo_unmap(vme_context->vme_batchbuffer.bo); } @@ -770,7 +1053,7 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, } if (allow_hwscore) - gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, kernel_shader, -- cgit v1.2.1 From 13df1ed3cc578c2735e61192cfe07d3959736ef4 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 13 Dec 2013 15:18:56 +0800 Subject: Fix the command error for MPEG2 
encoding on BDW Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 095dbd4b..464c344d 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -1068,12 +1068,14 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); intel_batchbuffer_end_atomic(batch); -- cgit v1.2.1 From e90a3c3a41a07734163852ff9868336d5ec121b0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 13 Dec 2013 17:03:47 +0800 Subject: Follow the spec to make BDW encoding media pipeline command support 48-bit addressing mode Signed-off-by: Zhao Yakui --- src/gen8_vme.c | 37 +++++----- src/i965_drv_video.h | 1 + src/i965_gpe_utils.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/i965_gpe_utils.h | 33 +++++++++ 4 files changed, 234 insertions(+), 24 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 464c344d..5369b319 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -278,11 +278,14 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, struct gen8_interface_descriptor_data *desc; int i; dri_bo *bo; + unsigned char *desc_ptr; - bo = vme_context->gpe_context.idrt.bo; + bo = vme_context->gpe_context.dynamic_state.bo; dri_bo_map(bo, 1); assert(bo->virtual); - desc = bo->virtual; + desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset; + + desc = (struct gen8_interface_descriptor_data *)desc_ptr; for (i = 0; i < vme_context->vme_kernel_sum; i++) { struct i965_kernel *kernel; @@ -290,7 +293,7 @@ static VAStatus 
gen8_vme_interface_setup(VADriverContextP ctx, assert(sizeof(*desc) == 32); /*Setup the descritor table*/ memset(desc, 0, sizeof(*desc)); - desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); + desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; desc->desc3.sampler_count = 0; /* FIXME: */ desc->desc3.sampler_state_pointer = 0; desc->desc4.binding_table_entry_count = 1; /* FIXME: */ @@ -299,14 +302,9 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH; - /*kernel start*/ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(*desc) + offsetof(struct gen8_interface_descriptor_data, desc0), - kernel->bo); desc++; } + dri_bo_unmap(bo); return VA_STATUS_SUCCESS; @@ -336,9 +334,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, vme_state_message[31] = mv_num; - dri_bo_map(vme_context->gpe_context.curbe.bo, 1); - assert(vme_context->gpe_context.curbe.bo->virtual); - constant_buffer = vme_context->gpe_context.curbe.bo->virtual; + dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1); + assert(vme_context->gpe_context.dynamic_state.bo->virtual); + constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual + + vme_context->gpe_context.curbe_offset; /* VME MV/Mb cost table is passed by using const buffer */ /* Now it uses the fixed search path. 
So it is constructed directly @@ -346,7 +345,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, */ memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128); - dri_bo_unmap(vme_context->gpe_context.curbe.bo); + dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo); return VA_STATUS_SUCCESS; } @@ -575,7 +574,7 @@ static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte { struct gen6_vme_context *vme_context = encoder_context->vme_context; - i965_gpe_context_init(ctx, &vme_context->gpe_context); + gen8_gpe_context_init(ctx, &vme_context->gpe_context); /* VME output buffer */ dri_bo_unreference(vme_context->vme_output.bo); @@ -1129,7 +1128,7 @@ gen8_vme_context_destroy(void *context) { struct gen6_vme_context *vme_context = context; - i965_gpe_context_destroy(&vme_context->gpe_context); + gen8_gpe_context_destroy(&vme_context->gpe_context); dri_bo_unreference(vme_context->vme_output.bo); vme_context->vme_output.bo = NULL; @@ -1177,10 +1176,10 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context->vme_kernel_sum = i965_kernel_num; vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; - vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; - vme_context->gpe_context.idrt.entry_size = sizeof(struct gen8_interface_descriptor_data); + vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6; + vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH; + vme_context->gpe_context.sampler_size = 0; - vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH; vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; vme_context->gpe_context.vfe_state.num_urb_entries = 16; @@ -1190,7 +1189,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e gen7_vme_scoreboard_init(ctx, vme_context); - 
i965_gpe_load_kernels(ctx, + gen8_gpe_load_kernels(ctx, &vme_context->gpe_context, vme_kernel_list, i965_kernel_num); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 1a101f45..98e08fef 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -78,6 +78,7 @@ struct i965_kernel const uint32_t (*bin)[4]; int size; dri_bo *bo; + unsigned int kernel_offset; }; struct buffer_store diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 2af323f9..0d49703b 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -949,18 +949,39 @@ gen8_gpe_state_base_address(VADriverContextP ctx, OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); + /*DW4 Surface state base address */ OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ OUT_BATCH(batch, 0); + /*DW6. Dynamic state base address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address + if (gpe_context->dynamic_state.bo) + OUT_RELOC(batch, gpe_context->dynamic_state.bo, + I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); /*DW8. Indirect Object base address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address + if (gpe_context->indirect_state.bo) + OUT_RELOC(batch, gpe_context->indirect_state.bo, + I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + /*DW10. Instruct base address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address + if (gpe_context->instruction_state.bo) + OUT_RELOC(batch, gpe_context->instruction_state.bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); /* DW12. 
Size limitation */ @@ -1008,6 +1029,38 @@ gen8_gpe_vfe_state(VADriverContextP ctx, } + +static void +gen8_gpe_curbe_load(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 4); + + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, gpe_context->curbe_size); + OUT_BATCH(batch, gpe_context->curbe_offset); + + ADVANCE_BATCH(batch); +} + +static void +gen8_gpe_idrt(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 4); + + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, gpe_context->idrt_size); + OUT_BATCH(batch, gpe_context->idrt_offset); + + ADVANCE_BATCH(batch); +} + + void gen8_gpe_pipeline_setup(VADriverContextP ctx, struct i965_gpe_context *gpe_context, @@ -1018,7 +1071,131 @@ gen8_gpe_pipeline_setup(VADriverContextP ctx, i965_gpe_select(ctx, gpe_context, batch); gen8_gpe_state_base_address(ctx, gpe_context, batch); gen8_gpe_vfe_state(ctx, gpe_context, batch); - gen6_gpe_curbe_load(ctx, gpe_context, batch); - gen6_gpe_idrt(ctx, gpe_context, batch); + gen8_gpe_curbe_load(ctx, gpe_context, batch); + gen8_gpe_idrt(ctx, gpe_context, batch); +} + +void +gen8_gpe_context_init(VADriverContextP ctx, + struct i965_gpe_context *gpe_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + int bo_size; + unsigned int end_offset; + + dri_bo_unreference(gpe_context->surface_state_binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + gpe_context->surface_state_binding_table.length, + 4096); + assert(bo); + gpe_context->surface_state_binding_table.bo = bo; + + bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192; + dri_bo_unreference(gpe_context->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + 
bo_size, + 4096); + assert(bo); + gpe_context->dynamic_state.bo = bo; + gpe_context->dynamic_state.bo_size = bo_size; + + end_offset = 0; + gpe_context->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + gpe_context->curbe_offset = ALIGN(end_offset, 64); + end_offset += gpe_context->curbe_size; + + /* Interface descriptor offset */ + gpe_context->idrt_offset = ALIGN(end_offset, 64); + end_offset += gpe_context->idrt_size; + + /* Sampler state offset */ + gpe_context->sampler_offset = ALIGN(end_offset, 64); + end_offset += gpe_context->sampler_size; + + /* update the end offset of dynamic_state */ + gpe_context->dynamic_state.end_offset = end_offset; +} + + +void +gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context) +{ + int i; + + dri_bo_unreference(gpe_context->surface_state_binding_table.bo); + gpe_context->surface_state_binding_table.bo = NULL; + + dri_bo_unreference(gpe_context->instruction_state.bo); + gpe_context->instruction_state.bo = NULL; + + dri_bo_unreference(gpe_context->dynamic_state.bo); + gpe_context->dynamic_state.bo = NULL; + + dri_bo_unreference(gpe_context->indirect_state.bo); + gpe_context->indirect_state.bo = NULL; + +} + + +void +gen8_gpe_load_kernels(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + unsigned int num_kernels) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct i965_kernel *kernel; + + assert(num_kernels <= MAX_GPE_KERNELS); + memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels); + gpe_context->num_kernels = num_kernels; + + kernel_size = num_kernels * 64; + for (i = 0; i < num_kernels; i++) { + kernel = &gpe_context->kernels[i]; + + kernel_size += kernel->size; + } + + gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (gpe_context->instruction_state.bo 
== NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); + return; + } + + assert(gpe_context->instruction_state.bo); + + gpe_context->instruction_state.bo_size = kernel_size; + gpe_context->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(gpe_context->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual); + for (i = 0; i < num_kernels; i++) { + kernel_offset = ALIGN(end_offset, 64); + kernel = &gpe_context->kernels[i]; + kernel->kernel_offset = kernel_offset; + + memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + + end_offset += kernel->size; + } + + gpe_context->instruction_state.end_offset = end_offset; + + dri_bo_unmap(gpe_context->instruction_state.bo); + + return; } diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index b96916d5..23311521 100644 --- a/src/i965_gpe_utils.h +++ b/src/i965_gpe_utils.h @@ -114,6 +114,29 @@ struct i965_gpe_context unsigned int num_kernels; struct i965_kernel kernels[MAX_GPE_KERNELS]; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int sampler_offset; + int sampler_size; + unsigned int idrt_offset; + int idrt_size; + unsigned int curbe_offset; + int curbe_size; }; void i965_gpe_context_destroy(struct i965_gpe_context *gpe_context); @@ -186,4 +209,14 @@ extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, void gen8_gpe_pipeline_setup(VADriverContextP ctx, struct i965_gpe_context *gpe_context, struct intel_batchbuffer *batch); + + +void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context); +void gen8_gpe_context_init(VADriverContextP ctx, + struct i965_gpe_context *gpe_context); + +void gen8_gpe_load_kernels(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + 
unsigned int num_kernels); #endif /* _I965_GPE_UTILS_H_ */ -- cgit v1.2.1 From 6c83912d70620c5690998fcd0624e734ae81f46e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 14:32:29 +0800 Subject: Fix incorrect MI_BATCH_BUFFER_START command for MPEG2 encoding on BDW Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index b86f6a80..4b2d1b7d 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -2243,12 +2243,14 @@ gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, // picture level programing gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context); - BEGIN_BCS_BATCH(batch, 2); - OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + BEGIN_BCS_BATCH(batch, 4); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_BCS_RELOC(batch, slice_batch_bo, I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); // end programing -- cgit v1.2.1 From b490be6a8beaea79b19d5a26ba4050de9df55ac2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 14:32:35 +0800 Subject: Enable the Intra-prediction for MPEG2 P-B frame on BDW This is picked up from the implementation on Haswell/Ivybridge. 
Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 4b2d1b7d..0911924c 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -2115,7 +2115,26 @@ gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0xff, slice_batch); } else { - gen8_mfc_mpeg2_pak_object_inter(ctx, + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + + if (intra_rdo < inter_rdo) + gen8_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + else + gen8_mfc_mpeg2_pak_object_inter(ctx, encode_state, encoder_context, msg, -- cgit v1.2.1 From 449366fd74d54969d945d17b5b108bc4fa9b2236 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 16:59:52 +0800 Subject: Follow the spec to restrict the max number of PS thread Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/i965_render.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index b1714a69..7e438c58 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -4244,13 +4244,7 @@ i965_render_init(VADriverContextP ctx) assert(render_state->curbe.bo); if (IS_GEN8(i965->intel.device_id)) { - render_state->max_wm_threads = 48; - if (IS_BDW_GT1(i965->intel.device_id)) - render_state->max_wm_threads = 120; - else if (IS_BDW_GT2(i965->intel.device_id)) - render_state->max_wm_threads = 180; - else if (IS_BDW_GT2PLUS(i965->intel.device_id)) - render_state->max_wm_threads = 360; + render_state->max_wm_threads = 64; } else if (IS_HSW_GT1(i965->intel.device_id)) { render_state->max_wm_threads = 102; } else if (IS_HSW_GT2(i965->intel.device_id)) { -- cgit v1.2.1 From 
140869a548b6def440bac6685530e7abd47e83b0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 16:59:57 +0800 Subject: Add the missing 3D pipeline command for rendering on BDW Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/i965_defines.h | 10 +++++++++ src/i965_render.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/i965_defines.h b/src/i965_defines.h index 52ae85f1..5b4a076a 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -178,6 +178,10 @@ #define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16) #define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17) +/* Gen8 WM_HZ_OP */ +#define GEN8_3DSTATE_WM_HZ_OP CMD(3, 0, 0x52) + + # define GEN6_3DSTATE_CONSTANT_BUFFER_3_ENABLE (1 << 15) # define GEN6_3DSTATE_CONSTANT_BUFFER_2_ENABLE (1 << 14) # define GEN6_3DSTATE_CONSTANT_BUFFER_1_ENABLE (1 << 13) @@ -212,6 +216,10 @@ #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS CMD(3, 1, 0x12) #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16) + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS CMD(3, 1, 0x14) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS CMD(3, 1, 0x13) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS CMD(3, 1, 0x15) /* DW1 */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 # define GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 @@ -316,6 +324,8 @@ #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS CMD(3, 0, 0x2b) #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS CMD(3, 0, 0x2e) #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS CMD(3, 0, 0x2f) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS CMD(3, 0, 0x2c) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS CMD(3, 0, 0x2d) #define MFX(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ diff --git a/src/i965_render.c b/src/i965_render.c index 7e438c58..a48acc9c 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3487,6 +3487,17 @@ gen8_emit_vs_state(VADriverContextP ctx) OUT_BATCH(batch, 0); /* pass-through */ OUT_BATCH(batch, 0); 
ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } /* @@ -3504,11 +3515,32 @@ gen8_emit_urb(VADriverContextP ctx) /* The minimum urb entries is 64 */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Size is 8Kbs and base address is 0Kb */ BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); /* Size is 8Kbs and base address is 0Kb */ OUT_BATCH(batch, - (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | + (1 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | (4 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); ADVANCE_BATCH(batch); @@ -3584,6 +3616,11 @@ gen8_emit_bypass_state(VADriverContextP ctx) OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + /* disable HS */ BEGIN_BATCH(batch, 11); OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); @@ -3619,6 +3656,11 @@ gen8_emit_bypass_state(VADriverContextP ctx) OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + /* Disable TE */ BEGIN_BATCH(batch, 4); 
OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); @@ -3663,6 +3705,11 @@ gen8_emit_bypass_state(VADriverContextP ctx) OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + /* Disable STREAMOUT */ BEGIN_BATCH(batch, 5); OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2)); @@ -3935,6 +3982,21 @@ gen8_emit_depth_stencil_state(VADriverContextP ctx) ADVANCE_BATCH(batch); } +static void +gen8_emit_wm_hz_op(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + static void gen8_render_emit_states(VADriverContextP ctx, int kernel) { @@ -3949,6 +4011,7 @@ gen8_render_emit_states(VADriverContextP ctx, int kernel) gen8_emit_urb(ctx); gen8_emit_cc_state_pointers(ctx); gen7_emit_sampler_state_pointers(ctx); + gen8_emit_wm_hz_op(ctx); gen8_emit_bypass_state(ctx); gen8_emit_vs_state(ctx); gen8_emit_clip_state(ctx); -- cgit v1.2.1 From a556c2f777d2c313f03e52b05fe97beb15a771ee Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 17:00:00 +0800 Subject: follow the spec to fill the Vertex URB entry on BDW Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/i965_render.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index a48acc9c..442b488b 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3427,23 +3427,45 @@ gen8_emit_vertex_element_state(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; + /* + * The VUE layout + * dword 0-3: pad (0, 0, 0. 
0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0) + */ + /* Set up our vertex elements, sourced from the single vertex buffer. */ - OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2)); - /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2)); + + /* Element state 0. These are 4 dwords of 0 required for the VUE format. + * We don't really know or care what they do. + */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN8_VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + + /* offset 8: X, Y -> {x, y, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | - (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); - /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + + /* offset 0: u,v -> {U, V, 1.0, 1.0} */ OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN8_VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | - (8 << VE0_OFFSET_SHIFT)); - OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << 
VE1_VFCOMPONENT_3_SHIFT)); @@ -3797,7 +3819,7 @@ gen8_emit_sf_state(VADriverContextP ctx) (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) | (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | - (0 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); -- cgit v1.2.1 From eb640239c0e85649f37daf994083ccebcb297c2c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Dec 2013 17:00:03 +0800 Subject: Follow the spec to make the 3D pipeline work in 48-bit addressing mode Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/i965_render.c | 416 +++++++++++++++++++++++++++++++++++++++++++----------- src/i965_render.h | 38 +++++ 2 files changed, 374 insertions(+), 80 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 442b488b..7d95d04b 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2566,6 +2566,8 @@ gen8_render_initialize(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; dri_bo *bo; + int size; + unsigned int end_offset; /* VERTEX BUFFER */ dri_bo_unreference(render_state->vb.vertex_buffer); @@ -2585,50 +2587,70 @@ gen8_render_initialize(VADriverContextP ctx) assert(bo); render_state->wm.surface_state_binding_table_bo = bo; - dri_bo_unreference(render_state->wm.sampler); - bo = dri_bo_alloc(i965->intel.bufmgr, - "sampler state", - MAX_SAMPLERS * sizeof(struct gen8_sampler_state), - 4096); - assert(bo); - render_state->wm.sampler = bo; + render_state->curbe_size = 256; + render_state->wm.sampler_count = 0; - /* COLOR CALCULATOR */ - dri_bo_unreference(render_state->cc.state); - bo = dri_bo_alloc(i965->intel.bufmgr, - "color calc state", - sizeof(struct gen6_color_calc_state), - 4096); - assert(bo); - render_state->cc.state = bo; + render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state); - /* CC VIEWPORT */ 
- dri_bo_unreference(render_state->cc.viewport); - bo = dri_bo_alloc(i965->intel.bufmgr, - "cc viewport", - sizeof(struct i965_cc_viewport), - 4096); - assert(bo); - render_state->cc.viewport = bo; + render_state->cc_state_size = sizeof(struct gen6_color_calc_state); - /* BLEND STATE */ - dri_bo_unreference(render_state->cc.blend); - bo = dri_bo_alloc(i965->intel.bufmgr, - "blend state", - sizeof(struct gen6_blend_state), - 4096); - assert(bo); - render_state->cc.blend = bo; + render_state->cc_viewport_size = sizeof(struct i965_cc_viewport); - /* DEPTH & STENCIL STATE */ - dri_bo_unreference(render_state->cc.depth_stencil); + render_state->blend_state_size = sizeof(struct gen8_global_blend_state) + + 16 * sizeof(struct gen8_blend_state_rt); + + render_state->sf_clip_size = 1024; + + render_state->scissor_size = 1024; + + size = 4096 + render_state->curbe_size + render_state->sampler_size + + render_state->cc_state_size + render_state->cc_viewport_size + + render_state->blend_state_size + render_state->sf_clip_size + + render_state->scissor_size; + + dri_bo_unreference(render_state->dynamic_state.bo); bo = dri_bo_alloc(i965->intel.bufmgr, - "depth & stencil state", - sizeof(struct gen6_depth_stencil_state), + "dynamic_state", + size, 4096); - assert(bo); - render_state->cc.depth_stencil = bo; + + render_state->dynamic_state.bo = bo; + + end_offset = 0; + render_state->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + render_state->curbe_offset = ALIGN(end_offset, 64); + end_offset += render_state->curbe_size; + + /* Sampler_state */ + render_state->sampler_offset = ALIGN(end_offset, 64); + end_offset += render_state->sampler_size; + + /* CC_VIEWPORT_state */ + render_state->cc_viewport_offset = ALIGN(end_offset, 64); + end_offset += render_state->cc_viewport_size; + + /* CC_STATE_state */ + render_state->cc_state_offset = ALIGN(end_offset, 64); + end_offset += render_state->cc_state_size; + + /* Blend_state */ + render_state->blend_state_offset = 
ALIGN(end_offset, 64); + end_offset += render_state->blend_state_size; + + /* SF_CLIP_state */ + render_state->sf_clip_offset = ALIGN(end_offset, 64); + end_offset += render_state->sf_clip_size; + + /* SCISSOR_state */ + render_state->scissor_offset = ALIGN(end_offset, 64); + end_offset += render_state->scissor_size; + + /* update the end offset of dynamic_state */ + render_state->dynamic_state.end_offset = ALIGN(end_offset, 64); + } static void @@ -2714,13 +2736,19 @@ gen8_render_sampler(VADriverContextP ctx) struct i965_render_state *render_state = &i965->render_state; struct gen8_sampler_state *sampler_state; int i; + unsigned char *cc_ptr; assert(render_state->wm.sampler_count > 0); assert(render_state->wm.sampler_count <= MAX_SAMPLERS); - dri_bo_map(render_state->wm.sampler, 1); - assert(render_state->wm.sampler->virtual); - sampler_state = render_state->wm.sampler->virtual; + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->sampler_offset; + + sampler_state = (struct gen8_sampler_state *) cc_ptr; + for (i = 0; i < render_state->wm.sampler_count; i++) { memset(sampler_state, 0, sizeof(*sampler_state)); sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; @@ -2731,7 +2759,7 @@ gen8_render_sampler(VADriverContextP ctx) sampler_state++; } - dri_bo_unmap(render_state->wm.sampler); + dri_bo_unmap(render_state->dynamic_state.bo); } @@ -2762,17 +2790,104 @@ gen8_render_blend_state(VADriverContextP ctx) struct i965_render_state *render_state = &i965->render_state; struct gen8_global_blend_state *global_blend_state; struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; - dri_bo_map(render_state->cc.blend, 1); - assert(render_state->cc.blend->virtual); - global_blend_state = render_state->cc.blend->virtual; + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = 
(unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + memset(global_blend_state, 0, sizeof(*global_blend_state)); /* Global blend state + blend_state for Render Target */ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); blend_state->blend1.logic_op_enable = 1; blend_state->blend1.logic_op_func = 0xc; blend_state->blend1.pre_blend_clamp_enable = 1; - dri_bo_unmap(render_state->cc.blend); + + dri_bo_unmap(render_state->dynamic_state.bo); +} + + +static void +gen8_render_cc_viewport(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct i965_cc_viewport *cc_viewport; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_viewport_offset; + + cc_viewport = (struct i965_cc_viewport *) cc_ptr; + + memset(cc_viewport, 0, sizeof(*cc_viewport)); + + cc_viewport->min_depth = -1.e35; + cc_viewport->max_depth = 1.e35; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_color_calc_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_color_calc_state *color_calc_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_state_offset; + + color_calc_state = (struct gen6_color_calc_state *) cc_ptr; + + memset(color_calc_state, 0, sizeof(*color_calc_state)); + color_calc_state->constant_r = 1.0; + color_calc_state->constant_g = 0.0; + color_calc_state->constant_b = 1.0; + 
color_calc_state->constant_a = 1.0; + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + unsigned short *constant_buffer; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (unsigned short *) cc_ptr; + + if (obj_surface->subsampling == SUBSAMPLE_YUV400) { + assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); + + *constant_buffer = 2; + } else { + if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) + *constant_buffer = 1; + else + *constant_buffer = 0; + } + + dri_bo_unmap(render_state->dynamic_state.bo); } static void @@ -2787,10 +2902,10 @@ gen8_render_setup_states( i965_render_dest_surface_state(ctx, 0); i965_render_src_surfaces_state(ctx, obj_surface, flags); gen8_render_sampler(ctx); - i965_render_cc_viewport(ctx); - gen7_render_color_calc_state(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); gen8_render_blend_state(ctx); - i965_render_upload_constants(ctx, obj_surface, flags); + gen8_render_upload_constants(ctx, obj_surface); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -2860,16 +2975,19 @@ gen8_emit_state_base_address(VADriverContextP ctx) OUT_BATCH(batch, 0); /*DW6*/ - OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ - OUT_BATCH(batch, 0); + /* Dynamic state base address */ + OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); /*DW8*/ OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ - OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); 
/*DW10 */ - OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */ - OUT_BATCH(batch, 0); + /* Instruction base address */ + OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); /*DW12 */ OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */ @@ -2992,18 +3110,12 @@ gen8_emit_cc_state_pointers(VADriverContextP ctx) BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); - OUT_RELOC(batch, - render_state->cc.state, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 1); + OUT_BATCH(batch, (render_state->cc_state_offset + 1)); ADVANCE_BATCH(batch); BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); - OUT_RELOC(batch, - render_state->cc.blend, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 1); + OUT_BATCH(batch, (render_state->blend_state_offset + 1)); ADVANCE_BATCH(batch); } @@ -3895,10 +4007,7 @@ gen8_emit_wm_state(VADriverContextP ctx, int kernel) OUT_BATCH(batch, 1); OUT_BATCH(batch, 0); /*DW3-4. Constant buffer 0 */ - OUT_RELOC(batch, - render_state->curbe.bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + OUT_BATCH(batch, render_state->curbe_offset); OUT_BATCH(batch, 0); /*DW5-10. Constant buffer 1-3 */ @@ -3913,10 +4022,8 @@ gen8_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 12); OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2)); /* PS shader address */ - OUT_RELOC(batch, - render_state->render_kernels[kernel].bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset); + OUT_BATCH(batch, 0); /* DW3. 
PS shader flag .Binding table cnt/sample cnt */ OUT_BATCH(batch, @@ -4019,6 +4126,38 @@ gen8_emit_wm_hz_op(VADriverContextP ctx) ADVANCE_BATCH(batch); } +static void +gen8_emit_viewport_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_BATCH(batch, render_state->cc_viewport_offset); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_sampler_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, render_state->sampler_offset); + ADVANCE_BATCH(batch); +} + + static void gen8_render_emit_states(VADriverContextP ctx, int kernel) { @@ -4029,10 +4168,10 @@ gen8_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_emit_mi_flush(batch); gen8_emit_invarient_states(ctx); gen8_emit_state_base_address(ctx); - gen7_emit_viewport_state_pointers(ctx); + gen8_emit_viewport_state_pointers(ctx); gen8_emit_urb(ctx); gen8_emit_cc_state_pointers(ctx); - gen7_emit_sampler_state_pointers(ctx); + gen8_emit_sampler_state_pointers(ctx); gen8_emit_wm_hz_op(ctx); gen8_emit_bypass_state(ctx); gen8_emit_vs_state(ctx); @@ -4114,10 +4253,16 @@ gen8_subpicture_render_blend_state(VADriverContextP ctx) struct i965_render_state *render_state = &i965->render_state; struct gen8_global_blend_state *global_blend_state; struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; + + 
dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; - dri_bo_map(render_state->cc.blend, 1); - assert(render_state->cc.blend->virtual); - global_blend_state = render_state->cc.blend->virtual; memset(global_blend_state, 0, sizeof(*global_blend_state)); /* Global blend state + blend_state for Render Target */ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); @@ -4130,7 +4275,8 @@ gen8_subpicture_render_blend_state(VADriverContextP ctx) blend_state->blend1.post_blend_clamp_enable = 1; blend_state->blend1.pre_blend_clamp_enable = 1; blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ - dri_bo_unmap(render_state->cc.blend); + + dri_bo_unmap(render_state->dynamic_state.bo); } static void @@ -4282,6 +4428,79 @@ intel_render_put_subpicture( i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } +static bool +gen8_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct i965_kernel *kernel; + + + if (IS_GEN8(i965->intel.device_id)) { + memcpy(render_state->render_kernels, render_kernels_gen8, + sizeof(render_state->render_kernels)); + } + + kernel_size = 4096; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + + if (!kernel->size) + continue; + + kernel_size += kernel->size; + } + + render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (render_state->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); + return false; + } + + 
assert(render_state->instruction_state.bo); + + render_state->instruction_state.bo_size = kernel_size; + render_state->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(render_state->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual); + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + kernel_offset = ALIGN(end_offset, 64); + kernel->kernel_offset = kernel_offset; + + if (!kernel->size) + continue; + + memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + + end_offset += kernel->size; + } + + render_state->instruction_state.end_offset = end_offset; + + dri_bo_unmap(render_state->instruction_state.bo); + + + if (IS_GEN8(i965->intel.device_id)) { + render_state->max_wm_threads = 64; + } else { + /* should never get here !!! */ + assert(0); + } + + return true; +} + + bool i965_render_init(VADriverContextP ctx) { @@ -4296,8 +4515,7 @@ i965_render_init(VADriverContextP ctx) sizeof(render_kernels_gen6[0]))); if (IS_GEN8(i965->intel.device_id)) { - memcpy(render_state->render_kernels, render_kernels_gen8, - sizeof(render_state->render_kernels)); + return gen8_render_init(ctx); } else if (IS_GEN7(i965->intel.device_id)) memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_id) ? 
render_kernels_gen7_haswell : render_kernels_gen7), @@ -4328,9 +4546,7 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - if (IS_GEN8(i965->intel.device_id)) { - render_state->max_wm_threads = 64; - } else if (IS_HSW_GT1(i965->intel.device_id)) { + if (IS_HSW_GT1(i965->intel.device_id)) { render_state->max_wm_threads = 102; } else if (IS_HSW_GT2(i965->intel.device_id)) { render_state->max_wm_threads = 204; @@ -4356,6 +4572,41 @@ i965_render_init(VADriverContextP ctx) return true; } +static void +gen8_render_terminate(VADriverContextP ctx) +{ + int i; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + render_state->wm.surface_state_binding_table_bo = NULL; + + if (render_state->instruction_state.bo) { + dri_bo_unreference(render_state->instruction_state.bo); + render_state->instruction_state.bo = NULL; + } + + if (render_state->dynamic_state.bo) { + dri_bo_unreference(render_state->dynamic_state.bo); + render_state->dynamic_state.bo = NULL; + } + + if (render_state->indirect_state.bo) { + dri_bo_unreference(render_state->indirect_state.bo); + render_state->indirect_state.bo = NULL; + } + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } +} + void i965_render_terminate(VADriverContextP ctx) { @@ -4363,6 +4614,11 @@ i965_render_terminate(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; + if (IS_GEN8(i965->intel.device_id)) { + gen8_render_terminate(ctx); + return; + } + dri_bo_unreference(render_state->curbe.bo); render_state->curbe.bo = NULL; diff --git a/src/i965_render.h b/src/i965_render.h 
index 69046450..132e7853 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -80,6 +80,44 @@ struct i965_render_state struct i965_kernel render_kernels[3]; int max_wm_threads; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int curbe_offset; + int curbe_size; + + unsigned int sampler_offset; + int sampler_size; + + unsigned int cc_viewport_offset; + int cc_viewport_size; + + unsigned int cc_state_offset; + int cc_state_size; + + unsigned int blend_state_offset; + int blend_state_size; + + unsigned int sf_clip_offset; + int sf_clip_size; + + unsigned int scissor_offset; + int scissor_size; + }; bool i965_render_init(VADriverContextP ctx); -- cgit v1.2.1 From 0dc6fd2decc67c27bc2c60c745179965fddb1f91 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 19 Dec 2013 09:46:30 +0800 Subject: BDW doesn't support H.264 Baseline profile The similar fix to f765987 Signed-off-by: Xiang, Haihao --- src/gen8_mfc.c | 2 +- src/gen8_mfd.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 0911924c..c7d1dbb6 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -2432,7 +2432,7 @@ static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 3066eb3f..6c818963 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -3054,7 +3054,7 @@ gen8_mfd_decode_picture(VADriverContextP ctx, gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case 
VAProfileH264High: gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); @@ -3150,7 +3150,7 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: gen8_mfd_avc_context_init(ctx, gen7_mfd_context); -- cgit v1.2.1 From 2e6a1a078d8a911734e3eaaf2a2790d58bd9afb2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 19 Dec 2013 10:31:31 +0800 Subject: Rendering/bdw: fix push constant buffer for PS Signed-off-by: Xiang, Haihao --- src/i965_render.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 7d95d04b..73d39eba 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3674,8 +3674,8 @@ gen8_emit_urb(VADriverContextP ctx) OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); /* Size is 8Kbs and base address is 0Kb */ OUT_BATCH(batch, - (1 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | - (4 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); + (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | + (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); ADVANCE_BATCH(batch); BEGIN_BATCH(batch, 2); -- cgit v1.2.1 From c7013d4a50acf8f077535fc435f9d666dd0c56ff Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 13:36:11 +0800 Subject: Follow spec to update the URB entry/size setting for encoding on Haswell/BDW Signed-off-by: Zhao Yakui --- src/gen75_vme.c | 4 ++-- src/gen8_vme.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gen75_vme.c b/src/gen75_vme.c index ab8bf899..7f788b81 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -1037,9 +1037,9 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH; vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; - 
vme_context->gpe_context.vfe_state.num_urb_entries = 16; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; vme_context->gpe_context.vfe_state.gpgpu_mode = 0; - vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + vme_context->gpe_context.vfe_state.urb_entry_size = 16; vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; gen7_vme_scoreboard_init(ctx, vme_context); diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 5369b319..67571be7 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -1182,9 +1182,9 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; - vme_context->gpe_context.vfe_state.num_urb_entries = 16; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; vme_context->gpe_context.vfe_state.gpgpu_mode = 0; - vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + vme_context->gpe_context.vfe_state.urb_entry_size = 16; vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; gen7_vme_scoreboard_init(ctx, vme_context); -- cgit v1.2.1 From 9816bf521dc28c51911db140630342a516f9258e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 13:37:13 +0800 Subject: Handle the aux_batchbuffer correctly for H264 encoding on Haswell Signed-off-by: Zhao Yakui --- src/gen75_mfc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index c6a5ab58..7161acd1 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1621,6 +1621,8 @@ gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, OUT_BCS_BATCH(slice_batch, 0); OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END); ADVANCE_BCS_BATCH(slice_batch); + mfc_context->aux_batchbuffer = NULL; + intel_batchbuffer_free(slice_batch); } intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); @@ -1644,8 +1646,8 @@ gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, { struct 
gen6_mfc_context *mfc_context = encoder_context->mfc_context; - gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); + gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); return mfc_context->aux_batchbuffer_surface.bo; } -- cgit v1.2.1 From 8b100cfe0aa2030c8158163efcd460e82d2aeec0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 13:37:16 +0800 Subject: Calculate required space of batch buffer to avoid buffer overflow in encoding on BDW The required size is based on the number of macroblocks and slice parameter. Then it can avoid that too large buffer is allocated or possible overflow. This is picked up from that on Haswell/Ivybridge. Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index c7d1dbb6..d2ab264c 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -397,6 +397,7 @@ static void gen8_mfc_init(VADriverContextP ctx, int i; int width_in_mbs = 0; int height_in_mbs = 0; + int slice_batchbuffer_size; if (encoder_context->codec == CODEC_H264) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -411,6 +412,9 @@ static void gen8_mfc_init(VADriverContextP ctx, height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; } + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; + /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); mfc_context->post_deblocking_output.bo = NULL; @@ -477,7 +481,7 @@ static void gen8_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = 
intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; -- cgit v1.2.1 From 3d7451a2bbca676e9f4a2933947e17f0e24e585c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 13:37:21 +0800 Subject: BDW encoding reuses aux_batchbuffer instead of allocating another new buffer This is picked up from that on Haswell/Ivybridge. Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index d2ab264c..314a3e06 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1115,17 +1115,13 @@ gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; dri_bo *batch_bo; int i; - int buffer_size; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); @@ -1140,6 +1136,7 @@ gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -2196,18 +2193,14 @@ 
gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -2228,6 +2221,7 @@ gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } -- cgit v1.2.1 From d0f7bd7deb4411143bc5717dabd84b7d9881f4ef Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Fix the incorrect setting for subpicture on BDW Signed-off-by: Zhao Yakui --- src/i965_render.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 73d39eba..9fea827b 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -4298,6 +4298,35 @@ gen7_subpicture_render_setup_states( i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } +static void +gen8_subpic_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + float *constant_buffer; + float global_alpha = 1.0; + 
unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + unsigned char *cc_ptr; + + if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { + global_alpha = obj_subpic->global_alpha; + } + + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (float *) cc_ptr; + *constant_buffer = global_alpha; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + static void gen8_subpicture_render_setup_states( VADriverContextP ctx, @@ -4309,10 +4338,10 @@ gen8_subpicture_render_setup_states( i965_render_dest_surface_state(ctx, 0); i965_subpic_render_src_surfaces_state(ctx, obj_surface); gen8_render_sampler(ctx); - i965_render_cc_viewport(ctx); - gen7_render_color_calc_state(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); gen8_subpicture_render_blend_state(ctx); - i965_subpic_render_upload_constants(ctx, obj_surface); + gen8_subpic_render_upload_constants(ctx, obj_surface); i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } -- cgit v1.2.1 From 662243418cb7199e41b3dffc6bd8d962146b1dab Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Explicitly declare the color blend operation for subpicture on BDW It still works without this; the change only makes the code easier to read.
Signed-off-by: Zhao Yakui --- src/i965_render.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/i965_render.c b/src/i965_render.c index 9fea827b..d79f6b11 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -4266,6 +4266,7 @@ gen8_subpicture_render_blend_state(VADriverContextP ctx) memset(global_blend_state, 0, sizeof(*global_blend_state)); /* Global blend state + blend_state for Render Target */ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD; blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD; -- cgit v1.2.1 From 8a6c4bc274b234ba3ce21c2be0b46f186e22a171 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Fix the error in render shader for BDW Signed-off-by: Zhao Yakui --- src/shaders/render/exa_wm_write.g8a | 16 ++++++++-------- src/shaders/render/exa_wm_write.g8b | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/shaders/render/exa_wm_write.g8a b/src/shaders/render/exa_wm_write.g8a index 58347b3f..e6da9b64 100644 --- a/src/shaders/render/exa_wm_write.g8a +++ b/src/shaders/render/exa_wm_write.g8a @@ -45,17 +45,17 @@ define(`slot_b_01', `g71') define(`slot_a_00', `g72') define(`slot_a_01', `g73') -mov (8) slot_r_00<1>F src_sample_r_01<1>F { align1 mask_disable }; -mov (8) slot_r_01<1>F src_sample_r_23<1>F { align1 mask_disable }; +mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_g_00<1>F src_sample_g_01<1>F { align1 mask_disable }; -mov (8) slot_g_01<1>F src_sample_g_23<1>F { align1 mask_disable }; +mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable }; 
-mov (8) slot_b_00<1>F src_sample_b_01<1>F { align1 mask_disable }; -mov (8) slot_b_01<1>F src_sample_b_23<1>F { align1 mask_disable }; +mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_a_00<1>F src_sample_a_01<1>F { align1 mask_disable }; -mov (8) slot_a_01<1>F src_sample_a_23<1>F { align1 mask_disable }; +mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable }; send (16) data_port_msg_2_ind diff --git a/src/shaders/render/exa_wm_write.g8b b/src/shaders/render/exa_wm_write.g8b index 2f237de1..822578d6 100644 --- a/src/shaders/render/exa_wm_write.g8b +++ b/src/shaders/render/exa_wm_write.g8b @@ -1,13 +1,13 @@ { 0x00600001, 0x2800020c, 0x008d0000, 0x00000000 }, { 0x00600001, 0x2820020c, 0x008d0020, 0x00000000 }, - { 0x00600001, 0x28403aec, 0x002001c0, 0x00000000 }, - { 0x00600001, 0x28603aec, 0x002001e0, 0x00000000 }, - { 0x00600001, 0x28803aec, 0x00200200, 0x00000000 }, - { 0x00600001, 0x28a03aec, 0x00200220, 0x00000000 }, - { 0x00600001, 0x28c03aec, 0x00200240, 0x00000000 }, - { 0x00600001, 0x28e03aec, 0x00200260, 0x00000000 }, - { 0x00600001, 0x29003aec, 0x00200280, 0x00000000 }, - { 0x00600001, 0x29203aec, 0x002002a0, 0x00000000 }, + { 0x00600001, 0x28403aec, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28603aec, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28803aec, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x28a03aec, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x28c03aec, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x28e03aec, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x29003aec, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x29203aec, 0x008d02a0, 0x00000000 }, { 0x05800031, 0x20000a40, 0x0e000800, 0x940b1000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, -- cgit v1.2.1 From 6a62340dce3387639fc63362099b88b126a1c913 Mon 
Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Fix the error in render shader for subpicture Signed-off-by: Zhao Yakui --- src/shaders/render/exa_wm_src_sample_argb.g4a | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g4b | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g6a | 2 +- src/shaders/render/exa_wm_src_sample_argb.g6b | 2 +- src/shaders/render/exa_wm_src_sample_argb.g7a | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g7b | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g8a | 4 ++-- src/shaders/render/exa_wm_src_sample_argb.g8b | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4a b/src/shaders/render/exa_wm_src_sample_argb.g4a index 8cc693ed..c6576553 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4a +++ b/src/shaders/render/exa_wm_src_sample_argb.g4a @@ -48,5 +48,5 @@ send (16) src_msg_ind /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 }; -mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b b/src/shaders/render/exa_wm_src_sample_argb.g4b index 963c121f..42e4a680 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4b +++ b/src/shaders/render/exa_wm_src_sample_argb.g4b @@ -1,4 +1,4 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 }, - { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 }, + { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 }, + { 0x00600041, 
0x22a077bd, 0x008d02a0, 0x00000040 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 index 45b36413..2012f89f 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 +++ b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 @@ -1,4 +1,4 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 }, - { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 }, + { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 }, + { 0x00600041, 0x22a077bd, 0x008d02a0, 0x00000040 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6a b/src/shaders/render/exa_wm_src_sample_argb.g6a index 48e79f71..c30b209f 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g6a +++ b/src/shaders/render/exa_wm_src_sample_argb.g6a @@ -50,6 +50,6 @@ send (16) src_msg_ind /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 }; mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6b b/src/shaders/render/exa_wm_src_sample_argb.g6b index 8964e450..53c62485 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g6b +++ b/src/shaders/render/exa_wm_src_sample_argb.g6b @@ -1,5 +1,5 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 }, { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x000000c0 }, + { 0x00600041, 0x228077bd, 0x008d0280, 0x000000c0 }, { 0x00600041, 0x22a077bd, 0x002002a0, 0x000000c0 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7a b/src/shaders/render/exa_wm_src_sample_argb.g7a index 
620e0e77..0165f7b6 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g7a +++ b/src/shaders/render/exa_wm_src_sample_argb.g7a @@ -54,6 +54,6 @@ send (16) src_msg_ind_gen7 /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 mask_disable }; -mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7b b/src/shaders/render/exa_wm_src_sample_argb.g7b index 674fc74d..0708bc0c 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g7b +++ b/src/shaders/render/exa_wm_src_sample_argb.g7b @@ -1,5 +1,5 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 }, { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 }, - { 0x00600241, 0x228077bd, 0x00200280, 0x000000c0 }, - { 0x00600241, 0x22a077bd, 0x002002a0, 0x000000c0 }, + { 0x00600241, 0x228077bd, 0x008d0280, 0x000000c0 }, + { 0x00600241, 0x22a077bd, 0x008d02a0, 0x000000c0 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8a b/src/shaders/render/exa_wm_src_sample_argb.g8a index 662ef22f..3a4e99f1 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g8a +++ b/src/shaders/render/exa_wm_src_sample_argb.g8a @@ -54,6 +54,6 @@ send (16) src_msg_ind_gen8 /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 mask_disable }; -mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { 
align1 mask_disable }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8b b/src/shaders/render/exa_wm_src_sample_argb.g8b index 3c86fb8b..2b046371 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g8b +++ b/src/shaders/render/exa_wm_src_sample_argb.g8b @@ -1,5 +1,5 @@ { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 }, { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a8c0001 }, - { 0x00600041, 0x22803aec, 0x3a200280, 0x000000c0 }, - { 0x00600041, 0x22a03aec, 0x3a2002a0, 0x000000c0 }, + { 0x00600041, 0x22803aec, 0x3a8d0280, 0x000000c0 }, + { 0x00600041, 0x22a03aec, 0x3a8d02a0, 0x000000c0 }, -- cgit v1.2.1 From d7620bbf662bd527a7362b9b00c70d2d26e9b215 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Add the support of brightness/contrast/hue/saturation for BDW rendering This is picked up from the commit 04ecb6e79f4382d96eb5d4b51733049d420f592a Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/i965_render.c | 20 +++++++++++++ src/shaders/render/Makefile.am | 2 ++ src/shaders/render/exa_wm_yuv_color_balance.g8a | 39 +++++++++++++++++++++++++ src/shaders/render/exa_wm_yuv_color_balance.g8b | 15 ++++++++++ 4 files changed, 76 insertions(+) create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g8a create mode 100644 src/shaders/render/exa_wm_yuv_color_balance.g8b diff --git a/src/i965_render.c b/src/i965_render.c index d79f6b11..3c996141 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -157,6 +157,7 @@ static const uint32_t sf_kernel_static_gen8[][4] = static const uint32_t ps_kernel_static_gen8[][4] = { #include "shaders/render/exa_wm_src_affine.g8b" #include "shaders/render/exa_wm_src_sample_planar.g8b" +#include "shaders/render/exa_wm_yuv_color_balance.g8b" #include "shaders/render/exa_wm_yuv_rgb.g8b" #include "shaders/render/exa_wm_write.g8b" }; @@ 
-2867,6 +2868,11 @@ gen8_render_upload_constants(VADriverContextP ctx, struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; unsigned char *cc_ptr; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; dri_bo_map(render_state->dynamic_state.bo, 1); assert(render_state->dynamic_state.bo->virtual); @@ -2887,6 +2893,20 @@ gen8_render_upload_constants(VADriverContextP ctx, *constant_buffer = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + dri_bo_unmap(render_state->dynamic_state.bo); } diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index 33aa367c..e59869cf 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -89,6 +89,7 @@ INTEL_G8A = \ exa_wm_src_affine.g8a \ exa_wm_src_sample_planar.g8a \ exa_wm_src_sample_argb.g8a \ + exa_wm_yuv_color_balance.g8a \ exa_wm_write.g8a \ exa_wm_yuv_rgb.g8a @@ -98,6 +99,7 @@ INTEL_G8B = \ exa_wm_src_affine.g8b \ exa_wm_src_sample_planar.g8b \ exa_wm_src_sample_argb.g8b \ + exa_wm_yuv_color_balance.g8b \ exa_wm_yuv_rgb.g8b \ exa_wm_write.g8b diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8a 
b/src/shaders/render/exa_wm_yuv_color_balance.g8a new file mode 100644 index 00000000..f3cc28f5 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g8a @@ -0,0 +1,39 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang + * Zhao Yakui + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8b b/src/shaders/render/exa_wm_yuv_color_balance.g8b new file mode 100644 index 00000000..5dc2c8bc --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g8b @@ -0,0 +1,15 @@ + { 0x01000010, 0x200012e0, 0x160000c2, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 }, + { 0x00800040, 0x24003ae8, 0x3e8d01c0, 0xbd808081 }, + { 0x00800041, 0x24003ae8, 0x3a8d0400, 0x000000d0 }, + { 0x00800040, 0x24003ae8, 0x3a8d0400, 0x000000d4 }, + { 0x00800040, 0x21c03ae8, 0x3e8d0400, 0x3d808081 }, + { 0x00800040, 0x24803ae8, 0x3e8d0200, 0xbf008084 }, + { 0x00800040, 0x24403ae8, 0x3e8d0240, 0xbf008084 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 }, + { 0x00800048, 0x24003ae0, 0x3a8d0440, 0x000000dc }, + { 0x00800048, 0x22003ae8, 0x3a8d0480, 0x000000d8 }, + { 0x00000041, 0x20dc3ae8, 0x3e0000dc, 0xbf800000 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 }, + { 0x00800048, 0x24003ae0, 0x3a8d0480, 0x000000dc }, + { 0x00800048, 0x22403ae8, 0x3a8d0440, 0x000000d8 }, -- cgit v1.2.1 From 101f02ae55cfaa2d0bb8b0e1ab4e79671d386d1c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 19 Dec 2013 17:03:31 +0800 Subject: Add the support of color BT709/SMPTE240M for color-space conversion on BDW This is picked up from that on Haswell/Ivybridge. 
Signed-off-by: Zhao Yakui --- src/i965_render.c | 18 ++++++-- src/shaders/render/exa_wm_src_affine.g8a | 2 - src/shaders/render/exa_wm_src_affine.g8b | 8 ++-- src/shaders/render/exa_wm_yuv_rgb.g8a | 78 +------------------------------- src/shaders/render/exa_wm_yuv_rgb.g8b | 30 +++++------- 5 files changed, 33 insertions(+), 103 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 3c996141..8e14d874 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2862,7 +2862,8 @@ gen8_render_color_calc_state(VADriverContextP ctx) static void gen8_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -2873,6 +2874,8 @@ gen8_render_upload_constants(VADriverContextP ctx, float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ float hue = (float)i965->hue_attrib->value / 180 * PI; float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->dynamic_state.bo, 1); assert(render_state->dynamic_state.bo->virtual); @@ -2907,6 +2910,15 @@ gen8_render_upload_constants(VADriverContextP ctx, *color_balance_base++ = cos(hue) * contrast * saturation; *color_balance_base++ = sin(hue) * contrast * saturation; + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->dynamic_state.bo); } @@ -2925,7 +2937,7 @@ gen8_render_setup_states( gen8_render_cc_viewport(ctx); 
gen8_render_color_calc_state(ctx); gen8_render_blend_state(ctx); - gen8_render_upload_constants(ctx, obj_surface); + gen8_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -4024,7 +4036,7 @@ gen8_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 11); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); /*DW3-4. Constant buffer 0 */ OUT_BATCH(batch, render_state->curbe_offset); diff --git a/src/shaders/render/exa_wm_src_affine.g8a b/src/shaders/render/exa_wm_src_affine.g8a index 1d4efcc4..7927c3b1 100644 --- a/src/shaders/render/exa_wm_src_affine.g8a +++ b/src/shaders/render/exa_wm_src_affine.g8a @@ -35,8 +35,6 @@ define(`vh', `g69') define(`bl', `g2.0<8,8,1>F') define(`bh', `g4.0<8,8,1>F') -define(`a0_a_x',`g7.0<0,1,0>F') -define(`a0_a_y',`g7.16<0,1,0>F') /* U */ pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ diff --git a/src/shaders/render/exa_wm_src_affine.g8b b/src/shaders/render/exa_wm_src_affine.g8b index 02732579..f5f9eca1 100644 --- a/src/shaders/render/exa_wm_src_affine.g8b +++ b/src/shaders/render/exa_wm_src_affine.g8b @@ -1,4 +1,4 @@ - { 0x0060005a, 0x28403ae8, 0x3a0000e0, 0x008d0040 }, - { 0x0060005a, 0x28603ae8, 0x3a0000e0, 0x008d0080 }, - { 0x0060005a, 0x28803ae8, 0x3a0000f0, 0x008d0040 }, - { 0x0060005a, 0x28a03ae8, 0x3a0000f0, 0x008d0080 }, + { 0x0060005a, 0x28403ae8, 0x3a000140, 0x008d0040 }, + { 0x0060005a, 0x28603ae8, 0x3a000140, 0x008d0080 }, + { 0x0060005a, 0x28803ae8, 0x3a000150, 0x008d0040 }, + { 0x0060005a, 0x28a03ae8, 0x3a000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8a b/src/shaders/render/exa_wm_yuv_rgb.g8a index 62669c80..9da53c8c 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g8a +++ b/src/shaders/render/exa_wm_yuv_rgb.g8a @@ -28,79 +28,5 @@ */ include(`exa_wm.g4i') - -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') 
-define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8;8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8;8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8;8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8;8,1>F -0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 }; -mac.sat(8) src_sample_r_01<1>F Crn_01<8;8,1>F 1.596F { align1 }; -mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; -mac.sat(8) src_sample_r_23<1>F Crn_23<8;8,1>F 1.596F { align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 }; -mac (8) acc0<1>F Crn_01.0<8;8,1>F -0.813F { align1 }; -mac.sat(8) src_sample_g_01<1>F Cbn_01.0<8;8,1>F -0.392F { align1 }; -mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; -mac (8) acc0<1>F Crn_23.0<8;8,1>F -0.813F { align1 }; -mac.sat(8) src_sample_g_23<1>F Cbn_23.0<8;8,1>F -0.392F { align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (8) acc0<1>F Yn_01.0<8;8,1>F { align1 }; -mac.sat(8) src_sample_b_01<1>F Cbn_01.0<8;8,1>F 2.017F { align1 }; - 
-mov (8) acc0<1>F Yn_23.0<8;8,1>F { align1 }; -mac.sat(8) src_sample_b_23<1>F Cbn_23.0<8;8,1>F 2.017F { align1 }; - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8b b/src/shaders/render/exa_wm_yuv_rgb.g8b index 8898a395..6b6b4d1c 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g8b +++ b/src/shaders/render/exa_wm_yuv_rgb.g8b @@ -1,19 +1,13 @@ - { 0x00800040, 0x22c03ae8, 0x3e8d01c0, 0xbd808081 }, - { 0x00800041, 0x22c03ae8, 0x3e8d02c0, 0x3f94fdf4 }, - { 0x00800040, 0x23003ae8, 0x3e8d0240, 0xbf008084 }, - { 0x00800040, 0x23403ae8, 0x3e8d0200, 0xbf008084 }, - { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, - { 0x80600048, 0x21c03ae8, 0x3e8d0300, 0x3fcc49ba }, - { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, - { 0x80600048, 0x21e03ae8, 0x3e8d0320, 0x3fcc49ba }, - { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, - { 0x00600048, 0x24003ae0, 0x3e8d0300, 0xbf5020c5 }, - { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 }, - { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, - { 0x00600048, 0x24003ae0, 0x3e8d0320, 0xbf5020c5 }, - { 0x80600048, 0x22203ae8, 0x3e8d0360, 0xbec8b439 }, - { 0x00600001, 0x24003ae0, 0x008d02c0, 0x00000000 }, - { 0x80600048, 0x22403ae8, 0x3e8d0340, 0x40011687 }, - { 0x00600001, 0x24003ae0, 0x008d02e0, 0x00000000 }, - { 0x80600048, 0x22603ae8, 0x3e8d0360, 0x40011687 }, + { 0x00800040, 0x22c03ae8, 0x3a8d01c0, 0x000000ec }, + { 0x00800040, 0x23003ae8, 0x3a8d0200, 0x000000fc }, + { 0x00800040, 0x23403ae8, 0x3a8d0240, 0x0000010c }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000e0 }, + { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000e4 }, + { 0x80800048, 0x21c03ae8, 0x3a8d0340, 0x000000e8 }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000f0 }, + { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000f4 }, + { 0x80800048, 0x22003ae8, 0x3a8d0340, 0x000000f8 }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x00000100 }, + { 0x00800048, 
0x24003ae0, 0x3a8d0300, 0x00000104 }, + { 0x80800048, 0x22403ae8, 0x3a8d0340, 0x00000108 }, { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 }, -- cgit v1.2.1 From 88018fcb61d54e6f25a35a580f2c1e5c7147b354 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 23 Dec 2013 11:01:13 +0800 Subject: Update the supported render target format and pixel format for JPEG on BDW This is picked up from the commit a90e80fb7fde114535ab5e9be74d973117def138 on Ivy/Haswell. Otherwise the JPEG on BDW can't work as expected. Signed-off-by: Xiang Haihao Signed-off-by: Zhao Yakui --- src/gen8_mfd.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 6c818963..3429693a 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -1886,6 +1886,7 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; + int fourcc = VA_FOURCC('I', 'M', 'C', '3'); pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; @@ -1900,35 +1901,43 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx, int v3 = pic_param->components[2].v_sampling_factor; if (h1 == 2 && h2 == 1 && h3 == 1 && - v1 == 2 && v2 == 1 && v3 == 1) + v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - else if (h1 == 2 && h2 == 1 && h3 == 1 && - v1 == 1 && v2 == 1 && v3 == 1) + fourcc = VA_FOURCC('I', 'M', 'C', '3'); + } else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - else if (h1 == 1 && h2 == 1 && h3 == 1 && - v1 == 1 && v2 == 1 && v3 == 1) + fourcc = VA_FOURCC('4', '2', '2', 'H'); + } else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - else if (h1 == 4 && h2 == 1 && h3 == 1 && - v1 == 1 && v2 == 1 && v3 == 1) + fourcc = VA_FOURCC('4', '4', '4', 'P'); + } else if (h1 == 4 && h2 == 1 
&& h3 == 1 && + v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - else if (h1 == 1 && h2 == 1 && h3 == 1 && - v1 == 2 && v2 == 1 && v3 == 1) + fourcc = VA_FOURCC('4', '1', '1', 'P'); + } else if (h1 == 1 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - else if (h1 == 2 && h2 == 1 && h3 == 1 && - v1 == 2 && v2 == 2 && v3 == 2) + fourcc = VA_FOURCC('4', '2', '2', 'V'); + } else if (h1 == 2 && h2 == 1 && h3 == 1 && + v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - else if (h2 == 2 && h2 == 2 && h3 == 2 && - v1 == 2 && v2 == 1 && v3 == 1) + fourcc = VA_FOURCC('4', '2', '2', 'H'); + } else if (h2 == 2 && h2 == 2 && h3 == 2 && + v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - else + fourcc = VA_FOURCC('4', '2', '2', 'V'); + } else assert(0); - } else { + } + else { assert(0); } /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; -- cgit v1.2.1 From 8071b71638e068ac5f0e4a9aca1f87dcf79fcba1 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 23 Dec 2013 15:59:22 +0800 Subject: Configure VPP parameter for RGBX input so that Haswell/Ivy uses the same gen7_pp_plx_avs_initialize Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index ddafba42..4661c288 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -894,7 +894,7 @@ static struct pp_module pp_modules_gen7[] = { NULL, }, - gen7_pp_rgbx_avs_initialize, + gen7_pp_plx_avs_initialize, }, { @@ -1145,7 +1145,7 @@ static struct pp_module pp_modules_gen75[] 
= { NULL, }, - gen7_pp_rgbx_avs_initialize, + gen7_pp_plx_avs_initialize, }, { @@ -3490,6 +3490,16 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ if (IS_HASWELL(i965->intel.device_id)) pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */ + + if (pp_static_parameter->grf2.avs_wa_enable) { + int src_fourcc = pp_get_surface_fourcc(ctx, src_surface); + if ((src_fourcc == VA_FOURCC('R', 'G', 'B', 'A')) || + (src_fourcc == VA_FOURCC('R', 'G', 'B', 'X')) || + (src_fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || + (src_fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + pp_static_parameter->grf2.avs_wa_enable = 0; + } + } pp_static_parameter->grf2.avs_wa_width = dw; pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); -- cgit v1.2.1 From 70c232a7fb6e58d2ac3e124784ed4c0316b860c3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 23 Dec 2013 15:59:28 +0800 Subject: Remove the unused function of gen7_pp_rgbx_avs_initialize It is no longer used now that RGBX input on Ivy/Haswell goes through the same gen7_pp_plx_avs_initialize function.
Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 216 --------------------------------------------- 1 file changed, 216 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 4661c288..b084eb2a 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -703,13 +703,6 @@ static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_pos const VARectangle *dst_rect, void *filter_param); -static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param); - static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *src_surface, const VARectangle *src_rect, @@ -3743,215 +3736,6 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con return VA_STATUS_SUCCESS; } -static VAStatus -gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param) -{ - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct gen7_sampler_8x8 *sampler_8x8; - struct i965_sampler_8x8_state *sampler_8x8_state; - int index, i; - int width[3], height[3], pitch[3], offset[3]; - int src_width, src_height; - - /* source surface */ - gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, - width, height, pitch, offset); - src_width = width[0]; - src_height = height[0]; - - /* destination surface */ - gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, 
- width, height, pitch, offset); - - /* sampler 8x8 state */ - dri_bo_map(pp_context->sampler_state_table.bo_8x8, True); - assert(pp_context->sampler_state_table.bo_8x8->virtual); - assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); - sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual; - memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); - - /* The sampler_state setting of RGBX surface will be different with - * that for NV12/I420 surface. - */ - for (i = 0; i < 17; i++) { - float coff; - coff = i; - coff = coff / 16; - /* for Y channel, currently ignore */ - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0; - /* for U/V channel, 0.25 */ - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0; - 
sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0; - } - - sampler_8x8_state->dw136.default_sharpness_level = 0; - sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0; - sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; - sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; - dri_bo_unmap(pp_context->sampler_state_table.bo_8x8); - - /* sampler 8x8 */ - dri_bo_map(pp_context->sampler_state_table.bo, True); - assert(pp_context->sampler_state_table.bo->virtual); - assert(sizeof(*sampler_8x8) == sizeof(int) * 4); - sampler_8x8 = pp_context->sampler_state_table.bo->virtual; - - /* sample_8x8 Y index 4 */ - index = 4; - memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - - sampler_8x8[index].dw2.weak_edge_threshold = 1; - 
sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - /* sample_8x8 UV index 8 */ - index = 8; - memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); - sampler_8x8[index].dw0.disable_8x8_filter = 0; - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - sampler_8x8[index].dw2.weak_edge_threshold = 1; - sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - /* sampler_8x8 V, index 12 */ - index = 12; - memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); - 
sampler_8x8[index].dw0.disable_8x8_filter = 0; - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - sampler_8x8[index].dw2.weak_edge_threshold = 1; - sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - dri_bo_unmap(pp_context->sampler_state_table.bo); - - /* private function & data */ - pp_context->pp_x_steps = gen7_pp_avs_x_steps; - pp_context->pp_y_steps = gen7_pp_avs_y_steps; - pp_context->private_context = &pp_context->pp_avs_context; - pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; - - pp_avs_context->dest_x = dst_rect->x; - pp_avs_context->dest_y = dst_rect->y; - pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); - pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); - pp_avs_context->src_w = src_rect->width; - pp_avs_context->src_h = src_rect->height; - pp_avs_context->horiz_range = (float)src_rect->width / src_width; - - int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, dst_rect->width); - - pp_static_parameter->grf1.pointer_to_inline_parameter = 7; - pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */ - pp_static_parameter->grf2.avs_wa_width = dw; - 
pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); - - pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; - pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; - pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - - (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; - gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); - - dst_surface->flags = src_surface->flags; - - return VA_STATUS_SUCCESS; -} - static int pp_dndi_x_steps(void *private_context) { -- cgit v1.2.1 From 6b574e471d132ee6d30157c7da26425a54fd9c81 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Mon, 23 Dec 2013 16:30:25 +0800 Subject: Fix a bug of vp8 quant index calculation error Signed-off-by: Zhong Li --- src/gen8_mfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 3429693a..3ce8ebe5 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2745,12 +2745,14 @@ static const int vp8_ac_qlookup[128] = 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, }; -static inline unsigned int vp8_clip_quantization_index(unsigned int index) +static inline unsigned int vp8_clip_quantization_index(int index) { if(index > 127) return 127; else if(index <0) return 0; + + return index; } static void -- cgit v1.2.1 From 5b45313864e8d1bfc310efbecef518113e357bd9 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Wed, 25 Dec 2013 14:23:29 +0800 Subject: Add the ring supported for bdw vpp filters Signed-off-by: Zhong Li --- src/i965_drv_video.c | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6048763d..36092216 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -349,10 +349,10 @@ static struct hw_codec_info gen8_hw_codec_info = { .num_filters = 4, .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, - VAProcFilterSharpening, /* need to rebuild the shader for BDW */ - VAProcFilterColorBalance, + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ + { VAProcFilterColorBalance, I965_RING_VEBOX}, }, }; -- cgit v1.2.1 From 7f62d5ee4137c091d89827abe9eb83ae9edfdb4c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 27 Dec 2013 11:16:48 +0800 Subject: VPP/bdw: Fix the initialize function used for NV12 to NV12 Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index b084eb2a..606459e0 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1307,7 +1307,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen7_pp_nv12_dndi_initialize, + gen8_pp_plx_avs_initialize, }, { -- cgit v1.2.1 From 94f1c2816f21f0e298cb7a8cde62c66192697c85 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Fix the error of offset calculation for encoding on BDW Currently although the encoding can work well, the offset in the internal object is calculated incorrectly. So fix it to avoid the potential issue. 
Signed-off-by: Zhao Yakui --- src/i965_gpe_utils.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 0d49703b..6bbad2d9 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -1082,7 +1082,7 @@ gen8_gpe_context_init(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); dri_bo *bo; int bo_size; - unsigned int end_offset; + unsigned int start_offset, end_offset; dri_bo_unreference(gpe_context->surface_state_binding_table.bo); bo = dri_bo_alloc(i965->intel.bufmgr, @@ -1106,16 +1106,19 @@ gen8_gpe_context_init(VADriverContextP ctx, gpe_context->dynamic_state.end_offset = 0; /* Constant buffer offset */ - gpe_context->curbe_offset = ALIGN(end_offset, 64); - end_offset += gpe_context->curbe_size; + start_offset = ALIGN(end_offset, 64); + gpe_context->curbe_offset = start_offset; + end_offset = start_offset + gpe_context->curbe_size; /* Interface descriptor offset */ - gpe_context->idrt_offset = ALIGN(end_offset, 64); - end_offset += gpe_context->idrt_size; + start_offset = ALIGN(end_offset, 64); + gpe_context->idrt_offset = start_offset; + end_offset = start_offset + gpe_context->idrt_size; /* Sampler state offset */ - gpe_context->sampler_offset = ALIGN(end_offset, 64); - end_offset += gpe_context->sampler_size; + start_offset = ALIGN(end_offset, 64); + gpe_context->sampler_offset = start_offset; + end_offset = start_offset + gpe_context->sampler_size; /* update the end offset of dynamic_state */ gpe_context->dynamic_state.end_offset = end_offset; @@ -1187,9 +1190,11 @@ gen8_gpe_load_kernels(VADriverContextP ctx, kernel = &gpe_context->kernels[i]; kernel->kernel_offset = kernel_offset; - memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + if (kernel->size) { + memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); - end_offset += kernel->size; + end_offset = kernel_offset + kernel->size; + } } 
gpe_context->instruction_state.end_offset = end_offset; -- cgit v1.2.1 From 74a47c2cd3b45c3616bb7e4f3a47a295869c8b09 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Follow the spec to make the VPP media pipeline work in 48-bit addressing mode Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 375 +++++++++++++++++++++++++++++++++++++-------- src/i965_post_processing.h | 23 +++ 2 files changed, 331 insertions(+), 67 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 606459e0..4bd9df92 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3528,6 +3528,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con int i; int width[3], height[3], pitch[3], offset[3]; int src_width, src_height; + unsigned char *cc_ptr; memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter)); @@ -3542,12 +3543,13 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con width, height, pitch, offset); /* sampler 8x8 state */ - dri_bo_map(pp_context->sampler_state_table.bo, True); - assert(pp_context->sampler_state_table.bo->virtual); + dri_bo_map(pp_context->dynamic_state.bo, True); + assert(pp_context->dynamic_state.bo->virtual); + cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->sampler_offset; /* Currently only one gen8 sampler_8x8 is initialized */ - sampler_8x8 = (struct gen8_sampler_8x8_avs *) - pp_context->sampler_state_table.bo->virtual; + sampler_8x8 = (struct gen8_sampler_8x8_avs *)cc_ptr; memset(sampler_8x8, 0, sizeof(*sampler_8x8)); sampler_8x8->dw0.gain_factor = 44; @@ -3696,7 +3698,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con sampler_8x8->dw153.bypass_y_adaptive_filtering = 1; sampler_8x8->dw153.bypass_x_adaptive_filtering = 1; - dri_bo_unmap(pp_context->sampler_state_table.bo); + dri_bo_unmap(pp_context->dynamic_state.bo); /* 
private function & data */ @@ -4922,8 +4924,10 @@ gen8_pp_initialize( { VAStatus va_status; struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_module *pp_module; dri_bo *bo; + int bo_size; + unsigned int end_offset; + struct pp_module *pp_module; int static_param_size, inline_param_size; dri_bo_unreference(pp_context->surface_state_binding_table.bo); @@ -4934,40 +4938,41 @@ gen8_pp_initialize( assert(bo); pp_context->surface_state_binding_table.bo = bo; - dri_bo_unreference(pp_context->curbe.bo); + pp_context->idrt.num_interface_descriptors = 0; + + pp_context->sampler_size = 2 * 4096; + + bo_size = 4096 + pp_context->curbe_size + pp_context->sampler_size + + pp_context->idrt_size; + + dri_bo_unreference(pp_context->dynamic_state.bo); bo = dri_bo_alloc(i965->intel.bufmgr, - "constant buffer", - 4096, - 4096); - assert(bo); - pp_context->curbe.bo = bo; + "dynamic_state", + bo_size, + 4096); - dri_bo_unreference(pp_context->idrt.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "interface discriptor", - sizeof(struct gen8_interface_descriptor_data), - 4096); assert(bo); - pp_context->idrt.bo = bo; - pp_context->idrt.num_interface_descriptors = 0; + pp_context->dynamic_state.bo = bo; + pp_context->dynamic_state.bo_size = bo_size; - dri_bo_unreference(pp_context->sampler_state_table.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "sampler 8x8 state ", - 4096 * 2, - 4096); - assert(bo); - pp_context->sampler_state_table.bo = bo; + end_offset = 0; + pp_context->dynamic_state.end_offset = 0; + /* Constant buffer offset */ + pp_context->curbe_offset = ALIGN(end_offset, 64); + end_offset = pp_context->curbe_offset + pp_context->curbe_size; + + /* Interface descriptor offset */ + pp_context->idrt_offset = ALIGN(end_offset, 64); + end_offset = pp_context->idrt_offset + pp_context->idrt_size; + + /* Sampler state offset */ + pp_context->sampler_offset = ALIGN(end_offset, 64); + end_offset = pp_context->sampler_offset + pp_context->sampler_size; + + /* update the 
end offset of dynamic_state */ + pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64); - dri_bo_unreference(pp_context->vfe_state.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "vfe state", - sizeof(struct i965_vfe_state), - 4096); - assert(bo); - pp_context->vfe_state.bo = bo; - static_param_size = sizeof(struct gen7_pp_static_parameter); inline_param_size = sizeof(struct gen7_pp_inline_parameter); @@ -5046,37 +5051,30 @@ gen8_pp_interface_descriptor_table(VADriverContextP ctx, struct gen8_interface_descriptor_data *desc; dri_bo *bo; int pp_index = pp_context->current_pp; + unsigned char *cc_ptr; - bo = pp_context->idrt.bo; - dri_bo_map(bo, True); + bo = pp_context->dynamic_state.bo; + + dri_bo_map(bo, 1); assert(bo->virtual); - desc = bo->virtual; + cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset; + + desc = (struct gen8_interface_descriptor_data *) cc_ptr + + pp_context->idrt.num_interface_descriptors; + memset(desc, 0, sizeof(*desc)); desc->desc0.kernel_start_pointer = - pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */ + pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc */ desc->desc2.single_program_flow = 1; desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754; - desc->desc3.sampler_count = 1; /* 1 - 4 samplers used */ - desc->desc3.sampler_state_pointer = - pp_context->sampler_state_table.bo->offset >> 5; + desc->desc3.sampler_count = 0; /* 1 - 4 samplers used */ + desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5; desc->desc4.binding_table_entry_count = 0; desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); desc->desc5.constant_urb_entry_read_offset = 0; desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - offsetof(struct gen8_interface_descriptor_data, desc0), - pp_context->pp_modules[pp_index].kernel.bo); - - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 
desc->desc3.sampler_count << 2, - offsetof(struct gen8_interface_descriptor_data, desc3), - pp_context->sampler_state_table.bo); - dri_bo_unmap(bo); pp_context->idrt.num_interface_descriptors++; } @@ -5113,12 +5111,35 @@ gen6_pp_states_setup(VADriverContextP ctx, gen6_pp_upload_constants(ctx, pp_context); } +static void +gen8_pp_upload_constants(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + unsigned char *constant_buffer; + int param_size; + + assert(sizeof(struct gen7_pp_static_parameter) == 192); + + if (IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_static_parameter); + + dri_bo_map(pp_context->dynamic_state.bo, 1); + assert(pp_context->dynamic_state.bo->virtual); + constant_buffer = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->curbe_offset; + + memcpy(constant_buffer, pp_context->pp_static_parameter, param_size); + dri_bo_unmap(pp_context->dynamic_state.bo); + return; +} + static void gen8_pp_states_setup(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { gen8_pp_interface_descriptor_table(ctx, pp_context); - gen6_pp_upload_constants(ctx, pp_context); + gen8_pp_upload_constants(ctx, pp_context); } static void @@ -5168,7 +5189,8 @@ gen8_pp_state_base_address(VADriverContextP ctx, OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ OUT_BATCH(batch, 0); /* DW6. Dynamic state address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(batch, 0); /* DW8. Indirect object address */ @@ -5176,7 +5198,7 @@ gen8_pp_state_base_address(VADriverContextP ctx, OUT_BATCH(batch, 0); /* DW10. 
Instruction base address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); @@ -5290,10 +5312,7 @@ gen8_interface_descriptor_load(VADriverContextP ctx, OUT_BATCH(batch, 0); OUT_BATCH(batch, pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data)); - OUT_RELOC(batch, - pp_context->idrt.bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + OUT_BATCH(batch, pp_context->idrt_offset); ADVANCE_BATCH(batch); } @@ -5435,6 +5454,99 @@ gen6_pp_pipeline_setup(VADriverContextP ctx, intel_batchbuffer_end_atomic(batch); } +static void +gen8_pp_curbe_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int param_size = 64; + + if (IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_static_parameter); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + param_size); + OUT_BATCH(batch, pp_context->curbe_offset); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_object_walker(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = pp_context->batch; + int x, x_steps, y, y_steps; + int param_size, command_length_in_dws, extra_cmd_in_dws; + dri_bo *command_buffer; + unsigned int *command_ptr; + + param_size = sizeof(struct gen7_pp_inline_parameter); + if (IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_inline_parameter); + + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); + command_length_in_dws = 6 + (param_size >> 2); + extra_cmd_in_dws = 2; + 
command_buffer = dri_bo_alloc(i965->intel.bufmgr, + "command objects buffer", + (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64, + 4096); + + dri_bo_map(command_buffer, 1); + command_ptr = command_buffer->virtual; + + for (y = 0; y < y_steps; y++) { + for (x = 0; x < x_steps; x++) { + if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { + + *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + memcpy(command_ptr, pp_context->pp_inline_parameter, param_size); + command_ptr += (param_size >> 2); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + } + } + } + + if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0) + *command_ptr++ = 0; + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(command_buffer); + + if (IS_GEN8(i965->intel.device_id)) { + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } + + dri_bo_unreference(command_buffer); + + /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END + * will cause control to pass back to ring buffer + */ + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); + intel_batchbuffer_start_atomic(batch, 0x1000); +} + static void gen8_pp_pipeline_setup(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5446,10 +5558,10 @@ gen8_pp_pipeline_setup(VADriverContextP ctx, gen6_pp_pipeline_select(ctx, pp_context); gen8_pp_state_base_address(ctx, pp_context); gen8_pp_vfe_state(ctx, pp_context); - gen6_pp_curbe_load(ctx, pp_context); + gen8_pp_curbe_load(ctx, pp_context); gen8_interface_descriptor_load(ctx, pp_context); gen8_pp_vfe_state(ctx, pp_context); - gen6_pp_object_walker(ctx, pp_context); + 
gen8_pp_object_walker(ctx, pp_context); intel_batchbuffer_end_atomic(batch); } @@ -6157,6 +6269,39 @@ i965_image_processing(VADriverContextP ctx, return status; } +static void +gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context) +{ + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + pp_context->surface_state_binding_table.bo = NULL; + + dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); + pp_context->pp_dndi_context.stmm_bo = NULL; + + dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); + pp_context->pp_dn_context.stmm_bo = NULL; + + if (pp_context->instruction_state.bo) { + dri_bo_unreference(pp_context->instruction_state.bo); + pp_context->instruction_state.bo = NULL; + } + + if (pp_context->indirect_state.bo) { + dri_bo_unreference(pp_context->indirect_state.bo); + pp_context->indirect_state.bo = NULL; + } + + if (pp_context->dynamic_state.bo) { + dri_bo_unreference(pp_context->dynamic_state.bo); + pp_context->dynamic_state.bo = NULL; + } + + free(pp_context->pp_static_parameter); + free(pp_context->pp_inline_parameter); + pp_context->pp_static_parameter = NULL; + pp_context->pp_inline_parameter = NULL; +} + static void i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context) { @@ -6210,7 +6355,11 @@ i965_post_processing_terminate(VADriverContextP ctx) struct i965_post_processing_context *pp_context = i965->pp_context; if (pp_context) { - i965_post_processing_context_finalize(pp_context); + if (IS_GEN8(i965->intel.device_id)) { + gen8_post_processing_context_finalize(pp_context); + } else { + i965_post_processing_context_finalize(pp_context); + } free(pp_context); } @@ -6219,6 +6368,96 @@ i965_post_processing_terminate(VADriverContextP ctx) #define VPP_CURBE_ALLOCATION_SIZE 32 + +static void +gen8_post_processing_context_init(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + struct intel_batchbuffer *batch) +{ + struct i965_driver_data *i965 = 
i965_driver_data(ctx); + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct pp_module *pp_module; + + { + pp_context->vfe_gpu_state.max_num_threads = 60; + pp_context->vfe_gpu_state.num_urb_entries = 59; + pp_context->vfe_gpu_state.gpgpu_mode = 0; + pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; + pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + } + + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); + + if (IS_GEN8(i965->intel.device_id)) + memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); + else { + /* should never get here !!! */ + assert(0); + } + + kernel_size = 4096 ; + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + kernel_size += pp_module->kernel.size; + } + } + + pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (pp_context->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n"); + return; + } + + assert(pp_context->instruction_state.bo); + + + pp_context->instruction_state.bo_size = kernel_size; + pp_context->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(pp_context->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual); + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + kernel_offset = ALIGN(end_offset, 64); + pp_module->kernel.kernel_offset = kernel_offset; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + + memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size); + end_offset = kernel_offset + pp_module->kernel.size; + } + } + + pp_context->instruction_state.end_offset = ALIGN(end_offset, 64); + + dri_bo_unmap(pp_context->instruction_state.bo); + + /* static & inline parameters 
*/ + if (IS_GEN8(i965->intel.device_id)) { + pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); + pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); + } + + pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; + pp_context->pp_dndi_context.current_out_obj_surface = NULL; + pp_context->pp_dndi_context.frame_order = -1; + pp_context->batch = batch; + + pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data); + pp_context->curbe_size = 256; +} + static void i965_post_processing_context_init(VADriverContextP ctx, struct i965_post_processing_context *pp_context, @@ -6227,6 +6466,11 @@ i965_post_processing_context_init(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int i; + if (IS_GEN8(i965->intel.device_id)) { + gen8_post_processing_context_init(ctx, pp_context, batch); + return; + }; + if (IS_IRONLAKE(i965->intel.device_id)) { pp_context->urb.size = URB_SIZE((&i965->intel)); pp_context->urb.num_vfe_entries = 32; @@ -6251,11 +6495,8 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); - if (IS_GEN8(i965->intel.device_id)) - memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); - else if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules)); else if (IS_GEN7(i965->intel.device_id)) memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index f399cbb0..e525a1aa 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -503,6 +503,29 @@ struct i965_post_processing_context 
unsigned int block_horizontal_mask_left:16; unsigned int block_horizontal_mask_right:16; unsigned int block_vertical_mask_bottom:8; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int sampler_offset; + int sampler_size; + unsigned int idrt_offset; + int idrt_size; + unsigned int curbe_offset; + int curbe_size; }; struct i965_proc_context -- cgit v1.2.1 From 0a085bed9c16dabcb2f338bfdfed90e29f32cd97 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Fix the wrong pitch of surface for Video post-processing on BDW Now the object surface already contains the pitch after the object surface structure is reworked in the commit f886f24eaaacba9544fa5f6405b7382c686f3a1f. So it is unnecessary to calculate the pitch based on the width. Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 4bd9df92..28b5817b 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -2396,11 +2396,9 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc else width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ - pitch[0] = obj_surface->width * 2; } else if (rgbx_format) { if (is_target) width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - pitch[0] = obj_surface->width * 4; } width[1] = obj_surface->cb_cr_width; -- cgit v1.2.1 From ba09098b6e83b32deb8785b9667fcf859242becb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Add the support of querying the surface attributes on BDW Otherwise the user-space application cannot query which surface formats are supported by the libva VA-API driver on
BDW. Signed-off-by: Zhao Yakui --- src/i965_drv_video.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 36092216..ee8e7d4e 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4622,6 +4622,116 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, i++; } } + } else if (IS_GEN8(i965->intel.device_id)) { + if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ + if (obj_config->profile == VAProfileJPEGBaseline) { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '1'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('Y', '8', '0', '0'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('4', '1', '1', 'P'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'H'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = 
VA_FOURCC('4', '2', '2', 'V'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('4', '4', '4', 'P'); + i++; + } else { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + i++; + } + } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ + obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */ + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + i++; + + if (obj_config->entrypoint == VAEntrypointVideoProc) { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + i++; + + 
attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('B', 'G', 'R', 'A'); + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('B', 'G', 'R', 'X'); + i++; + } + } } attribs[i].type = VASurfaceAttribMemoryType; -- cgit v1.2.1 From 60df99091d948cd9055fde776d79ef2cc72908f2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Use the correct sub-context for VPP on BDW to avoid the NULL pointer The structure of the sub-context was updated for VPP in commit 4faf6bf47f8e4e2fe587e3bb6a004340edd59c4c. So BDW should use the correct sub-context. Otherwise a segmentation fault will be triggered.
Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 28b5817b..aa0311de 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -3519,7 +3519,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con void *filter_param) { /* TODO: Add the sampler_8x8 state */ - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct gen8_sampler_8x8_avs *sampler_8x8; struct i965_sampler_8x8_coefficient *sampler_8x8_state; -- cgit v1.2.1 From 3125cd8e17bb8bf11ad9a491686d8ae8d2d11ad9 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Fix the error for the VPP conversion of NV12->NV12 on BDW Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index aa0311de..cf1d1596 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1054,7 +1054,7 @@ static struct pp_module pp_modules_gen75[] = { NULL, }, - gen7_pp_nv12_dndi_initialize, + gen8_pp_plx_avs_initialize, }, { -- cgit v1.2.1 From 12cab0aa931d6f3f71798556bc4d8d7c45f3ebe5 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:44 +0800 Subject: Use the pp_null_initialize function for the unsupported VPP on BDW The Dn/DI will be implemented by using VEBOX and doesn't use the VPP shader any more. So the corresponding VPP shader should use the pp_null_initialize hook function. 
Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index cf1d1596..9dd51fea 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1319,7 +1319,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen7_pp_nv12_dn_initialize, + pp_null_initialize, }, { { @@ -1378,7 +1378,7 @@ static struct pp_module pp_modules_gen8[] = { NULL, }, - gen8_pp_plx_avs_initialize, + pp_null_initialize, }, { -- cgit v1.2.1 From a7a53832fb2b73ce576186fbcaa652d2d655004b Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:05:45 +0800 Subject: Add the VPP shader of conversion between YUY2 and YUY2 on BDW Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 1 + src/shaders/post_processing/gen8/Makefile.am | 1 + src/shaders/post_processing/gen8/pa_to_pa.asm | 17 ++ src/shaders/post_processing/gen8/pa_to_pa.g8b | 279 ++++++++++++++++++++++++++ 4 files changed, 298 insertions(+) create mode 100644 src/shaders/post_processing/gen8/pa_to_pa.asm create mode 100644 src/shaders/post_processing/gen8/pa_to_pa.g8b diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 9dd51fea..3b6ec7ff 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1204,6 +1204,7 @@ static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { #include "shaders/post_processing/gen8/pa_to_pl3.g8b" }; static const uint32_t pp_pa_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pa.g8b" }; static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { #include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index ddb53cd3..50badf7d 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -9,6 +9,7 @@ INTEL_PP_G8B = \ pl3_to_pa.g8b \ 
pa_to_pl2.g8b \ pa_to_pl3.g8b \ + pa_to_pa.g8b \ $(NULL) INTEL_PP_G8A = \ diff --git a/src/shaders/post_processing/gen8/pa_to_pa.asm b/src/shaders/post_processing/gen8/pa_to_pa.asm new file mode 100644 index 00000000..44e3b35c --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pa.g8b b/src/shaders/post_processing/gen8/pa_to_pa.g8b new file mode 100644 index 00000000..76fe27a7 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pa.g8b @@ -0,0 +1,279 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 
0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 
0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 
0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 
0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 
0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 
0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 
0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, -- cgit v1.2.1 From 
a8a48ecc04caebdf801a1b609b3531beffbe17f0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:16:37 +0800 Subject: Fix the wrong VPP initialization function for Dn/DI on Ivybridge Signed-off-by: Zhao Yakui --- src/i965_post_processing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 3b6ec7ff..e058378a 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1054,7 +1054,7 @@ static struct pp_module pp_modules_gen75[] = { NULL, }, - gen8_pp_plx_avs_initialize, + gen7_pp_nv12_dn_initialize, }, { -- cgit v1.2.1 From 2e74da4a2733a264072fdd3ab07254400fa02c6e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:56:33 +0800 Subject: Fix the incorrect MV upper bound setting of MFC_IND_OBJ_BASE_ADDRESS_STATE for encoding on gen8 Signed-off-by: Zhao Yakui --- src/gen8_mfc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 314a3e06..b1e9bd3b 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -162,6 +162,7 @@ gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; + int vme_size; BEGIN_BCS_BATCH(batch, 26); @@ -174,11 +175,12 @@ gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); + vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks; /* the DW6-10 is for MFX Indirect MV Object Base Address */ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size); OUT_BCS_BATCH(batch, 0); /* the 
DW11-15 is for MFX IT-COFF. Not used on encoder */ -- cgit v1.2.1 From befe2d485f45f8b9a5856581d172c1150aeef917 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 27 Dec 2013 15:56:37 +0800 Subject: Update the MFX_AVC_IMAGE_STATE to follow the spec Signed-off-by: Zhao Yakui --- src/gen75_mfc.c | 2 +- src/gen75_mfd.c | 4 ++-- src/gen7_mfc.c | 2 +- src/gen7_mfd.c | 4 ++-- src/gen8_mfc.c | 2 +- src/gen8_mfd.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 7161acd1..28edd40b 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -259,7 +259,7 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); /*DW1. MB setting of frame */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index dc7c9403..4a4de0b6 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -566,7 +566,7 @@ gen75_mfd_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 17); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -2826,7 +2826,7 @@ gen75_jpeg_wa_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 48f4bf23..394665d5 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -214,7 +214,7 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 
- 2)); /*DW1 frame size */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 51a1850a..50910342 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -353,7 +353,7 @@ gen7_mfd_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -2320,7 +2320,7 @@ gen7_jpeg_wa_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index b1e9bd3b..90092a13 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -230,7 +230,7 @@ gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); /*DW1. 
MB setting of frame */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 3ce8ebe5..6a106631 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -433,7 +433,7 @@ gen8_mfd_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 17); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); -- cgit v1.2.1 From a549e480796a9aebb2c1f1255a655980f6d5e438 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Dec 2013 09:39:04 +0800 Subject: Render/HSW: Fix the bug caused by merging code Signed-off-by: Xiang, Haihao --- src/i965_render.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 8e14d874..bb974e79 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2491,7 +2491,6 @@ gen7_render_initialize(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; dri_bo *bo; - int size; /* VERTEX BUFFER */ dri_bo_unreference(render_state->vb.vertex_buffer); @@ -2540,10 +2539,9 @@ gen7_render_initialize(VADriverContextP ctx) /* BLEND STATE */ dri_bo_unreference(render_state->cc.blend); - size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt); bo = dri_bo_alloc(i965->intel.bufmgr, "blend state", - size, + sizeof(struct gen6_blend_state), 4096); assert(bo); render_state->cc.blend = bo; -- cgit v1.2.1 From a80141564cf8f9a8dd79a6fe76e149bf4ca5508d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Dec 2013 12:42:47 +0800 Subject: Render/BDW: Initialize the blend_state for rendering Signed-off-by: Xiang, Haihao Signed-off-by: Zhao 
Yakui --- src/i965_render.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index bb974e79..1376c32a 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2799,7 +2799,7 @@ gen8_render_blend_state(VADriverContextP ctx) global_blend_state = (struct gen8_global_blend_state*) cc_ptr; - memset(global_blend_state, 0, sizeof(*global_blend_state)); + memset(global_blend_state, 0, render_state->blend_state_size); /* Global blend state + blend_state for Render Target */ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); blend_state->blend1.logic_op_enable = 1; @@ -4293,7 +4293,7 @@ gen8_subpicture_render_blend_state(VADriverContextP ctx) global_blend_state = (struct gen8_global_blend_state*) cc_ptr; - memset(global_blend_state, 0, sizeof(*global_blend_state)); + memset(global_blend_state, 0, render_state->blend_state_size); /* Global blend state + blend_state for Render Target */ blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD; -- cgit v1.2.1 From 47fb3cfd469a446d36299c8e77242c427bac24e6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 30 Dec 2013 12:42:51 +0800 Subject: Render/BDW: Align each offset with 64 bytes Signed-off-by: Xiang, Haihao Signed-off-by: Zhao Yakui --- src/i965_render.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 1376c32a..b3a844f1 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2559,6 +2559,8 @@ gen7_render_initialize(VADriverContextP ctx) /* * for GEN8 */ +#define ALIGNMENT 64 + static void gen8_render_initialize(VADriverContextP ctx) { @@ -2603,10 +2605,13 @@ gen8_render_initialize(VADriverContextP ctx) render_state->scissor_size = 1024; - size = 4096 + render_state->curbe_size + render_state->sampler_size + - render_state->cc_state_size + 
render_state->cc_viewport_size + - render_state->blend_state_size + render_state->sf_clip_size + - render_state->scissor_size; + size = ALIGN(render_state->curbe_size, ALIGNMENT) + + ALIGN(render_state->sampler_size, ALIGNMENT) + + ALIGN(render_state->cc_viewport_size, ALIGNMENT) + + ALIGN(render_state->cc_state_size, ALIGNMENT) + + ALIGN(render_state->blend_state_size, ALIGNMENT) + + ALIGN(render_state->sf_clip_size, ALIGNMENT) + + ALIGN(render_state->scissor_size, ALIGNMENT); dri_bo_unreference(render_state->dynamic_state.bo); bo = dri_bo_alloc(i965->intel.bufmgr, @@ -2620,35 +2625,35 @@ gen8_render_initialize(VADriverContextP ctx) render_state->dynamic_state.end_offset = 0; /* Constant buffer offset */ - render_state->curbe_offset = ALIGN(end_offset, 64); - end_offset += render_state->curbe_size; + render_state->curbe_offset = end_offset; + end_offset += ALIGN(render_state->curbe_size, ALIGNMENT); /* Sampler_state */ - render_state->sampler_offset = ALIGN(end_offset, 64); - end_offset += render_state->sampler_size; + render_state->sampler_offset = end_offset; + end_offset += ALIGN(render_state->sampler_size, ALIGNMENT); /* CC_VIEWPORT_state */ - render_state->cc_viewport_offset = ALIGN(end_offset, 64); - end_offset += render_state->cc_viewport_size; + render_state->cc_viewport_offset = end_offset; + end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT); /* CC_STATE_state */ - render_state->cc_state_offset = ALIGN(end_offset, 64); - end_offset += render_state->cc_state_size; + render_state->cc_state_offset = end_offset; + end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT); /* Blend_state */ - render_state->blend_state_offset = ALIGN(end_offset, 64); - end_offset += render_state->blend_state_size; + render_state->blend_state_offset = end_offset; + end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT); /* SF_CLIP_state */ - render_state->sf_clip_offset = ALIGN(end_offset, 64); - end_offset += render_state->sf_clip_size; + 
render_state->sf_clip_offset = end_offset; + end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT); /* SCISSOR_state */ - render_state->scissor_offset = ALIGN(end_offset, 64); - end_offset += render_state->scissor_size; + render_state->scissor_offset = end_offset; + end_offset += ALIGN(render_state->scissor_size, ALIGNMENT); /* update the end offset of dynamic_state */ - render_state->dynamic_state.end_offset = ALIGN(end_offset, 64); + render_state->dynamic_state.end_offset = end_offset; } @@ -4534,7 +4539,7 @@ gen8_render_init(VADriverContextP ctx) kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual); for (i = 0; i < NUM_RENDER_KERNEL; i++) { kernel = &render_state->render_kernels[i]; - kernel_offset = ALIGN(end_offset, 64); + kernel_offset = end_offset; kernel->kernel_offset = kernel_offset; if (!kernel->size) @@ -4542,7 +4547,7 @@ gen8_render_init(VADriverContextP ctx) memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); - end_offset += kernel->size; + end_offset += ALIGN(kernel->size, ALIGNMENT); } render_state->instruction_state.end_offset = end_offset; -- cgit v1.2.1 From 86321112f18a495d205a7f2f048eef41df28fe22 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 30 Dec 2013 12:42:55 +0800 Subject: Add one environment variable to check the benchmark of decoding/vaPutsurface The swap_buffer callback will wait for the completion of buffer swap, which will affect the benchmark test of decoding/vaPutSurface. 
Signed-off-by: Zhao Yakui --- src/i965_output_dri.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index 14673679..717ee9a4 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -204,7 +204,8 @@ i965_put_surface_dri( } } - dri_vtable->swap_buffer(ctx, dri_drawable); + if (!getenv("INTEL_DEBUG_BENCH")) + dri_vtable->swap_buffer(ctx, dri_drawable); obj_surface->flags |= SURFACE_DISPLAYED; if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { -- cgit v1.2.1 From de5f63bfe98dd1147c93422063da8826f34db534 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 7 Jan 2014 13:10:58 +0800 Subject: VEBOX/bdw: DW0-DW8 are used for dndi parameters in VEBOX_DNDI_STATE Signed-off-by: Xiang, Haihao --- src/gen75_vpp_vebox.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 8e80474d..29eb14ea 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -131,6 +131,7 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx, void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) { + struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int* p_table ; int progressive_dn = 1; int dndi_top_first = 0; @@ -155,7 +156,9 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c */ p_table = (unsigned int *)proc_ctx->dndi_state_table.ptr; - *p_table ++ = 0; // reserved . w0 + if (IS_HASWELL(i965->intel.device_id)) + *p_table ++ = 0; // reserved . w0 + *p_table ++ = ( 140 << 24 | // denoise STAD threshold . 
w1 192 << 16 | // dnmh_history_max 0 << 12 | // reserved @@ -225,6 +228,8 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c 13 << 6 | // chr temp diff th 7 ); // chr temp diff low + if (IS_GEN8(i965->intel.device_id)) + *p_table ++ = 0; // parameters for hot pixel, } void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) -- cgit v1.2.1 From b8836f47f36ed02bc6e407aeb9a825c23c0d9e49 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 7 Jan 2014 13:13:25 +0800 Subject: VEBOX/bdw: set downsample method Signed-off-by: Xiang, Haihao --- src/gen75_vpp_vebox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 29eb14ea..160560e7 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1360,8 +1360,8 @@ void bdw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *pro 0 << 11 | // vignette enable 0 << 10 | // demosaic enable di_output_frames_flag << 8 | // DI output frame - 0 << 7 | // 444->422 downsample method - 0 << 6 | // 422->420 downsample method + 1 << 7 | // 444->422 downsample method + 1 << 6 | // 422->420 downsample method is_first_frame << 5 | // DN/DI first frame is_di_enabled << 4 | // DI enable is_dn_enabled << 3 | // DN enable -- cgit v1.2.1 From 03b701e5c6b2daf5f7be9a5591244eb655a11ffa Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Tue, 7 Jan 2014 10:45:56 +0800 Subject: VPP: Refine code for sharpening on Haswell Signed-off-by: Li Xiaowei --- src/gen75_picture_process.c | 6 +- src/gen75_vpp_gpe.c | 147 +++++++++++++++++++++++--------------------- src/gen75_vpp_gpe.h | 38 +++++------- 3 files changed, 96 insertions(+), 95 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index 9dd7c192..d5b5acb3 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -101,14 +101,14 @@ gen75_vpp_gpe(VADriverContextP ctx, VAStatus va_status = 
VA_STATUS_SUCCESS; if(proc_ctx->vpp_gpe_ctx == NULL){ - proc_ctx->vpp_gpe_ctx = gen75_gpe_context_init(ctx); + proc_ctx->vpp_gpe_ctx = vpp_gpe_context_init(ctx); } proc_ctx->vpp_gpe_ctx->pipeline_param = proc_ctx->pipeline_param; proc_ctx->vpp_gpe_ctx->surface_pipeline_input_object = proc_ctx->surface_pipeline_input_object; proc_ctx->vpp_gpe_ctx->surface_output_object = proc_ctx->surface_render_output_object; - va_status = gen75_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx); + va_status = vpp_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx); return va_status; } @@ -254,7 +254,7 @@ gen75_proc_context_destroy(void *hw_context) } if(proc_ctx->vpp_gpe_ctx){ - gen75_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx); + vpp_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx); proc_ctx->vpp_gpe_ctx = NULL; } diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 236ccaf7..a7f8fd55 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -33,6 +33,7 @@ #include "intel_batchbuffer.h" #include "intel_driver.h" +#include "i965_structs.h" #include "i965_defines.h" #include "i965_drv_video.h" #include "gen75_vpp_gpe.h" @@ -40,12 +41,9 @@ #define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS #define MAX_MEDIA_SURFACES_GEN6 34 -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index)) -#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index)) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * 
(index)) #define CURBE_ALLOCATION_SIZE 37 #define CURBE_TOTAL_DATA_LENGTH (4 * 32) @@ -99,7 +97,7 @@ static struct i965_kernel gen75_vpp_sharpening_kernels[] = { }; static VAStatus -gpe_surfaces_setup(VADriverContextP ctx, +gen75_gpe_process_surfaces_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct object_surface *obj_surface; @@ -111,44 +109,44 @@ gpe_surfaces_setup(VADriverContextP ctx, for( i = 0; i < input_surface_sum; i += 2){ obj_surface = vpp_gpe_ctx->surface_input_object[i/2]; assert(obj_surface); - vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(i), - SURFACE_STATE_OFFSET(i)); - - vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(i + 1), - SURFACE_STATE_OFFSET(i + 1)); + gen7_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET(i), + SURFACE_STATE_OFFSET(i)); + + gen75_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET(i + 1), + SURFACE_STATE_OFFSET(i + 1)); } /* Binding output NV12 surface(Luma + Chroma) */ obj_surface = vpp_gpe_ctx->surface_output_object; assert(obj_surface); - vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum), - SURFACE_STATE_OFFSET(input_surface_sum)); - vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum + 1), - SURFACE_STATE_OFFSET(input_surface_sum + 1)); + gen7_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET(input_surface_sum), + SURFACE_STATE_OFFSET(input_surface_sum)); + gen75_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET(input_surface_sum + 1), + SURFACE_STATE_OFFSET(input_surface_sum + 1)); /* Bind kernel return buffer surface */ - 
vpp_gpe_ctx->vpp_buffer_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - &vpp_gpe_ctx->vpp_kernel_return, - BINDING_TABLE_OFFSET((input_surface_sum + 2)), - SURFACE_STATE_OFFSET(input_surface_sum + 2)); + gen7_gpe_buffer_suface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + &vpp_gpe_ctx->vpp_kernel_return, + BINDING_TABLE_OFFSET((input_surface_sum + 2)), + SURFACE_STATE_OFFSET(input_surface_sum + 2)); return VA_STATUS_SUCCESS; } static VAStatus -gpe_interface_setup(VADriverContextP ctx, +gen75_gpe_process_interface_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct gen6_interface_descriptor_data *desc; @@ -186,23 +184,21 @@ gpe_interface_setup(VADriverContextP ctx, } static VAStatus -gpe_constant_setup(VADriverContextP ctx, - struct vpp_gpe_context *vpp_gpe_ctx){ +gen75_gpe_process_constant_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1); assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual); - /*Copy buffer into CURB*/ - /* unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual; memcpy(constant_buffer, vpp_gpe_ctx->kernel_param, vpp_gpe_ctx->kernel_param_size); - */ dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo); return VA_STATUS_SUCCESS; } static VAStatus -gpe_fill_thread_parameters(VADriverContextP ctx, +gen75_gpe_process_parameters_fill(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { unsigned int *command_ptr; @@ -237,7 +233,7 @@ gpe_fill_thread_parameters(VADriverContextP ctx, } static VAStatus -gpe_pipeline_setup(VADriverContextP ctx, +gen75_gpe_process_pipeline_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000); @@ -245,7 +241,7 @@ gpe_pipeline_setup(VADriverContextP ctx, gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch); - gpe_fill_thread_parameters(ctx, vpp_gpe_ctx); + gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx); BEGIN_BATCH(vpp_gpe_ctx->batch, 2); 
OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6)); @@ -261,7 +257,7 @@ gpe_pipeline_setup(VADriverContextP ctx, } static VAStatus -gpe_process_init(VADriverContextP ctx, +gen75_gpe_process_init(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -290,28 +286,28 @@ gpe_process_init(VADriverContextP ctx, vpp_gpe_ctx->vpp_kernel_return.bo = bo; dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo); - i965_gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx); + vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx); return VA_STATUS_SUCCESS; } static VAStatus -gpe_process_prepare(VADriverContextP ctx, +gen75_gpe_process_prepare(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { /*Setup all the memory object*/ - gpe_surfaces_setup(ctx, vpp_gpe_ctx); - gpe_interface_setup(ctx, vpp_gpe_ctx); - gpe_constant_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx); + //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx); /*Programing media pipeline*/ - gpe_pipeline_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx); return VA_STATUS_SUCCESS; } static VAStatus -gpe_process_run(VADriverContextP ctx, +gen75_gpe_process_run(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { intel_batchbuffer_flush(vpp_gpe_ctx->batch); @@ -320,19 +316,27 @@ gpe_process_run(VADriverContextP ctx, } static VAStatus -gen75_gpe_process(VADriverContextP ctx, +gen75_gpe_process(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; - va_status = gpe_process_init(ctx, vpp_gpe_ctx); - va_status |=gpe_process_prepare(ctx, vpp_gpe_ctx); - va_status |=gpe_process_run(ctx, vpp_gpe_ctx); + + va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx); + va_status |=gen75_gpe_process_prepare(ctx, vpp_gpe_ctx); + va_status |=gen75_gpe_process_run(ctx, vpp_gpe_ctx); return 
va_status; } static VAStatus -gen75_gpe_process_sharpening(VADriverContextP ctx, +vpp_gpe_process(VADriverContextP ctx, + struct vpp_gpe_context * vpp_gpe_ctx) +{ + return gen75_gpe_process(ctx, vpp_gpe_ctx); +} + +static VAStatus +vpp_gpe_process_sharpening(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; @@ -416,10 +420,10 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 0; - va_status = gen75_gpe_process(ctx, vpp_gpe_ctx); + va_status = vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); - /* Step 2: vertical blur process */ + /* Step 2: vertical blur process */ vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object; vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object; vpp_gpe_ctx->forward_surf_sum = 0; @@ -443,7 +447,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 1; - gen75_gpe_process(ctx, vpp_gpe_ctx); + vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); /* Step 3: apply the blur to original surface */ @@ -471,7 +475,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 2; - va_status = gen75_gpe_process(ctx, vpp_gpe_ctx); + va_status = vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); return va_status; @@ -480,7 +484,7 @@ error: return VA_STATUS_ERROR_INVALID_PARAMETER; } -VAStatus gen75_gpe_process_picture(VADriverContextP ctx, +VAStatus vpp_gpe_process_picture(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; @@ -538,7 +542,7 @@ VAStatus gen75_gpe_process_picture(VADriverContextP ctx, vpp_gpe_ctx->in_frame_h = obj_surface->orig_height; if(filter && filter->type == VAProcFilterSharpening) { - va_status = gen75_gpe_process_sharpening(ctx, vpp_gpe_ctx); + va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx); } else { va_status = 
VA_STATUS_ERROR_ATTR_NOT_SUPPORTED; } @@ -552,7 +556,7 @@ error: } void -gen75_gpe_context_destroy(VADriverContextP ctx, +vpp_gpe_context_destroy(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo); @@ -561,7 +565,7 @@ gen75_gpe_context_destroy(VADriverContextP ctx, dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo); vpp_gpe_ctx->vpp_kernel_return.bo = NULL; - i965_gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx); + vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx); if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){ assert(vpp_gpe_ctx->surface_tmp_object != NULL); @@ -576,14 +580,23 @@ gen75_gpe_context_destroy(VADriverContextP ctx, } struct vpp_gpe_context * -gen75_gpe_context_init(VADriverContextP ctx) +vpp_gpe_context_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context)); struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx); + assert(IS_HASWELL(i965->intel.device_id)); + + vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init; + vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy; + vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels; + vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; + vpp_gpe_ctx->surface_tmp_object = NULL; + gpe_ctx->surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); @@ -595,12 +608,6 @@ gen75_gpe_context_init(VADriverContextP ctx) gpe_ctx->vfe_state.urb_entry_size = 59 - 1; gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; - vpp_gpe_ctx->vpp_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; - vpp_gpe_ctx->vpp_buffer_surface_setup = gen7_gpe_buffer_suface_setup; - vpp_gpe_ctx->vpp_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup; - 
vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; - vpp_gpe_ctx->surface_tmp_object = NULL; - vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); vpp_gpe_ctx->is_first_frame = 1; diff --git a/src/gen75_vpp_gpe.h b/src/gen75_vpp_gpe.h index 97ee72dd..5ffee2cf 100644 --- a/src/gen75_vpp_gpe.h +++ b/src/gen75_vpp_gpe.h @@ -79,6 +79,7 @@ struct vpp_gpe_context{ unsigned char * kernel_param; unsigned int kernel_param_size; + unsigned char * thread_param; unsigned int thread_param_size; unsigned int thread_num; @@ -95,33 +96,26 @@ struct vpp_gpe_context{ unsigned int in_frame_h; unsigned int is_first_frame; - void (*vpp_media_rw_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - - void (*vpp_buffer_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct i965_buffer_surface *buffer_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - - void (*vpp_media_chroma_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + void (*gpe_context_init)(VADriverContextP ctx, + struct i965_gpe_context *gpe_context); + + void (*gpe_context_destroy)(struct i965_gpe_context *gpe_context); + + void (*gpe_load_kernels)(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + unsigned int num_kernels); + }; struct vpp_gpe_context * -gen75_gpe_context_init(VADriverContextP ctx); +vpp_gpe_context_init(VADriverContextP ctx); void -gen75_gpe_context_destroy(VADriverContextP ctx, - struct vpp_gpe_context* vpp_context); +vpp_gpe_context_destroy(VADriverContextP ctx, + struct vpp_gpe_context* vpp_context); VAStatus -gen75_gpe_process_picture(VADriverContextP ctx, - struct vpp_gpe_context * 
vpp_context); +vpp_gpe_process_picture(VADriverContextP ctx, + struct vpp_gpe_context * vpp_context); #endif -- cgit v1.2.1 From 77372cbc677f94c58d94f23b375af3eee28ea2ea Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Tue, 7 Jan 2014 11:38:09 +0800 Subject: VPP: Enable sharpening feature on BDW Signed-off-by: Li Xiaowei --- src/gen75_vpp_gpe.c | 359 +++- .../post_processing/gen8/sharpening_h_blur.g8b | 1718 ++++++++++++++++++++ .../post_processing/gen8/sharpening_unmask.g8b | 159 ++ .../post_processing/gen8/sharpening_v_blur.g8b | 296 ++++ 4 files changed, 2501 insertions(+), 31 deletions(-) create mode 100644 src/shaders/post_processing/gen8/sharpening_h_blur.g8b create mode 100644 src/shaders/post_processing/gen8/sharpening_unmask.g8b create mode 100644 src/shaders/post_processing/gen8/sharpening_v_blur.g8b diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index a7f8fd55..08de61b3 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -41,9 +41,11 @@ #define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS #define MAX_MEDIA_SURFACES_GEN6 34 -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index)) -#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) +#define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index)) +#define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) + +#define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index)) +#define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) #define CURBE_ALLOCATION_SIZE 37 #define CURBE_TOTAL_DATA_LENGTH (4 * 32) @@ -96,6 +98,41 @@ static struct i965_kernel gen75_vpp_sharpening_kernels[] = { }, }; +/* sharpening kernels for Broadwell */ +static 
const unsigned int gen8_gpe_sharpening_h_blur[][4] = { + #include "shaders/post_processing/gen8/sharpening_h_blur.g8b" +}; +static const unsigned int gen8_gpe_sharpening_v_blur[][4] = { + #include "shaders/post_processing/gen8/sharpening_v_blur.g8b" +}; +static const unsigned int gen8_gpe_sharpening_unmask[][4] = { + #include "shaders/post_processing/gen8/sharpening_unmask.g8b" +}; + +static struct i965_kernel gen8_vpp_sharpening_kernels[] = { + { + "vpp: sharpening(horizontal blur)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_h_blur, + sizeof(gen8_gpe_sharpening_h_blur), + NULL + }, + { + "vpp: sharpening(vertical blur)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_v_blur, + sizeof(gen8_gpe_sharpening_v_blur), + NULL + }, + { + "vpp: sharpening(unmask)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_unmask, + sizeof(gen8_gpe_sharpening_unmask), + NULL + }, +}; + static VAStatus gen75_gpe_process_surfaces_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) @@ -112,14 +149,14 @@ gen75_gpe_process_surfaces_setup(VADriverContextP ctx, gen7_gpe_media_rw_surface_setup(ctx, &vpp_gpe_ctx->gpe_ctx, obj_surface, - BINDING_TABLE_OFFSET(i), - SURFACE_STATE_OFFSET(i)); + BINDING_TABLE_OFFSET_GEN7(i), + SURFACE_STATE_OFFSET_GEN7(i)); gen75_gpe_media_chroma_surface_setup(ctx, &vpp_gpe_ctx->gpe_ctx, obj_surface, - BINDING_TABLE_OFFSET(i + 1), - SURFACE_STATE_OFFSET(i + 1)); + BINDING_TABLE_OFFSET_GEN7(i + 1), + SURFACE_STATE_OFFSET_GEN7(i + 1)); } /* Binding output NV12 surface(Luma + Chroma) */ @@ -128,19 +165,19 @@ gen75_gpe_process_surfaces_setup(VADriverContextP ctx, gen7_gpe_media_rw_surface_setup(ctx, &vpp_gpe_ctx->gpe_ctx, obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum), - SURFACE_STATE_OFFSET(input_surface_sum)); + BINDING_TABLE_OFFSET_GEN7(input_surface_sum), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum)); gen75_gpe_media_chroma_surface_setup(ctx, &vpp_gpe_ctx->gpe_ctx, obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum + 1), - 
SURFACE_STATE_OFFSET(input_surface_sum + 1)); + BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1)); /* Bind kernel return buffer surface */ gen7_gpe_buffer_suface_setup(ctx, &vpp_gpe_ctx->gpe_ctx, &vpp_gpe_ctx->vpp_kernel_return, - BINDING_TABLE_OFFSET((input_surface_sum + 2)), - SURFACE_STATE_OFFSET(input_surface_sum + 2)); + BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2)); return VA_STATUS_SUCCESS; } @@ -166,7 +203,7 @@ gen75_gpe_process_interface_setup(VADriverContextP ctx, desc->desc2.sampler_count = 0; /* FIXME: */ desc->desc2.sampler_state_pointer = 0; desc->desc3.binding_table_entry_count = 6; /* FIXME: */ - desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5); desc->desc4.constant_urb_entry_read_offset = 0; desc->desc4.constant_urb_entry_read_length = 0; @@ -328,11 +365,254 @@ gen75_gpe_process(VADriverContextP ctx, return va_status; } + +static VAStatus +gen8_gpe_process_surfaces_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct object_surface *obj_surface; + unsigned int i = 0; + unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum + + vpp_gpe_ctx->backward_surf_sum) * 2; + + /* Binding input NV12 surfaces (Luma + Chroma)*/ + for( i = 0; i < input_surface_sum; i += 2){ + obj_surface = vpp_gpe_ctx->surface_input_object[i/2]; + assert(obj_surface); + gen8_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(i), + SURFACE_STATE_OFFSET_GEN8(i)); + + gen8_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(i + 1), + SURFACE_STATE_OFFSET_GEN8(i + 1)); + } + + /* Binding output NV12 surface(Luma + Chroma) */ + obj_surface = vpp_gpe_ctx->surface_output_object; + assert(obj_surface); + gen8_gpe_media_rw_surface_setup(ctx, + 
&vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(input_surface_sum), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum)); + gen8_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1)); + /* Bind kernel return buffer surface */ + gen7_gpe_buffer_suface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + &vpp_gpe_ctx->vpp_kernel_return, + BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2)); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_interface_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct gen8_interface_descriptor_data *desc; + dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo; + int i; + + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = (struct gen8_interface_descriptor_data *)(bo->virtual + + vpp_gpe_ctx->gpe_ctx.idrt_offset); + + /*Setup the descritor table*/ + for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){ + struct i965_kernel *kernel; + kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i]; + assert(sizeof(*desc) == 32); + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; + desc->desc3.sampler_count = 0; /* FIXME: */ + desc->desc3.sampler_state_pointer = 0; + desc->desc4.binding_table_entry_count = 6; /* FIXME: */ + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + desc->desc5.constant_urb_entry_read_length = 0; + + desc++; + } + + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_constant_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1); + assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual); + unsigned char* constant_buffer = 
vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual; + memcpy(constant_buffer, vpp_gpe_ctx->kernel_param, + vpp_gpe_ctx->kernel_param_size); + dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_parameters_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + unsigned int *command_ptr; + unsigned int i, size = vpp_gpe_ctx->thread_param_size; + unsigned char* position = NULL; + + /* Thread inline data setting*/ + dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1); + command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual; + + for(i = 0; i < vpp_gpe_ctx->thread_num; i ++) + { + *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2)); + *command_ptr++ = vpp_gpe_ctx->sub_shader_index; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /* copy thread inline data */ + position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i); + memcpy(command_ptr, position, size); + command_ptr += size/sizeof(int); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_pipeline_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000); + intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch); + + gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch); + + gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx); + + BEGIN_BATCH(vpp_gpe_ctx->batch, 3); + OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(vpp_gpe_ctx->batch, + vpp_gpe_ctx->vpp_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BATCH(vpp_gpe_ctx->batch, 0); + + ADVANCE_BATCH(vpp_gpe_ctx->batch); + + intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch); + + return 
VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_init(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + + unsigned int batch_buf_size = vpp_gpe_ctx->thread_num * + (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16; + + vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num; + vpp_gpe_ctx->vpp_kernel_return.size_block = 16; + vpp_gpe_ctx->vpp_kernel_return.pitch = 1; + + unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks + * vpp_gpe_ctx->vpp_kernel_return.size_block; + + dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vpp batch buffer", + batch_buf_size, 0x1000); + vpp_gpe_ctx->vpp_batchbuffer.bo = bo; + dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo); + + dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vpp kernel return buffer", + kernel_return_size, 0x1000); + vpp_gpe_ctx->vpp_kernel_return.bo = bo; + dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo); + + vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_prepare(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + /*Setup all the memory object*/ + gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx); + gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx); + //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx); + + /*Programing media pipeline*/ + gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_run(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + intel_batchbuffer_flush(vpp_gpe_ctx->batch); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process(VADriverContextP ctx, + struct vpp_gpe_context * vpp_gpe_ctx) +{ + VAStatus va_status = VA_STATUS_SUCCESS; + + va_status = 
gen8_gpe_process_init(ctx, vpp_gpe_ctx); + va_status |=gen8_gpe_process_prepare(ctx, vpp_gpe_ctx); + va_status |=gen8_gpe_process_run(ctx, vpp_gpe_ctx); + + return va_status; +} + static VAStatus vpp_gpe_process(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { - return gen75_gpe_process(ctx, vpp_gpe_ctx); + struct i965_driver_data *i965 = i965_driver_data(ctx); + if (IS_HASWELL(i965->intel.device_id)) + return gen75_gpe_process(ctx, vpp_gpe_ctx); + else if (IS_GEN8(i965->intel.device_id)) + return gen8_gpe_process(ctx, vpp_gpe_ctx); + + return VA_STATUS_ERROR_UNIMPLEMENTED; } static VAStatus @@ -366,9 +646,15 @@ vpp_gpe_process_sharpening(VADriverContextP ctx, if(vpp_gpe_ctx->is_first_frame){ vpp_gpe_ctx->sub_shader_sum = 3; - i965_gpe_load_kernels(ctx, + struct i965_kernel * vpp_kernels; + if (IS_HASWELL(i965->intel.device_id)) + vpp_kernels = gen75_vpp_sharpening_kernels; + else if (IS_GEN8(i965->intel.device_id)) + vpp_kernels = gen8_vpp_sharpening_kernels; + + vpp_gpe_ctx->gpe_load_kernels(ctx, &vpp_gpe_ctx->gpe_ctx, - gen75_vpp_sharpening_kernels, + vpp_kernels, vpp_gpe_ctx->sub_shader_sum); } @@ -586,21 +872,13 @@ vpp_gpe_context_init(VADriverContextP ctx) struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context)); struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx); - assert(IS_HASWELL(i965->intel.device_id)); + assert(IS_HASWELL(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)); - vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init; - vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy; - vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels; vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; vpp_gpe_ctx->surface_tmp_object = NULL; - - gpe_ctx->surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; - - gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; - gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); - - 
gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH; + vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); + vpp_gpe_ctx->is_first_frame = 1; gpe_ctx->vfe_state.max_num_threads = 60 - 1; gpe_ctx->vfe_state.num_urb_entries = 16; @@ -608,9 +886,28 @@ vpp_gpe_context_init(VADriverContextP ctx) gpe_ctx->vfe_state.urb_entry_size = 59 - 1; gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; - vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); + if (IS_HASWELL(i965->intel.device_id)) { + vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init; + vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy; + vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels; + gpe_ctx->surface_state_binding_table.length = + (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH; + gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; + gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + + } else if (IS_GEN8(i965->intel.device_id)) { + vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init; + vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy; + vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels; + gpe_ctx->surface_state_binding_table.length = + (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH; + gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6; - vpp_gpe_ctx->is_first_frame = 1; + } return vpp_gpe_ctx; } diff --git a/src/shaders/post_processing/gen8/sharpening_h_blur.g8b b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b new file mode 100644 index 00000000..ffa759b4 --- /dev/null +++ b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b @@ -0,0 +1,1718 @@ +{ 0x00000001, 0x23401608, 0x00000000, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 
0x23441608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x00000001, 0x202c1608, 0x00000000, 0x00040004 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x06000010, 0x20000201, 0x16000024, 0x00040004 }, +{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 }, +{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 }, +{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 }, +{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 }, +{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 }, +{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 }, +{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 }, +{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 }, +{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 }, +{ 0x00200001, 0x23183ae8, 0x004500a8, 0x00000000 }, +{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 }, +{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 }, +{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 }, +{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 }, +{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 }, +{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x000067d0 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 }, +{ 0x00200001, 0x22603ae8, 0x00450268, 0x00000000 }, +{ 0x00200001, 0x22403ae8, 0x00450248, 0x00000000 }, +{ 0x00200001, 0x22803ae8, 0x00450288, 0x00000000 }, +{ 0x00200001, 0x22c03ae8, 0x004502c8, 0x00000000 }, +{ 0x00200001, 0x22a03ae8, 0x004502a8, 0x00000000 }, +{ 0x00200001, 0x23203ae8, 0x00450328, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 }, +{ 0x00200001, 0x23003ae8, 0x00450308, 0x00000000 }, +{ 0x00200001, 0x22e03ae8, 0x004502e8, 0x00000000 }, +{ 0x00000040, 0x20400208, 0x1600002c, 0x00040004 }, 
+{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00200001, 0x22703ae8, 0x00450278, 0x00000000 }, +{ 0x00200001, 0x22503ae8, 0x00450258, 0x00000000 }, +{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 }, +{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 }, +{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 }, +{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000240, 0x00000000 }, +{ 0x00200001, 0x22903ae8, 0x00450298, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000250, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000260, 0x00000000 }, +{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 }, +{ 0x00200001, 0x22d03ae8, 0x004502d8, 0x00000000 }, +{ 0x00200001, 0x22b03ae8, 0x004502b8, 0x00000000 }, +{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000270, 0x00000000 }, +{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 }, +{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000280, 0x00000000 }, +{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 }, +{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 }, +{ 0x00200001, 0x23303ae8, 0x00450338, 0x00000000 }, +{ 0x00200001, 0x23103ae8, 0x00450318, 0x00000000 }, +{ 0x00200001, 0x22f03ae8, 0x004502f8, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000290, 0x00000000 }, +{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 }, +{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 }, +{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a0, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c0, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d0, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b0, 0x00000000 }, +{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 }, +{ 0x00200001, 0x23183ae8, 0x004500a8, 0x00000000 }, +{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e0, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c1, 
0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000241, 0x00000000 }, +{ 0x00000001, 0x20303ee8, 0x00000000, 0x332bcc77 }, +{ 0x00000001, 0x21403ee8, 0x00000000, 0x3c1d98ad }, +{ 0x00000001, 0x206c22e8, 0x000002f0, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d1, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000251, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000261, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e1, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f1, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000271, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000281, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000301, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000311, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000291, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a1, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000321, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000331, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b1, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000300, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000310, 0x00000000 }, +{ 0x00000001, 
0x208822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000320, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000330, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x21503ee8, 0x00000000, 0x3e525448 }, +{ 0x00000001, 0x21603ee8, 0x00000000, 0x3f11e168 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, 
+{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x21703ee8, 0x00000000, 0x3875735f }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 
0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 
0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x21803a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61a00a88, 0x00000180, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c1, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000241, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x61a80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61b80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61c80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61d80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61e80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61f80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d1, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000251, 0x00000000 }, +{ 0x00000001, 0x62000a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000261, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e1, 0x00000000 }, 
+{ 0x00000001, 0x206c22e8, 0x000002f1, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000271, 0x00000000 }, +{ 0x00000001, 0x62080a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000281, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000301, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000311, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000291, 0x00000000 }, +{ 0x00000001, 0x62100a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a1, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000321, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000331, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b1, 0x00000000 }, +{ 0x00000001, 0x62180a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000263, 
0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 
0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, 
+{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x61a10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61a90a88, 0x00000220, 
0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b10a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61b90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61c90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61d90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x61e10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61e90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 
0x61f90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d2, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x62010a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x62090a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x62110a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a2, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000322, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000332, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x205c22e8, 0x000002b2, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x62190a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, 
+{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 
0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 
0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, 
+{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x41aa2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41ba2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41ca2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41da2288, 0x00000024, 
0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41ea2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fa2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42022288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 
0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420a2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42122288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000323, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x205c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x421a2288, 0x00000024, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, 
+{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208022e8, 0x00000248, 
0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x0060015b, 
0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a32288, 0x00000024, 0x00000000 }, 
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x41ab2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41bb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41cb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41db2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41eb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 
0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42032288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420b2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42132288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x207c22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 
0x205c22e8, 0x000002b4, 0x00000000 }, +{ 0x00000001, 0x421b2288, 0x00000024, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, 
+{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 
0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 
0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x61a40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61ac0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61bc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61cc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61dc0a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61ec0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, 
+{ 0x00000001, 0x61f40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61fc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x62040a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x620c0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x62140a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x206422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 
0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000335, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x621c0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 
0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, 
+{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 
0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x61a50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61ad0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61bd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61cd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 
0x61dd0a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61ed0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61fd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x62050a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x620d0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x62150a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b6, 0x00000000 }, +{ 0x00000001, 0x621d0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, 
+{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 
0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 
0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, 
+{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028e, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41ae2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b62288, 0x00000024, 
0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41be2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41ce2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41de2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x41ee2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fe2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42062288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420e2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 
0x205022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42162288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x421e2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, 
+{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 
0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 
0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028e, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x208022e8, 0x0000024f, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025f, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cf, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002df, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026f, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027f, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ef, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002ff, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028f, 0x00000000 }, 
+{ 0x00000001, 0x209422e8, 0x0000029f, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030f, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031f, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002af, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bf, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032f, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033f, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20403a28, 0x00000100, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x41af2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000108, 0x00000000 }, +{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x0a0a8000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41bf2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41cf2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41df2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000124, 
0x00000000 }, +{ 0x00000001, 0x41e72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x41ef2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41ff2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42072288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420f2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42172288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x21880608, 0x00000000, 0x000f0007 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x421f2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 }, +{ 0x00000040, 0x202c0208, 0x1600002c, 0x00080008 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x05000010, 0x20000200, 0x0200002c, 0x00000024 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0xffff9830 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 }, +{ 0x00600001, 
0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/sharpening_unmask.g8b b/src/shaders/post_processing/gen8/sharpening_unmask.g8b new file mode 100644 index 00000000..f27a2d5c --- /dev/null +++ b/src/shaders/post_processing/gen8/sharpening_unmask.g8b @@ -0,0 +1,159 @@ +{ 0x00000001, 0x21281608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x202c1608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x21481608, 0x00000000, 0x00050005 }, +{ 0x00000001, 0x21681608, 0x00000000, 0x00040004 }, +{ 0x00000001, 0x21881608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x21081608, 0x00000000, 0x00010001 }, +{ 0x06000010, 0x20000202, 0x16000020, 0x00000000 }, +{ 0x00010020, 0x34000006, 0x0e001400, 0x00000530 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000128, 0x02190000 }, +{ 0x06000010, 0x20000201, 0x16000030, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 }, 
+{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20603a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x22000200, 0x06000188, 0x02190000 }, +{ 0x00400001, 0x21a03ae8, 0x00690060, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00400001, 0x21c03ae8, 0x00690040, 0x00000000 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x00000150 }, +{ 0x00000001, 0x21821e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 }, +{ 0x0080802c, 0x21600008, 0x0e490000, 0x00000460 }, +{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21200008, 0x0e490000, 0x000004e0 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000550 }, +{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000530 }, +{ 0x00000001, 0x41800268, 0x00000030, 0x00000000 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x20241a68, 0x00000180, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901a0, 0x00000000 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x000004e0 }, +{ 0x00400001, 0x21903ae8, 0x00690150, 0x00000000 }, +{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 }, +{ 0x00400001, 0x21903ae8, 0x006901a0, 0x00000000 }, +{ 0x01000010, 0x20000200, 0x16000034, 0x00000000 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0x00000160 }, +{ 0x00000001, 0x21121e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 }, +{ 0x0080802c, 0x21800008, 0x0e490000, 0x00000560 }, +{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 
0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21200008, 0x0e490000, 0x00000360 }, +{ 0x00600040, 0x41502288, 0x1eae4150, 0x00ff00ff }, +{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 }, +{ 0x00600040, 0x41512288, 0x1eae4151, 0x00ff00ff }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000590 }, +{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000570 }, +{ 0x00000001, 0x41100268, 0x00000034, 0x00000000 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x20241a68, 0x00000110, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690190, 0x00000000 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000520 }, +{ 0x00400001, 0x21e03ae8, 0x00690150, 0x00000000 }, +{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 }, +{ 0x00400001, 0x21e03ae8, 0x00690190, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000168, 0x040a8000 }, +{ 0x00400001, 0x20603ae8, 0x006901e0, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x0000000c, 0x20240208, 0x16000028, 0x00010001 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000108, 0x02190000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00200001, 0x20603ae8, 0x00450040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000148, 0x040a8000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 }, +{ 0x0c600031, 
0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x202c0208, 0x1600002c, 0x00040004 }, +{ 0x05000010, 0x20000203, 0x0200002c, 0x00000020 }, +{ 0x00010020, 0x34000007, 0x0e001400, 0xfffffad0 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, +{ 0x00000040, 0x20241a28, 0x1e004182, 0x00800080 }, +{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000182 }, +{ 0x05600010, 0x20002260, 0x22ae0170, 0x00ae0150 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 }, +{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 }, +{ 0x05601010, 0x20002260, 0x22ae0171, 0x00ae0151 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00610001, 0x41501a88, 0x00ae0040, 0x00000000 }, +{ 0x00611001, 0x41511a88, 0x00ae0042, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450160, 0x00000000 }, +{ 0x00800040, 0x20402268, 0x22b10150, 0x00b14170 }, +{ 0x00000040, 0x20241a28, 0x1e00410c, 0x00800080 }, +{ 0x05800010, 0x20001a62, 0x1eb10040, 0x00000000 }, +{ 0x00810001, 0x20401a6a, 0x00b14040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x0000010c }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450120, 0x00000000 }, +{ 0x00800040, 0x20802228, 0x1eb14170, 0x00ff00ff }, +{ 0x00800040, 0x20402228, 0x1eb14150, 0x00ff00ff }, +{ 0x00000040, 0x20241a28, 0x1e004180, 0x00800080 }, +{ 0x00600041, 0x20c00a28, 0x0a8d0040, 0x008d0080 }, +{ 0x00600041, 0x20e00a28, 0x0a8d0060, 0x008d00a0 }, +{ 0x00000001, 0x20401e28, 0x00000000, 0x00ff00ff }, +{ 0x0c600038, 0x20800a28, 0x0a8d00e0, 0x00000040 }, +{ 0x0c600038, 0x20600a28, 0x0a8d00c0, 0x00000040 }, +{ 0x00800040, 0x40400a68, 0x1e8d4060, 0x00ff00ff }, 
+{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000180 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450100, 0x00000000 }, +{ 0x00000040, 0x20241a28, 0x1e004112, 0x00800080 }, +{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000112 }, +{ 0x03600010, 0x20002261, 0x22ae0170, 0x00ae0150 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 }, +{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 }, +{ 0x03601010, 0x20002261, 0x22ae0171, 0x00ae0151 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00610001, 0x41501a89, 0x00ae0040, 0x00000000 }, +{ 0x00611001, 0x41511a89, 0x00ae0042, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450180, 0x00000000 }, +{ 0x00000001, 0x20801e28, 0x00000000, 0x00ff00ff }, +{ 0x00800041, 0x20402228, 0x22b10170, 0x00b10150 }, +{ 0x00000040, 0x20241a28, 0x1e004110, 0x00800080 }, +{ 0x0c600038, 0x20c00a28, 0x0a8d0060, 0x00000080 }, +{ 0x0c600038, 0x20a00a28, 0x0a8d0040, 0x00000080 }, +{ 0x00800001, 0x40400a68, 0x008d00a0, 0x00000000 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000110 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450140, 0x00000000 }, diff --git a/src/shaders/post_processing/gen8/sharpening_v_blur.g8b b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b new file mode 100644 index 00000000..a57f43d3 --- /dev/null +++ 
b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b @@ -0,0 +1,296 @@ +{ 0x00000001, 0x23601608, 0x00000000, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x23641608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20281608, 0x00000000, 0x00040004 }, +{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x06000010, 0x20000201, 0x16000020, 0x00040004 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x00000ff0 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 }, +{ 0x00800001, 0x22a03ae8, 0x008d0320, 0x00000000 }, +{ 0x00800001, 0x22603ae8, 0x008d02e0, 0x00000000 }, +{ 0x00000001, 0x21403ee8, 0x00000000, 0x3e525448 }, +{ 0x00000001, 0x21603ee8, 0x00000000, 0x3875735f }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000040, 0x20440208, 0x16000028, 0x00040004 }, +{ 0x00800001, 0x208022e8, 0x00b10270, 0x00000000 }, +{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 }, +{ 0x00000001, 0x20203ee8, 0x00000000, 0x332bcc77 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10260, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x00000001, 0x20303ee8, 0x00000000, 0x3c1d98ad }, +{ 0x00800001, 0x204022e8, 0x00b10270, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 
0x21503ee8, 0x00000000, 0x3f11e168 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 }, +{ 0x00800001, 0x21803a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x61800a88, 0x008d0180, 0x00000000 }, +{ 0x00800001, 0x21a02288, 0x00cf0180, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10280, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, 
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x00800001, 0x21b02288, 0x00cf0220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 
0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x21c02288, 0x00cf0220, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x204022e8, 0x00b102a0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 
0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x21d02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00800001, 0x204022e8, 0x00b102b0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x204022e8, 0x00b102c0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, 
+{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x21e02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x21f02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00800001, 0x204022e8, 0x00b102d0, 0x00000000 }, +{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x0a0a8000 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 
0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10350, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00000001, 0x21880608, 0x00000000, 0x0007000f }, +{ 0x00800001, 0x20403a28, 0x008d0100, 0x00000000 }, +{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 }, +{ 0x00800001, 0x22002288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x60400a88, 0x008d0040, 0x00000000 }, +{ 0x00800001, 0x22102288, 0x00cf0040, 0x00000000 }, +{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 }, +{ 0x00000040, 
0x20280208, 0x16000028, 0x00080008 }, +{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x05000010, 0x20000200, 0x02000028, 0x00000020 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0xfffff010 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 }, +{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 }, +{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 }, +{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, -- cgit v1.2.1 From 994dd815289fc0b9378078ce2cce19a53250e481 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 9 Jan 2014 09:23:13 +0800 Subject: Remove the whitespace following trailing backslash in a 
Makefile.am src/shaders/post_processing/gen8/Makefile.am:31: whitespace following trailing backslash src/shaders/post_processing/gen8/Makefile.am:32: whitespace following trailing backslash Signed-off-by: Xiang, Haihao --- src/shaders/post_processing/gen8/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 50badf7d..ee3f2982 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -28,8 +28,8 @@ INTEL_PP_G8A = \ PA_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ - Save_AVS_RGBX.g8a \ - Save_AVS_PA.g8a \ + Save_AVS_RGBX.g8a \ + Save_AVS_PA.g8a \ Set_AVS_Buf_0123_PL2.g8a \ Set_AVS_Buf_0123_PL3.g8a \ Set_AVS_Buf_0123_BGRA.g8a \ -- cgit v1.2.1 From eb27b94384e4f5777ef06b0855ecae69155c3175 Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Thu, 9 Jan 2014 13:33:44 +0800 Subject: VPP: Correct return value of vpp gpe functions Signed-off-by: Li Xiaowei --- src/gen75_vpp_gpe.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 08de61b3..ac091000 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -359,12 +359,19 @@ gen75_gpe_process(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx); - va_status |=gen75_gpe_process_prepare(ctx, vpp_gpe_ctx); - va_status |=gen75_gpe_process_run(ctx, vpp_gpe_ctx); - - return va_status; -} + if (va_status != VA_STATUS_SUCCESS) + return va_status; + va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + return VA_STATUS_SUCCESS; +} static VAStatus gen8_gpe_process_surfaces_setup(VADriverContextP ctx, @@ -596,10 
+603,18 @@ gen8_gpe_process(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx); - va_status |=gen8_gpe_process_prepare(ctx, vpp_gpe_ctx); - va_status |=gen8_gpe_process_run(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; - return va_status; + va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + return VA_STATUS_SUCCESS; } static VAStatus -- cgit v1.2.1 From b1b6ceb65a70273345eb8fd87c7805e61bc65791 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 13 Jan 2014 09:42:28 +0800 Subject: Complain the warning instead of assert fault when slice picture is not found in DPB for decoder This is to fix the bug https://bugs.freedesktop.org/show_bug.cgi?id=72660 Signed-off-by: Zhao Yakui --- src/i965_decoder_utils.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 51d38a6b..e0d07639 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -268,6 +268,7 @@ gen5_fill_avc_ref_idx_state( ) { unsigned int i, n, frame_idx; + int found; for (i = 0, n = 0; i < ref_list_count; i++) { const VAPictureH264 * const va_pic = &ref_list[i]; @@ -275,16 +276,21 @@ gen5_fill_avc_ref_idx_state( if (va_pic->flags & VA_PICTURE_H264_INVALID) continue; + found = 0; for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) { const GenFrameStore * const fs = &frame_store[frame_idx]; if (fs->surface_id != VA_INVALID_ID && fs->surface_id == va_pic->picture_id) { - assert(frame_idx == fs->frame_store_id); + found = 1; break; } } - assert(frame_idx < MAX_GEN_REFERENCE_FRAMES); - state[n++] = get_ref_idx_state_1(va_pic, frame_idx); + + if (found) { + state[n++] = get_ref_idx_state_1(va_pic, frame_idx); + } else { + 
WARN_ONCE("Invalid Slice reference frame list !!!. It is not included in DPB \n"); + } } for (; n < 32; n++) -- cgit v1.2.1 From 07cad1269421e3351ba271a235445eb35a6e6170 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 13 Jan 2014 09:42:32 +0800 Subject: Remove the unnecessary sorting to simplify the DPB buffer management Signed-off-by: Zhao Yakui --- src/i965_decoder_utils.c | 52 ++++++++++-------------------------------------- 1 file changed, 11 insertions(+), 41 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index e0d07639..30ef372c 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -420,6 +420,7 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, /* add the new reference frame into the internal DPB */ if (!found) { int frame_idx; + int slot_found; struct object_surface *obj_surface = decode_state->reference_objects[i]; /* @@ -428,60 +429,29 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, */ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + slot_found = 0; + frame_idx = -1; /* Find a free frame store index */ - for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) { - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == VA_INVALID_ID || - frame_store[j].obj_surface == NULL) - continue; - - if (frame_store[j].frame_store_id == frame_idx) /* the store index is in use */ - break; - } - - if (j == MAX_GEN_REFERENCE_FRAMES) - break; - } - - assert(frame_idx < MAX_GEN_REFERENCE_FRAMES); - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { if (frame_store[j].surface_id == VA_INVALID_ID || frame_store[j].obj_surface == NULL) { - frame_store[j].surface_id = ref_pic->picture_id; - frame_store[j].frame_store_id = frame_idx; - frame_store[j].obj_surface = obj_surface; + frame_idx = j; + slot_found = 1; break; } } - } - } - /* sort */ - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES - 1; i++) { - if 
(frame_store[i].surface_id != VA_INVALID_ID && - frame_store[i].obj_surface != NULL && - frame_store[i].frame_store_id == i) - continue; - for (j = i + 1; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id != VA_INVALID_ID && - frame_store[j].obj_surface != NULL && - frame_store[j].frame_store_id == i) { - VASurfaceID id = frame_store[i].surface_id; - int frame_idx = frame_store[i].frame_store_id; - struct object_surface *obj_surface = frame_store[i].obj_surface; - - frame_store[i].surface_id = frame_store[j].surface_id; - frame_store[i].frame_store_id = frame_store[j].frame_store_id; - frame_store[i].obj_surface = frame_store[j].obj_surface; - frame_store[j].surface_id = id; + if (slot_found) { + frame_store[j].surface_id = ref_pic->picture_id; frame_store[j].frame_store_id = frame_idx; frame_store[j].obj_surface = obj_surface; - break; - } + } else { + WARN_ONCE("Not free slot for DPB reference list!!!\n"); + } } } + } void -- cgit v1.2.1 From 579235a0c65df039573d34a921178a8ac77c3f20 Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Thu, 9 Jan 2014 03:14:24 +0800 Subject: vp8 dec: follows va_dec_vp8.h update key_frame:0 means an intra frame bool_coder_ctx.count is the remaining bits in bool_coder_ctx.value, range[0,7) slice_data_offset/macroblock_offset update Signed-off-by: Zhao Halley --- src/gen8_mfd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 6a106631..abf30783 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2855,7 +2855,7 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, pic_param->pic_fields.bits.segmentation_enabled << 8 | 0 << 7 | /* segmentation id streamin disabled */ 0 << 6 | /* segmentation id streamout disabled */ - pic_param->pic_fields.bits.key_frame << 5 | + (pic_param->pic_fields.bits.key_frame == 0 ? 
1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/ pic_param->pic_fields.bits.filter_type << 4 | (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */ !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */ @@ -2964,7 +2964,7 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; int i, log2num; - unsigned int offset = slice_param->slice_data_offset; + unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3); assert(slice_param->num_of_partitions >= 2); assert(slice_param->num_of_partitions <= 9); @@ -2974,7 +2974,8 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 22); OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2)); OUT_BCS_BATCH(batch, - pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */ + // XXX, when bool_coder_ctx.count (remaining bits in value) is 0, 0 is also expected for CPBAC Entropy Count? 
+ ((8-pic_param->bool_coder_ctx.count)%8) << 16 | /* Partition 0 CPBAC Entropy Count */ pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */ log2num << 4 | (slice_param->macroblock_offset & 0x7)); -- cgit v1.2.1 From f08b13dea63bd24e25d026df921773dd20bcc918 Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Fri, 10 Jan 2014 09:53:16 +0800 Subject: vp8 dec: fix when bool_coder_ctx.count is 0 bool_coder_ctx.count is remaining bits, hw requires used-bits-count: 8-bool_coder_ctx.count, range [0,7] update offset and partition_size[0] as well Signed-off-by: Zhao Halley --- src/gen8_mfd.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index abf30783..b8286759 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2965,6 +2965,15 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; int i, log2num; unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3); + unsigned int used_bits = 8-pic_param->bool_coder_ctx.count; + unsigned int partition_size_0 = slice_param->partition_size[0]; + + assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7); + if (used_bits == 8) { + used_bits = 0; + offset += 1; + partition_size_0 -= 1; + } assert(slice_param->num_of_partitions >= 2); assert(slice_param->num_of_partitions <= 9); @@ -2974,8 +2983,7 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 22); OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2)); OUT_BCS_BATCH(batch, - // XXX, when bool_coder_ctx.count (remaining bits in value) is 0, 0 is also expected for CPBAC Entropy Count? 
- ((8-pic_param->bool_coder_ctx.count)%8) << 16 | /* Partition 0 CPBAC Entropy Count */ + used_bits << 16 | /* Partition 0 CPBAC Entropy Count */ pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */ log2num << 4 | (slice_param->macroblock_offset & 0x7)); @@ -2983,7 +2991,10 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx, pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */ 0); - for (i = 0; i < 9; i++) { + OUT_BCS_BATCH(batch, partition_size_0); + OUT_BCS_BATCH(batch, offset); + offset += partition_size_0; + for (i = 1; i < 9; i++) { if (i < slice_param->num_of_partitions) { OUT_BCS_BATCH(batch, slice_param->partition_size[i]); OUT_BCS_BATCH(batch, offset); -- cgit v1.2.1 From 56e328797247e655502dd0e59f739a888b3b792a Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Tue, 14 Jan 2014 10:44:55 +0800 Subject: Fix vp8 p frame decode error issue. Signed-off-by: Zhong Li --- src/gen8_mfd.c | 5 +++++ src/i965_decoder_utils.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index b8286759..ff38c417 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2770,6 +2770,11 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx, assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ assert(height_in_mbs > 0 && height_in_mbs <= 256); + intel_update_vp8_frame_store_index(ctx, + decode_state, + pic_param, + gen7_mfd_context->reference_surface); + /* Current decoded picture */ obj_surface = decode_state->render_object; i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 30ef372c..7ba51ba1 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -493,6 +493,58 @@ intel_update_vc1_frame_store_index(VADriverContextP ctx, } +void +intel_update_vp8_frame_store_index(VADriverContextP ctx, + struct decode_state *decode_state, + 
VAPictureParameterBufferVP8 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]) +{ + struct object_surface *obj_surface; + int i; + + obj_surface = decode_state->reference_objects[0]; + + if (pic_param->last_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[0].surface_id = VA_INVALID_ID; + frame_store[0].obj_surface = NULL; + } else { + frame_store[0].surface_id = pic_param->last_ref_frame; + frame_store[0].obj_surface = obj_surface; + } + + obj_surface = decode_state->reference_objects[1]; + + if (pic_param->golden_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[1].surface_id = frame_store[0].surface_id; + frame_store[1].obj_surface = frame_store[0].obj_surface; + } else { + frame_store[1].surface_id = pic_param->golden_ref_frame; + frame_store[1].obj_surface = obj_surface; + } + + obj_surface = decode_state->reference_objects[2]; + + if (pic_param->alt_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[2].surface_id = frame_store[0].surface_id; + frame_store[2].obj_surface = frame_store[0].obj_surface; + } else { + frame_store[2].surface_id = pic_param->alt_ref_frame; + frame_store[2].obj_surface = obj_surface; + } + + for (i = 3; i < MAX_GEN_REFERENCE_FRAMES; i++) { + frame_store[i].surface_id = frame_store[i % 2].surface_id; + frame_store[i].obj_surface = frame_store[i % 2].obj_surface; + } + +} + static VAStatus intel_decoder_check_avc_parameter(VADriverContextP ctx, struct decode_state *decode_state) -- cgit v1.2.1 From 2151cda3d3a0d7c96cd19f9b89c18d30a4b612ef Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Wed, 15 Jan 2014 13:21:46 +0800 Subject: Fix vp8 partition offset set error Signed-off-by: Zhong Li --- src/gen8_mfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index ff38c417..cd10187e 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2998,7 +2998,8 @@ 
gen8_mfd_vp8_bsd_object(VADriverContextP ctx, OUT_BCS_BATCH(batch, partition_size_0); OUT_BCS_BATCH(batch, offset); - offset += partition_size_0; + //partion sizes in bytes are present after the above first partition when there are more than one token partition + offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2)); for (i = 1; i < 9; i++) { if (i < slice_param->num_of_partitions) { OUT_BCS_BATCH(batch, slice_param->partition_size[i]); -- cgit v1.2.1 From df966b871c4d321bf332821b22619cfa5519941e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 17 Jan 2014 16:46:52 +0800 Subject: Don't advertise CBR for MPEG-2 encoding Signed-off-by: Xiang, Haihao --- src/gen6_mfc_common.c | 2 ++ src/i965_drv_video.c | 6 +++++- src/i965_encoder.c | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 31573ba3..26d8400d 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -391,6 +391,8 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; if (rate_control_mode == VA_RC_CBR) { + assert(encoder_context->codec != CODEC_MPEG2); + /*Programing bit rate control */ if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) { intel_mfc_bit_rate_control_context_init(encode_state, mfc_context); diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index ee8e7d4e..8c5894cc 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -527,7 +527,11 @@ i965_GetConfigAttributes(VADriverContextP ctx, case VAConfigAttribRateControl: if (entrypoint == VAEntrypointEncSlice) { - attrib_list[i].value = VA_RC_CBR | VA_RC_CQP; + attrib_list[i].value = VA_RC_CQP; + + if (profile != VAProfileMPEG2Main && + profile != VAProfileMPEG2Simple) + attrib_list[i].value |= VA_RC_CBR; break; } diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 9bf133f0..534591cd 100644 --- a/src/i965_encoder.c 
+++ b/src/i965_encoder.c @@ -367,6 +367,13 @@ intel_enc_hw_context_init(VADriverContextP ctx, for (i = 0; i < obj_config->num_attribs; i++) { if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) { encoder_context->rate_control_mode = obj_config->attrib_list[i].value; + + if (encoder_context->codec == CODEC_MPEG2 && + encoder_context->rate_control_mode & VA_RC_CBR) { + WARN_ONCE("Don't support CBR for MPEG-2 encoding\n"); + encoder_context->rate_control_mode &= ~VA_RC_CBR; + } + break; } } -- cgit v1.2.1 From bb1b7e40140150ca7acb8b5d19db5b0f9a13ef2d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 17 Jan 2014 16:51:20 +0800 Subject: Use the right parameters to initialize bit rate context Reported-by: Gwenole Beauchesne Signed-off-by: Xiang, Haihao --- src/gen6_mfc_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 26d8400d..07e2eb24 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -89,8 +89,8 @@ intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, struct gen6_mfc_context *mfc_context) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ; int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs; int intra_mb_size = inter_mb_size * 5.0; -- cgit v1.2.1 From 06702fb609b5fc9707f72a6e15e2117653ffd849 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jan 2014 09:58:06 +0800 Subject: Fix the wrong setting in MI_BATCH_BATCH_START command on Snb/Ivy/Haswell 
Signed-off-by: Zhao Yakui --- src/gen6_vme.c | 2 +- src/gen75_vme.c | 4 ++-- src/gen75_vpp_gpe.c | 2 +- src/gen7_vme.c | 4 ++-- src/i965_post_processing.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gen6_vme.c b/src/gen6_vme.c index d7d4ba22..8f0006f9 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -520,7 +520,7 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 7f788b81..0467f383 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -613,7 +613,7 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -925,7 +925,7 @@ gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index ac091000..791ee3d6 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -281,7 +281,7 @@ gen75_gpe_process_pipeline_setup(VADriverContextP ctx, gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx); BEGIN_BATCH(vpp_gpe_ctx->batch, 2); - OUT_BATCH(vpp_gpe_ctx->batch, 
MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(vpp_gpe_ctx->batch, vpp_gpe_ctx->vpp_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, diff --git a/src/gen7_vme.c b/src/gen7_vme.c index a7081452..042fe5d0 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -638,7 +638,7 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -917,7 +917,7 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e058378a..45d5561c 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5419,7 +5419,7 @@ gen6_pp_object_walker(VADriverContextP ctx, ADVANCE_BATCH(batch); } else { BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, command_buffer, I915_GEM_DOMAIN_COMMAND, 0, 0); -- cgit v1.2.1 From fb0a92c1ed852e51f550d92d510981ac4f680d1b Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 20 Jan 2014 10:59:10 +0800 Subject: Don't use assert() in case getting wrong parameters from user Signed-off-by: Xiang, Haihao --- src/gen8_mfd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index cd10187e..a801a7c3 100644 --- 
a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -3033,12 +3033,18 @@ gen8_mfd_vp8_decode_picture(VADriverContextP ctx, pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; /* one slice per frame */ - assert(decode_state->num_slice_params == 1); - assert(decode_state->slice_params[0]->num_elements == 1); - assert(decode_state->slice_params && decode_state->slice_params[0]->buffer); - assert(decode_state->slice_datas[0]->bo); + if (decode_state->num_slice_params != 1 || + (!decode_state->slice_params || + !decode_state->slice_params[0] || + (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) || + (!decode_state->slice_datas || + !decode_state->slice_datas[0] || + !decode_state->slice_datas[0]->bo) || + !decode_state->probability_data) { + WARN_ONCE("Wrong parameters for VP8 decoding\n"); - assert(decode_state->probability_data); + return; + } slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; slice_data_bo = decode_state->slice_datas[0]->bo; -- cgit v1.2.1 From 098ea14b74cfb621d7b4f58f541360266636d61c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 20 Jan 2014 11:04:24 +0800 Subject: Warning fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen8_mfd.c: In function ‘gen8_mfd_vp8_decode_init’: gen8_mfd.c:2773:5: warning: implicit declaration of function ‘intel_update_vp8_frame_store_index’ [-Wimplicit-function-declaration] Signed-off-by: Xiang, Haihao --- src/i965_decoder_utils.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 8a9fbe2f..8f64dfb7 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -99,4 +99,11 @@ intel_mpeg2_find_next_slice(struct decode_state *decode_state, int *group_idx, int *element_idx); + +void +intel_update_vp8_frame_store_index(VADriverContextP ctx, + struct decode_state *decode_state, + 
VAPictureParameterBufferVP8 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); + #endif /* I965_DECODER_UTILS_H */ -- cgit v1.2.1 From 9c2f9806af7653675a6d9414a50ce0f620d575d6 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 20 Jan 2014 11:13:11 +0800 Subject: Remove the redundant if () from gen8_pp_upload_constants This fixed the issue reported by Klockwork Signed-off-by: Xiang, Haihao --- src/i965_post_processing.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 45d5561c..bbcf1c71 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5114,14 +5114,12 @@ static void gen8_pp_upload_constants(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned char *constant_buffer; int param_size; assert(sizeof(struct gen7_pp_static_parameter) == 192); - if (IS_GEN8(i965->intel.device_id)) - param_size = sizeof(struct gen7_pp_static_parameter); + param_size = sizeof(struct gen7_pp_static_parameter); dri_bo_map(pp_context->dynamic_state.bo, 1); assert(pp_context->dynamic_state.bo->virtual); -- cgit v1.2.1 From 75c5420674e765d30fbbe553353ba8af30b8f0f1 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sat, 25 Jan 2014 18:43:52 -0700 Subject: intel-vaapi: Add more checks for H264 decoding parameter to filter the unsupported clip Signed-off-by: Yuan Feng Signed-off-by: Zhao Yakui --- src/i965_decoder_utils.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 7ba51ba1..064074f1 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -547,6 +547,7 @@ intel_update_vp8_frame_store_index(VADriverContextP ctx, static VAStatus intel_decoder_check_avc_parameter(VADriverContextP ctx, + VAProfile h264_profile, struct decode_state *decode_state) { struct i965_driver_data *i965 = 
i965_driver_data(ctx); @@ -566,6 +567,14 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, if (pic_param->CurrPic.picture_id != decode_state->current_render_target) goto error; + if ((h264_profile != VAProfileH264Baseline)) { + if (pic_param->num_slice_groups_minus1 || + pic_param->pic_fields.bits.redundant_pic_cnt_present_flag) { + WARN_ONCE("Unsupported the FMO/ASO constraints!!!\n"); + goto error; + } + } + for (i = 0; i < 16; i++) { if (pic_param->ReferenceFrames[i].flags & VA_PICTURE_H264_INVALID || pic_param->ReferenceFrames[i].picture_id == VA_INVALID_SURFACE) @@ -752,7 +761,7 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: - vaStatus = intel_decoder_check_avc_parameter(ctx, decode_state); + vaStatus = intel_decoder_check_avc_parameter(ctx, profile, decode_state); break; case VAProfileVC1Simple: -- cgit v1.2.1 From 568e1522153a6d068fb4c0f7e409b595fe1ea816 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 2 Mar 2014 19:34:43 -0700 Subject: configure.ac: update the dependency on intel-gen4asm The new version of intel-gen4asm is required to build shaders for BDW Reviewed-by: Zhao Yakui Signed-off-by: Xiang, Haihao --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5f1f64a2..b8500a74 100644 --- a/configure.ac +++ b/configure.ac @@ -76,7 +76,7 @@ PKG_CHECK_MODULES([DRM], [libdrm >= $LIBDRM_VERSION]) AC_SUBST(LIBDRM_VERSION) dnl Check for gen4asm -PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.3], [gen4asm=yes], [gen4asm=no]) +PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.5], [gen4asm=yes], [gen4asm=no]) AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) AC_PATH_PROG([GEN4ASM], [intel-gen4asm]) -- cgit v1.2.1 From ce9ff2f39309a1189b28f1f57a7e586b7568345c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 18 Mar 2014 08:40:45 +0800 Subject: Update NEWS Signed-off-by: Xiang, 
Haihao --- NEWS | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index b2d00ab9..d1f990ea 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,13 @@ -libva-intel-driver NEWS -- summary of changes. 2013-12-16 +libva-intel-driver NEWS -- summary of changes. 2014-03-DD Copyright (C) 2009-2013 Intel Corporation +Version 1.3.0 - DD.Mar.2014 +* Add support for Broadwell + - Decoding: H.264/MPEG-2/VC-1/JPEG/VP8 + - Encoding: H.264/MPEG-2 + - VPP: CSC/scaling/NoiseReduction/Deinterlacing{Bob, MotionAdaptive, MotionCompensated}/Sharpening/ColorBalance +* Fix the wrong setting in MI_BATCH_BATCH_START + Version 1.2.2 - 16.Dec.2013 * Motion compensation DI on HSW * Optimization of FPS for H.264 encoding on HSW -- cgit v1.2.1 From 3df0f27b6c5bfd960975b9ebfdaa9f4293128479 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 17 Mar 2014 12:50:08 +0800 Subject: Bump version to 1.3.0.pre1 To build the code, a new version of VA-API is needed, so update the dependency on VA-API as well Signed-off-by: Xiang, Haihao --- configure.ac | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index b8500a74..872b5895 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) -m4_define([intel_driver_minor_version], [2]) -m4_define([intel_driver_micro_version], [3]) +m4_define([intel_driver_minor_version], [3]) +m4_define([intel_driver_micro_version], [0]) m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) @@ -10,8 +10,8 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre]) ]) # libva minimum version requirement -m4_define([va_api_version], [0.34]) -m4_define([libva_package_version], [1.2.0]) +m4_define([va_api_version], [0.35]) +m4_define([libva_package_version], [1.3.0]) # libdrm minimum version requirement 
m4_define([libdrm_version], [2.4.45]) -- cgit v1.2.1 From f393de526b37a964572b1e5ea326bef826890d2c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 18 Mar 2014 08:49:04 +0800 Subject: Fix broken make dist Signed-off-by: Xiang, Haihao --- src/shaders/render/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index e59869cf..e7d5e765 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -166,7 +166,7 @@ EXTRA_DIST = \ $(INTEL_G7A) \ $(INTEL_G7B) \ $(INTEL_G7B_HASWELL) \ - $(INTEL_G8A)) \ + $(INTEL_G8A) \ $(INTEL_G8B) \ $(NULL) -- cgit v1.2.1 From b587769b020e504f2d644d8fa77bc9869367f805 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 24 Mar 2014 16:52:52 +0800 Subject: Fix the broken package made by 'make dist' Signed-off-by: Xiang, Haihao --- src/shaders/post_processing/gen8/Makefile.am | 10 +++++++++- src/shaders/vme/Makefile.am | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index ee3f2982..b41ab46e 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -12,6 +12,11 @@ INTEL_PP_G8B = \ pa_to_pa.g8b \ $(NULL) +INTEL_PP_PRE_G8B = \ + sharpening_h_blur.g8b \ + sharpening_unmask.g8b \ + sharpening_v_blur.g8b + INTEL_PP_G8A = \ EOT.g8a \ PL2_AVS_Buf_0.g8a \ @@ -63,7 +68,10 @@ $(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) CLEANFILES = $(INTEL_PP_GEN7_ASM) EXTRA_DIST = \ - $(INTEL_PP_G8B) + $(INTEL_PP_ASM) \ + $(INTEL_PP_G8A) \ + $(INTEL_PP_G8B) \ + $(INTEL_PP_PRE_G8B) # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index adea78bb..d89b6897 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,7 +1,7 @@ 
VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm -VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm +VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm mpeg2_inter_gen8.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a -- cgit v1.2.1 From 8489b40e33034479298a437d5090d32401dcb2b0 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 24 Mar 2014 19:12:07 +0800 Subject: Intel driver 1.3.0 Signed-off-by: Xiang, Haihao --- NEWS | 6 +++--- configure.ac | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index d1f990ea..9711d387 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ -libva-intel-driver NEWS -- summary of changes. 2014-03-DD -Copyright (C) 2009-2013 Intel Corporation +libva-intel-driver NEWS -- summary of changes. 
2014-03-24 +Copyright (C) 2009-2014 Intel Corporation -Version 1.3.0 - DD.Mar.2014 +Version 1.3.0 - 24.Mar.2014 * Add support for Broadwell - Decoding: H.264/MPEG-2/VC-1/JPEG/VP8 - Encoding: H.264/MPEG-2 diff --git a/configure.ac b/configure.ac index 872b5895..846938d8 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) m4_define([intel_driver_micro_version], [0]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 75a7b090e9784864d7bf567ea7d68b1c13b19922 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 25 Mar 2014 08:55:14 +0800 Subject: 1.3.1.pre1 for development Signed-off-by: Xiang, Haihao --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 846938d8..f7975bee 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) -m4_define([intel_driver_micro_version], [0]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [1]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 276bc5319a8f1b7c0143bc68ba62ffdfe0aa02cd Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 09:08:34 +0800 Subject: bdw: Fix the FENCE message in GPU shader for H264 encoding Use the real register as write_back register instead of NULL register although the Fence Message doesn't touch it. 
Signed-off-by: Zhao Yakui (cherry picked from commit 7ac4263ff2dae5c877b92356d04df4ccfe10d7c9) --- src/shaders/vme/inter_bframe_gen8.asm | 2 +- src/shaders/vme/inter_bframe_gen8.g8b | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shaders/vme/inter_bframe_gen8.asm b/src/shaders/vme/inter_bframe_gen8.asm index f16a63bb..240dc611 100644 --- a/src/shaders/vme/inter_bframe_gen8.asm +++ b/src/shaders/vme/inter_bframe_gen8.asm @@ -766,7 +766,7 @@ send (16) /* Issue message fence so that the previous write message is committed */ send (16) mb_ind - obw_wb + mb_wb.0<1>:ud NULL data_port( OBR_CACHE_TYPE, diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b index 98531a35..f981ca34 100644 --- a/src/shaders/vme/inter_bframe_gen8.g8b +++ b/src/shaders/vme/inter_bframe_gen8.g8b @@ -358,7 +358,7 @@ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, - { 0x0a800031, 0x20000a60, 0x0e000b40, 0x0219e003 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, @@ -411,7 +411,7 @@ { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, { 0x00000020, 0x34000000, 0x0e001400, 0x000000a0 }, { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, - { 0x05000010, 0x20001240, 0x16000f68, 0x00020002 }, + { 0x05000010, 0x20001a60, 0x1e000f68, 0x00020002 }, { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, { 0x00010020, 0x34000000, 0x0e001400, 0x00000060 }, { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, -- cgit v1.2.1 From cdd6f6e9b6f1c6cb129781dec7d15283eabd8d3c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 09:08:38 +0800 Subject: bdw: Follow the spec to update the PIPE_CONTROL command This is the hardware requirement. 
Signed-off-by: Zhao Yakui (cherry picked from commit fc4d39f3b849366ed04223620fa371d76cf813b0) --- src/intel_batchbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 52bf4430..9dc496d0 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -192,6 +192,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2)); OUT_BATCH(batch, + CMD_PIPE_CONTROL_CS_STALL | CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_DC_FLUSH | -- cgit v1.2.1 From bb84d92fcd60b9e889c386f99b3e78084f4d3d77 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 09:08:43 +0800 Subject: BDW: Follow the spec to add the MEDIA_STATE_FLUSH before MEDIA_INTERFACE_LOAD Signed-off-by: Zhao Yakui (cherry picked from commit a90acbe7f08d66084e70113859198c3975f63b80) --- src/i965_gpe_utils.c | 5 ++++- src/i965_post_processing.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 6bbad2d9..c97220a3 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -1050,7 +1050,10 @@ gen8_gpe_idrt(VADriverContextP ctx, struct i965_gpe_context *gpe_context, struct intel_batchbuffer *batch) { - BEGIN_BATCH(batch, 4); + BEGIN_BATCH(batch, 6); + + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); + OUT_BATCH(batch, 0); OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2)); OUT_BATCH(batch, 0); diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index bbcf1c71..28d34806 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5304,7 +5304,11 @@ gen8_interface_descriptor_load(VADriverContextP ctx, { struct intel_batchbuffer *batch = pp_context->batch; - BEGIN_BATCH(batch, 4); + BEGIN_BATCH(batch, 6); + + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); OUT_BATCH(batch, 0); 
OUT_BATCH(batch, -- cgit v1.2.1 From c4b6438f72307ac662a22383c8b19d6dfc5eb31c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:07 +0800 Subject: Define i965_DestroySurfaces in header file explicitly to avoid multiple declaration Signed-off-by: Zhao Yakui (cherry picked from commit af0687252bfc6f81ff5361feedba7ec8989b3555) --- src/gen75_mfd.c | 4 ---- src/gen75_vpp_gpe.c | 5 ----- src/gen75_vpp_vebox.c | 4 ---- src/gen7_mfd.c | 4 ---- src/gen8_mfd.c | 4 ---- src/i965_drv_video.h | 4 ++++ src/i965_encoder.c | 4 ---- src/i965_post_processing.c | 4 ---- src/i965_render.c | 7 ------- 9 files changed, 4 insertions(+), 36 deletions(-) diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 4a4de0b6..7bea1f7c 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -2466,10 +2466,6 @@ gen75_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); VAStatus i965_CreateSurfaces(VADriverContextP ctx, int width, diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 791ee3d6..9cb2912f 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -59,11 +59,6 @@ i965_CreateSurfaces(VADriverContextP ctx, int num_surfaces, VASurfaceID *surfaces); -extern VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); - /* Shaders information for sharpening */ static const unsigned int gen75_gpe_sharpening_h_blur[][4] = { #include "shaders/post_processing/gen75/sharpening_h_blur.g75b" diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 160560e7..20fb44a9 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -53,10 +53,6 @@ i965_DeriveImage(VADriverContextP ctx, VABufferID surface, VAImage *out_image); extern VAStatus i965_DestroyImage(VADriverContextP ctx, VAImageID image); -extern VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - 
int num_surfaces); extern VAStatus i965_CreateSurfaces(VADriverContextP ctx, diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 50910342..a534fb89 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -2091,10 +2091,6 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); VAStatus i965_CreateSurfaces(VADriverContextP ctx, int width, diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index a801a7c3..08d9b3d0 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2180,10 +2180,6 @@ gen8_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ #ifdef JPEG_WA -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); VAStatus i965_CreateSurfaces(VADriverContextP ctx, int width, diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 98e08fef..69b98707 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -410,6 +410,10 @@ extern VAStatus i965_MapBuffer(VADriverContextP ctx, extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id); +extern VAStatus i965_DestroySurfaces(VADriverContextP ctx, + VASurfaceID *surface_list, + int num_surfaces); + #define I965_SURFACE_MEM_NATIVE 0 #define I965_SURFACE_MEM_GEM_FLINK 1 #define I965_SURFACE_MEM_DRM_PRIME 2 diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 534591cd..e2570cff 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -44,10 +44,6 @@ extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_con extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); VAStatus 
i965_CreateSurfaces(VADriverContextP ctx, int width, diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 28d34806..bbcba202 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -58,10 +58,6 @@ #define VA_STATUS_SUCCESS_1 0xFFFFFFFE -extern VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); extern VAStatus i965_CreateSurfaces(VADriverContextP ctx, int width, diff --git a/src/i965_render.c b/src/i965_render.c index b3a844f1..ff15a38f 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -4423,13 +4423,6 @@ gen8_render_put_subpicture( intel_batchbuffer_flush(batch); } -/* - * global functions - */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); void intel_render_put_surface( VADriverContextP ctx, -- cgit v1.2.1 From bf3bcf41872502b4503cd1702fe27745306fc4e3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:07 +0800 Subject: Use the XXX_render_put_surface/put_subpicture as callback function for rendering Signed-off-by: Zhao Yakui (cherry picked from commit 9db92268b7b7bf6763ae76df0021608effe260ec) --- src/i965_render.c | 39 +++++++++++++++++++-------------------- src/i965_render.h | 8 ++++++++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index ff15a38f..6a994145 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -4433,6 +4433,7 @@ intel_render_put_surface( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; int has_done_scaling = 0; VASurfaceID out_surface_id = i965_post_processing(ctx, obj_surface, @@ -4453,14 +4454,7 @@ intel_render_put_surface( src_rect = dst_rect; } - if (IS_GEN8(i965->intel.device_id)) - gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_surface(ctx, obj_surface, 
src_rect, dst_rect, flags); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else - i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); + render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); if (out_surface_id != VA_INVALID_ID) i965_DestroySurfaces(ctx, &out_surface_id, 1); @@ -4475,15 +4469,9 @@ intel_render_put_subpicture( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (IS_GEN8(i965->intel.device_id)) - gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else - i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); + render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } static bool @@ -4496,6 +4484,8 @@ gen8_render_init(VADriverContextP ctx) unsigned char *kernel_ptr; struct i965_kernel *kernel; + render_state->render_put_surface = gen8_render_put_surface; + render_state->render_put_subpicture = gen8_render_put_subpicture; if (IS_GEN8(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen8, @@ -4574,16 +4564,25 @@ i965_render_init(VADriverContextP ctx) if (IS_GEN8(i965->intel.device_id)) { return gen8_render_init(ctx); - } else if (IS_GEN7(i965->intel.device_id)) + } else if (IS_GEN7(i965->intel.device_id)) { memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_id) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); - else if (IS_GEN6(i965->intel.device_id)) + render_state->render_put_surface = gen7_render_put_surface; + render_state->render_put_subpicture = gen7_render_put_subpicture; + } else if (IS_GEN6(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); - else if (IS_IRONLAKE(i965->intel.device_id)) + render_state->render_put_surface = gen6_render_put_surface; + render_state->render_put_subpicture = gen6_render_put_subpicture; + } else if (IS_IRONLAKE(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); - else + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } else { memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; diff --git a/src/i965_render.h b/src/i965_render.h index 132e7853..3bb3d3e5 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -118,6 +118,14 @@ struct i965_render_state unsigned int scissor_offset; int scissor_size; + void (*render_put_surface)(VADriverContextP ctx, struct object_surface *, + const VARectangle *src_rec, + const VARectangle *dst_rect, + unsigned int flags); + void (*render_put_subpicture)(VADriverContextP ctx, struct object_surface *, + const VARectangle *src_rec, + const VARectangle *dst_rect); + }; bool i965_render_init(VADriverContextP ctx); -- cgit v1.2.1 From 8eb9d710330410867831c32db0fd0257b81d8f9f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:08 +0800 Subject: Add the seperated file for rendering on BDW This is to 
avoid the interference between the new platform and previous platform. Signed-off-by: Zhao Yakui (cherry picked from commit fbd6b7ff33ed5beb15f0122ec70668ed0c8479d2) --- src/Makefile.am | 1 + src/gen8_render.c | 1832 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_render.c | 1643 +++-------------------------------------------- src/i965_render.h | 4 + 4 files changed, 1933 insertions(+), 1547 deletions(-) create mode 100644 src/gen8_render.c diff --git a/src/Makefile.am b/src/Makefile.am index 5b2ac592..61f35293 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -76,6 +76,7 @@ source_c = \ i965_gpe_utils.c \ i965_post_processing.c \ i965_render.c \ + gen8_render.c \ intel_batchbuffer.c \ intel_batchbuffer_dump.c\ intel_driver.c \ diff --git a/src/gen8_render.c b/src/gen8_render.c new file mode 100644 index 00000000..d052cf4a --- /dev/null +++ b/src/gen8_render.c @@ -0,0 +1,1832 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Keith Packard + * Xiang Haihao + * Zhao Yakui + * + */ + +/* + * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c + */ + +#include +#include +#include +#include +#include + +#include + +#include "intel_batchbuffer.h" +#include "intel_driver.h" +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_structs.h" + +#include "i965_render.h" + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 1 + +#define PS_KERNEL_NUM_GRF 48 +#define PS_MAX_THREADS 32 + +/* Programs for Gen8 */ +static const uint32_t sf_kernel_static_gen8[][4] ={ + +}; +static const uint32_t ps_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_planar.g8b" +#include "shaders/render/exa_wm_yuv_color_balance.g8b" +#include "shaders/render/exa_wm_yuv_rgb.g8b" +#include "shaders/render/exa_wm_write.g8b" +}; + +static const uint32_t ps_subpic_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_argb.g8b" +#include "shaders/render/exa_wm_write.g8b" +}; + + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 + +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) + +enum { + SF_KERNEL = 0, + PS_KERNEL, + PS_SUBPIC_KERNEL +}; + +static struct i965_kernel render_kernels_gen8[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen8, + sizeof(sf_kernel_static_gen8), + NULL + }, + { + "PS", + PS_KERNEL, + ps_kernel_static_gen8, + sizeof(ps_kernel_static_gen8), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + 
ps_subpic_kernel_static_gen8, + sizeof(ps_subpic_kernel_static_gen8), + NULL + } +}; + +#define URB_VS_ENTRIES 8 +#define URB_VS_ENTRY_SIZE 1 + +#define URB_GS_ENTRIES 0 +#define URB_GS_ENTRY_SIZE 0 + +#define URB_CLIP_ENTRIES 0 +#define URB_CLIP_ENTRY_SIZE 0 + +#define URB_SF_ENTRIES 1 +#define URB_SF_ENTRY_SIZE 2 + +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; + + +static void +gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +/* Set "Shader Channel Select" for GEN8+ */ +void +gen8_render_set_surface_scs(struct gen8_surface_state *ss) +{ + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; +} + +static void +gen8_render_set_surface_state( + struct gen8_surface_state *ss, + dri_bo *bo, + unsigned long offset, + int width, + int height, + int pitch, + int format, + unsigned int flags +) +{ + unsigned int tiling; + unsigned int swizzle; + + memset(ss, 0, sizeof(*ss)); + + switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { + case I965_PP_FLAG_BOTTOM_FIELD: + ss->ss0.vert_line_stride_ofs = 1; + /* fall-through */ 
+ case I965_PP_FLAG_TOP_FIELD: + ss->ss0.vert_line_stride = 1; + height /= 2; + break; + } + + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss8.base_addr = bo->offset + offset; + + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + + ss->ss3.pitch = pitch - 1; + + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + + dri_bo_get_tiling(bo, &tiling, &swizzle); + gen8_render_set_surface_tiling(ss, tiling); +} + +static void +gen8_render_src_surface_state( + VADriverContextP ctx, + int index, + dri_bo *region, + unsigned long offset, + int w, + int h, + int pitch, + int format, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + void *ss; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + + assert(index < MAX_RENDER_SURFACES); + + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + gen8_render_set_surface_state(ss, + region, offset, + w, h, + pitch, format, flags); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + region); + + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); + render_state->wm.sampler_count++; +} + +static void +gen8_render_src_surfaces_state( + VADriverContextP ctx, + struct object_surface *obj_surface, + unsigned int flags +) +{ + int region_pitch; + int rw, rh; + dri_bo *region; + + region_pitch = obj_surface->width; + rw = obj_surface->orig_width; + rh = obj_surface->orig_height; + region = obj_surface->bo; + + gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ + gen8_render_src_surface_state(ctx, 2, 
region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); + + if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + gen8_render_src_surface_state(ctx, 3, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */ + gen8_render_src_surface_state(ctx, 4, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8G8_UNORM, flags); + } else { + gen8_render_src_surface_state(ctx, 3, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); /* U */ + gen8_render_src_surface_state(ctx, 4, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); + gen8_render_src_surface_state(ctx, 5, region, + region_pitch * obj_surface->y_cr_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); /* V */ + gen8_render_src_surface_state(ctx, 6, region, + region_pitch * obj_surface->y_cr_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); + } +} + +static void +gen8_subpic_render_src_surfaces_state(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + dri_bo *subpic_region; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + struct object_image *obj_image = obj_subpic->obj_image; + + assert(obj_surface); + assert(obj_surface->bo); + subpic_region = obj_image->bo; + /*subpicture surface*/ + gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, 
obj_subpic->format, 0); + gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); +} + +static void +gen8_render_dest_surface_state(VADriverContextP ctx, int index) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + void *ss; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + int format; + assert(index < MAX_RENDER_SURFACES); + + if (dest_region->cpp == 2) { + format = I965_SURFACEFORMAT_B5G6R5_UNORM; + } else { + format = I965_SURFACEFORMAT_B8G8R8A8_UNORM; + } + + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + gen8_render_set_surface_state(ss, + dest_region->bo, 0, + dest_region->width, dest_region->height, + dest_region->pitch, format, 0); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + dest_region->bo); + + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); +} + +static void +i965_fill_vertex_buffer( + VADriverContextP ctx, + float tex_coords[4], /* [(u1,v1);(u2,v2)] */ + float vid_coords[4] /* [(x1,y1);(x2,y2)] */ +) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + float vb[12]; + + enum { X1, Y1, X2, Y2 }; + + static const unsigned int g_rotation_indices[][6] = { + [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 }, + [VA_ROTATION_90] = { X2, Y1, X2, Y2, X1, Y2 }, + [VA_ROTATION_180] = { X1, Y1, X2, Y1, X2, Y2 }, + [VA_ROTATION_270] = { X1, Y2, X1, Y1, X2, Y1 }, + }; + + const unsigned int * const rotation_indices = + g_rotation_indices[i965->rotation_attrib->value]; + + vb[0] = tex_coords[rotation_indices[0]]; /* bottom-right 
corner */ + vb[1] = tex_coords[rotation_indices[1]]; + vb[2] = vid_coords[X2]; + vb[3] = vid_coords[Y2]; + + vb[4] = tex_coords[rotation_indices[2]]; /* bottom-left corner */ + vb[5] = tex_coords[rotation_indices[3]]; + vb[6] = vid_coords[X1]; + vb[7] = vid_coords[Y2]; + + vb[8] = tex_coords[rotation_indices[4]]; /* top-left corner */ + vb[9] = tex_coords[rotation_indices[5]]; + vb[10] = vid_coords[X1]; + vb[11] = vid_coords[Y1]; + + dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb); +} + +static void +i965_subpic_render_upload_vertex(VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *output_rect) +{ + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + float tex_coords[4], vid_coords[4]; + VARectangle dst_rect; + + if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) + dst_rect = obj_subpic->dst_rect; + else { + const float sx = (float)output_rect->width / obj_surface->orig_width; + const float sy = (float)output_rect->height / obj_surface->orig_height; + dst_rect.x = output_rect->x + sx * obj_subpic->dst_rect.x; + dst_rect.y = output_rect->y + sy * obj_subpic->dst_rect.y; + dst_rect.width = sx * obj_subpic->dst_rect.width; + dst_rect.height = sy * obj_subpic->dst_rect.height; + } + + tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width; + tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height; + tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width; + tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height; + + vid_coords[0] = dst_rect.x; + vid_coords[1] = dst_rect.y; + vid_coords[2] = (float)(dst_rect.x + dst_rect.width); + vid_coords[3] = (float)(dst_rect.y + dst_rect.height); + + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); +} + +static void +i965_render_upload_vertex( + VADriverContextP ctx, + struct 
object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + float tex_coords[4], vid_coords[4]; + int width, height; + + width = obj_surface->orig_width; + height = obj_surface->orig_height; + + tex_coords[0] = (float)src_rect->x / width; + tex_coords[1] = (float)src_rect->y / height; + tex_coords[2] = (float)(src_rect->x + src_rect->width) / width; + tex_coords[3] = (float)(src_rect->y + src_rect->height) / height; + + vid_coords[0] = dest_region->x + dst_rect->x; + vid_coords[1] = dest_region->y + dst_rect->y; + vid_coords[2] = vid_coords[0] + dst_rect->width; + vid_coords[3] = vid_coords[1] + dst_rect->height; + + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); +} + +static void +i965_render_drawing_rectangle(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2); + OUT_BATCH(batch, 0x00000000); + OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16); + OUT_BATCH(batch, 0x00000000); + ADVANCE_BATCH(batch); +} + +static void +i965_render_upload_image_palette( + VADriverContextP ctx, + struct object_image *obj_image, + unsigned int alpha +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + unsigned int i; + + assert(obj_image); + + if (!obj_image) + return; + + if (obj_image->image.num_palette_entries == 0) + return; + + BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries); + OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1)); + /*fill 
palette*/ + //int32_t out[16]; //0-23:color 23-31:alpha + for (i = 0; i < obj_image->image.num_palette_entries; i++) + OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]); + ADVANCE_BATCH(batch); +} + +static void +gen8_clear_dest_region(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + unsigned int blt_cmd, br13; + int pitch; + + blt_cmd = GEN8_XY_COLOR_BLT_CMD; + br13 = 0xf0 << 16; + pitch = dest_region->pitch; + + if (dest_region->cpp == 4) { + br13 |= BR13_8888; + blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA); + } else { + assert(dest_region->cpp == 2); + br13 |= BR13_565; + } + + if (dest_region->tiling != I915_TILING_NONE) { + blt_cmd |= XY_COLOR_BLT_DST_TILED; + pitch /= 4; + } + + br13 |= pitch; + + intel_batchbuffer_start_atomic_blt(batch, 24); + BEGIN_BLT_BATCH(batch, 7); + + OUT_BATCH(batch, blt_cmd); + OUT_BATCH(batch, br13); + OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); + OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) | + (dest_region->x + dest_region->width)); + OUT_RELOC(batch, dest_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(batch, 0x0); + OUT_BATCH(batch, 0x0); + ADVANCE_BATCH(batch); + intel_batchbuffer_end_atomic(batch); +} + + +/* + * for GEN8 + */ +#define ALIGNMENT 64 + +static void +gen8_render_initialize(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + dri_bo *bo; + int size; + unsigned int end_offset; + + /* VERTEX BUFFER */ + dri_bo_unreference(render_state->vb.vertex_buffer); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vertex buffer", + 4096, + 4096); + assert(bo); + render_state->vb.vertex_buffer = bo; + + /* WM */ + 
dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); + assert(bo); + render_state->wm.surface_state_binding_table_bo = bo; + + render_state->curbe_size = 256; + + render_state->wm.sampler_count = 0; + + render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state); + + render_state->cc_state_size = sizeof(struct gen6_color_calc_state); + + render_state->cc_viewport_size = sizeof(struct i965_cc_viewport); + + render_state->blend_state_size = sizeof(struct gen8_global_blend_state) + + 16 * sizeof(struct gen8_blend_state_rt); + + render_state->sf_clip_size = 1024; + + render_state->scissor_size = 1024; + + size = ALIGN(render_state->curbe_size, ALIGNMENT) + + ALIGN(render_state->sampler_size, ALIGNMENT) + + ALIGN(render_state->cc_viewport_size, ALIGNMENT) + + ALIGN(render_state->cc_state_size, ALIGNMENT) + + ALIGN(render_state->blend_state_size, ALIGNMENT) + + ALIGN(render_state->sf_clip_size, ALIGNMENT) + + ALIGN(render_state->scissor_size, ALIGNMENT); + + dri_bo_unreference(render_state->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "dynamic_state", + size, + 4096); + + render_state->dynamic_state.bo = bo; + + end_offset = 0; + render_state->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + render_state->curbe_offset = end_offset; + end_offset += ALIGN(render_state->curbe_size, ALIGNMENT); + + /* Sampler_state */ + render_state->sampler_offset = end_offset; + end_offset += ALIGN(render_state->sampler_size, ALIGNMENT); + + /* CC_VIEWPORT_state */ + render_state->cc_viewport_offset = end_offset; + end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT); + + /* CC_STATE_state */ + render_state->cc_state_offset = end_offset; + end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT); + + /* Blend_state */ + render_state->blend_state_offset = 
end_offset; + end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT); + + /* SF_CLIP_state */ + render_state->sf_clip_offset = end_offset; + end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT); + + /* SCISSOR_state */ + render_state->scissor_offset = end_offset; + end_offset += ALIGN(render_state->scissor_size, ALIGNMENT); + + /* update the end offset of dynamic_state */ + render_state->dynamic_state.end_offset = end_offset; + +} + +static void +gen8_render_sampler(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_sampler_state *sampler_state; + int i; + unsigned char *cc_ptr; + + assert(render_state->wm.sampler_count > 0); + assert(render_state->wm.sampler_count <= MAX_SAMPLERS); + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->sampler_offset; + + sampler_state = (struct gen8_sampler_state *) cc_ptr; + + for (i = 0; i < render_state->wm.sampler_count; i++) { + memset(sampler_state, 0, sizeof(*sampler_state)); + sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state++; + } + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char 
*) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + + memset(global_blend_state, 0, render_state->blend_state_size); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + blend_state->blend1.pre_blend_clamp_enable = 1; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + + +static void +gen8_render_cc_viewport(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct i965_cc_viewport *cc_viewport; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_viewport_offset; + + cc_viewport = (struct i965_cc_viewport *) cc_ptr; + + memset(cc_viewport, 0, sizeof(*cc_viewport)); + + cc_viewport->min_depth = -1.e35; + cc_viewport->max_depth = 1.e35; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_color_calc_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_color_calc_state *color_calc_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_state_offset; + + color_calc_state = (struct gen6_color_calc_state *) cc_ptr; + + memset(color_calc_state, 0, sizeof(*color_calc_state)); + color_calc_state->constant_r = 1.0; + color_calc_state->constant_g = 0.0; + color_calc_state->constant_b = 1.0; + color_calc_state->constant_a = 1.0; + 
dri_bo_unmap(render_state->dynamic_state.bo); +} + +#define PI 3.1415926 + +static void +gen8_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface, + unsigned int flags) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + unsigned short *constant_buffer; + unsigned char *cc_ptr; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (unsigned short *) cc_ptr; + + if (obj_surface->subsampling == SUBSAMPLE_YUV400) { + assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); + + *constant_buffer = 2; + } else { + if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) + *constant_buffer = 1; + else + *constant_buffer = 0; + } + + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + 
memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + gen8_render_dest_surface_state(ctx, 0); + gen8_render_src_surfaces_state(ctx, obj_surface, flags); + gen8_render_sampler(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); + gen8_render_blend_state(ctx); + gen8_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); +} + +static void +gen8_emit_state_base_address(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW4 */ + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + + /*DW6*/ + /* Dynamic state base address */ + OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /*DW8*/ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(batch, 0); + + /*DW10 */ + /* Instruction base address */ + OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /*DW12 */ + 
OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_cc_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + OUT_BATCH(batch, (render_state->cc_state_offset + 1)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_BATCH(batch, (render_state->blend_state_offset + 1)); + ADVANCE_BATCH(batch); + +} + +static void +gen8_emit_vertices(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2)); + OUT_BATCH(batch, + (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) | + (0 << GEN8_VB0_MOCS_SHIFT) | + GEN7_VB0_ADDRESS_MODIFYENABLE | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 12 * 4); + ADVANCE_BATCH(batch); + + /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2)); + OUT_BATCH(batch, + _3DPRIM_RECTLIST); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(batch, + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(batch, 3); /* vertex count per 
instance */ + OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_vertex_element_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* + * The VUE layout + * dword 0-3: pad (0, 0, 0. 0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0) + */ + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2)); + + /* Element state 0. These are 4 dwords of 0 required for the VUE format. + * We don't really know or care what they do. + */ + + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + + /* offset 8: X, Y -> {x, y, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* offset 0: u,v -> {U, V, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + 
(I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen8_emit_vs_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* disable VS constant buffer */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 0 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 1 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 2 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2)); + OUT_BATCH(batch, 0); /* without VS kernel */ + OUT_BATCH(batch, 0); + /* VS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. 
VS shader GRF and URB buffer definition */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + +} + +/* + * URB layout on GEN8 + * ---------------------------------------- + * | PS Push Constants (8KB) | VS entries | + * ---------------------------------------- + */ +static void +gen8_emit_urb(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + unsigned int num_urb_entries = 64; + + /* The minimum urb entries is 64 */ + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Size is 8Kbs and base address is 0Kb */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + /* Size is 8Kbs and base address is 0Kb */ + OUT_BATCH(batch, + (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | + (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH(batch, + (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | + (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); 
+ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (5 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (6 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (7 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_bypass_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* bypass GS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 10); + OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2)); + /* GS shader address */ + OUT_BATCH(batch, 0); /* without GS kernel */ + OUT_BATCH(batch, 0); + /* DW3. GS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. 
GS shader GRF and URB offset/length */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* disable HS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2)); + OUT_BATCH(batch, 0); + /*DW2. HS pass-through */ + OUT_BATCH(batch, 0); + /*DW3. HS shader address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW5. HS shader flag. 
URB offset/length and so on */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable TE */ + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable DS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2)); + /* DW1. DS shader pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW3-5. DS shader dispatch flag.*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW8. 
DS shader output URB */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable STREAMOUT */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_invarient_states(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + ADVANCE_BATCH(batch); + + /* Update 3D Multisample pattern */ + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); + ADVANCE_BATCH(batch); + + /* Set system instruction pointer */ + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_clip_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(batch, 0); + 
OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); +} + +static void +gen8_emit_sf_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2)); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2)); + OUT_BATCH(batch, + (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) | + (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) | + (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | + (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | + (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* SBE for backend setup */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_wm_state(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + unsigned int num_samples = 0; + unsigned int max_threads; + + max_threads = render_state->max_wm_threads - 2; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE)); + ADVANCE_BATCH(batch); + + if (kernel == PS_KERNEL) { + 
BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(batch); + } else if (kernel == PS_SUBPIC_KERNEL) { + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PS_BLEND_HAS_WRITEABLE_RT | + GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT))); + ADVANCE_BATCH(batch); + } + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2)); + OUT_BATCH(batch, + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2)); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); + OUT_BATCH(batch, 0); + /*DW3-4. Constant buffer 0 */ + OUT_BATCH(batch, render_state->curbe_offset); + OUT_BATCH(batch, 0); + + /*DW5-10. Constant buffer 1-3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 12); + OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2)); + /* PS shader address */ + OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset); + + OUT_BATCH(batch, 0); + /* DW3. PS shader flag .Binding table cnt/sample cnt */ + OUT_BATCH(batch, + (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + /* DW4-5. Scatch space */ + OUT_BATCH(batch, 0); /* scratch space base offset */ + OUT_BATCH(batch, 0); + /* DW6. PS shader threads. */ + OUT_BATCH(batch, + ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples | + GEN7_PS_PUSH_CONSTANT_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + /* DW7. 
PS shader GRF */ + OUT_BATCH(batch, + (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(batch, 0); /* kernel 1 pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* kernel 2 pointer */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, BINDING_TABLE_OFFSET); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_buffer_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2)); + OUT_BATCH(batch, + (I965_DEPTHFORMAT_D32_FLOAT << 18) | + (I965_SURFACE_NULL << 29)); + /* DW2-3. Depth Buffer Address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4-7. Surface structure */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the Hier Depth buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the stencil buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_stencil_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void 
+gen8_emit_wm_hz_op(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_viewport_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_BATCH(batch, render_state->cc_viewport_offset); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_sampler_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, render_state->sampler_offset); + ADVANCE_BATCH(batch); +} + + +static void +gen7_emit_drawing_rectangle(VADriverContextP ctx) +{ + i965_render_drawing_rectangle(ctx); +} + +static void +gen8_render_emit_states(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_emit_invarient_states(ctx); + gen8_emit_state_base_address(ctx); + gen8_emit_viewport_state_pointers(ctx); + gen8_emit_urb(ctx); + gen8_emit_cc_state_pointers(ctx); + gen8_emit_sampler_state_pointers(ctx); + gen8_emit_wm_hz_op(ctx); + gen8_emit_bypass_state(ctx); 
+ gen8_emit_vs_state(ctx); + gen8_emit_clip_state(ctx); + gen8_emit_sf_state(ctx); + gen8_emit_depth_stencil_state(ctx); + gen8_emit_wm_state(ctx, kernel); + gen8_emit_depth_buffer_state(ctx); + gen7_emit_drawing_rectangle(ctx); + gen8_emit_vertex_element_state(ctx); + gen8_emit_vertices(ctx); + intel_batchbuffer_end_atomic(batch); +} + +static void +gen8_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen8_render_initialize(ctx); + gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + gen8_clear_dest_region(ctx); + gen8_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} + +static void +gen8_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + + memset(global_blend_state, 0, render_state->blend_state_size); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + 
blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.colorbuf_blend = 1; + blend_state->blend1.post_blend_clamp_enable = 1; + blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_subpic_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + float *constant_buffer; + float global_alpha = 1.0; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + unsigned char *cc_ptr; + + if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { + global_alpha = obj_subpic->global_alpha; + } + + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (float *) cc_ptr; + *constant_buffer = global_alpha; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_subpicture_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + gen8_render_dest_surface_state(ctx, 0); + gen8_subpic_render_src_surfaces_state(ctx, obj_surface); + gen8_render_sampler(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); + gen8_subpicture_render_blend_state(ctx); + gen8_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); +} + +static void +gen8_render_put_subpicture( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct 
intel_batchbuffer *batch = i965->batch; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + + assert(obj_subpic); + gen8_render_initialize(ctx); + gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); + gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); + intel_batchbuffer_flush(batch); +} + +bool +gen8_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct i965_kernel *kernel; + + render_state->render_put_surface = gen8_render_put_surface; + render_state->render_put_subpicture = gen8_render_put_subpicture; + + if (IS_GEN8(i965->intel.device_id)) { + memcpy(render_state->render_kernels, render_kernels_gen8, + sizeof(render_state->render_kernels)); + } + + kernel_size = 4096; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + + if (!kernel->size) + continue; + + kernel_size += kernel->size; + } + + render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (render_state->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); + return false; + } + + assert(render_state->instruction_state.bo); + + render_state->instruction_state.bo_size = kernel_size; + render_state->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(render_state->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual); + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + kernel_offset = end_offset; + kernel->kernel_offset = kernel_offset; + + if (!kernel->size) + continue; + + 
memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + + end_offset += ALIGN(kernel->size, ALIGNMENT); + } + + render_state->instruction_state.end_offset = end_offset; + + dri_bo_unmap(render_state->instruction_state.bo); + + + if (IS_GEN8(i965->intel.device_id)) { + render_state->max_wm_threads = 64; + } else { + /* should never get here !!! */ + assert(0); + } + + return true; +} + + +void +gen8_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + render_state->wm.surface_state_binding_table_bo = NULL; + + if (render_state->instruction_state.bo) { + dri_bo_unreference(render_state->instruction_state.bo); + render_state->instruction_state.bo = NULL; + } + + if (render_state->dynamic_state.bo) { + dri_bo_unreference(render_state->dynamic_state.bo); + render_state->dynamic_state.bo = NULL; + } + + if (render_state->indirect_state.bo) { + dri_bo_unreference(render_state->indirect_state.bo); + render_state->indirect_state.bo = NULL; + } + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } +} + diff --git a/src/i965_render.c b/src/i965_render.c index 6a994145..809013b4 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -147,30 +147,8 @@ static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_write.g7b" }; -/*TODO: Modify the shader for GEN8. 
- * Now it only uses the shader for gen7/haswell - */ -/* Programs for Gen8 */ -static const uint32_t sf_kernel_static_gen8[][4] = -{ -}; -static const uint32_t ps_kernel_static_gen8[][4] = { -#include "shaders/render/exa_wm_src_affine.g8b" -#include "shaders/render/exa_wm_src_sample_planar.g8b" -#include "shaders/render/exa_wm_yuv_color_balance.g8b" -#include "shaders/render/exa_wm_yuv_rgb.g8b" -#include "shaders/render/exa_wm_write.g8b" -}; - -static const uint32_t ps_subpic_kernel_static_gen8[][4] = { -#include "shaders/render/exa_wm_src_affine.g8b" -#include "shaders/render/exa_wm_src_sample_argb.g8b" -#include "shaders/render/exa_wm_write.g8b" -}; - -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \ - MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -318,31 +296,6 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { } }; -static struct i965_kernel render_kernels_gen8[] = { - { - "SF", - SF_KERNEL, - sf_kernel_static_gen8, - sizeof(sf_kernel_static_gen8), - NULL - }, - { - "PS", - PS_KERNEL, - ps_kernel_static_gen8, - sizeof(ps_kernel_static_gen8), - NULL - }, - - { - "PS_SUBPIC", - PS_SUBPIC_KERNEL, - ps_subpic_kernel_static_gen8, - sizeof(ps_subpic_kernel_static_gen8), - NULL - } -}; - #define URB_VS_ENTRIES 8 #define URB_VS_ENTRY_SIZE 1 @@ -799,25 +752,6 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling) } } -static void -gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - ss->ss0.tiled_surface = 0; - ss->ss0.tile_walk = 0; - break; - case I915_TILING_X: - ss->ss0.tiled_surface = 1; - ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: 
- ss->ss0.tiled_surface = 1; - ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; - break; - } -} - /* Set "Shader Channel Select" */ void gen7_render_set_surface_scs(struct gen7_surface_state *ss) @@ -828,16 +762,6 @@ gen7_render_set_surface_scs(struct gen7_surface_state *ss) ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; } -/* Set "Shader Channel Select" for GEN8+ */ -void -gen8_render_set_surface_scs(struct gen8_surface_state *ss) -{ - ss->ss7.shader_chanel_select_r = HSW_SCS_RED; - ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; - ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; - ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; -} - static void gen7_render_set_surface_state( struct gen7_surface_state *ss, @@ -880,51 +804,6 @@ gen7_render_set_surface_state( } -static void -gen8_render_set_surface_state( - struct gen8_surface_state *ss, - dri_bo *bo, - unsigned long offset, - int width, - int height, - int pitch, - int format, - unsigned int flags -) -{ - unsigned int tiling; - unsigned int swizzle; - - memset(ss, 0, sizeof(*ss)); - - switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { - case I965_PP_FLAG_BOTTOM_FIELD: - ss->ss0.vert_line_stride_ofs = 1; - /* fall-through */ - case I965_PP_FLAG_TOP_FIELD: - ss->ss0.vert_line_stride = 1; - height /= 2; - break; - } - - ss->ss0.surface_type = I965_SURFACE_2D; - ss->ss0.surface_format = format; - - ss->ss8.base_addr = bo->offset + offset; - - ss->ss2.width = width - 1; - ss->ss2.height = height - 1; - - ss->ss3.pitch = pitch - 1; - - /* Always set 1(align 4 mode) per B-spec */ - ss->ss0.vertical_alignment = 1; - ss->ss0.horizontal_alignment = 1; - - dri_bo_get_tiling(bo, &tiling, &swizzle); - gen8_render_set_surface_tiling(ss, tiling); -} - static void i965_render_src_surface_state( VADriverContextP ctx, @@ -949,18 +828,7 @@ i965_render_src_surface_state( assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN8(i965->intel.device_id)) { - 
gen8_render_set_surface_state(ss, - region, offset, - w, h, - pitch, format, flags); - gen8_render_set_surface_scs(ss); - dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - offset, - SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), - region); - } else if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id)) { gen7_render_set_surface_state(ss, region, offset, w, h, @@ -1075,18 +943,7 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN8(i965->intel.device_id)) { - gen8_render_set_surface_state(ss, - dest_region->bo, 0, - dest_region->width, dest_region->height, - dest_region->pitch, format, 0); - gen8_render_set_surface_scs(ss); - dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0, - SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), - dest_region->bo); - } else if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_id)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, @@ -1702,52 +1559,6 @@ i965_clear_dest_region(VADriverContextP ctx) intel_batchbuffer_end_atomic(batch); } -static void -gen8_clear_dest_region(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - struct intel_region *dest_region = render_state->draw_region; - unsigned int blt_cmd, br13; - int pitch; - - blt_cmd = GEN8_XY_COLOR_BLT_CMD; - br13 = 0xf0 << 16; - pitch = dest_region->pitch; - - if (dest_region->cpp == 4) { - br13 |= BR13_8888; - blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA); - } else { - assert(dest_region->cpp == 2); - br13 |= BR13_565; - } - - if (dest_region->tiling != I915_TILING_NONE) { - blt_cmd |= XY_COLOR_BLT_DST_TILED; - pitch /= 4; - } - - br13 |= pitch; 
- - intel_batchbuffer_start_atomic_blt(batch, 24); - BEGIN_BLT_BATCH(batch, 7); - - OUT_BATCH(batch, blt_cmd); - OUT_BATCH(batch, br13); - OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); - OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) | - (dest_region->x + dest_region->width)); - OUT_RELOC(batch, dest_region->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - OUT_BATCH(batch, 0x0); - OUT_BATCH(batch, 0x0); - ADVANCE_BATCH(batch); - intel_batchbuffer_end_atomic(batch); -} - static void i965_surface_render_pipeline_setup(VADriverContextP ctx) { @@ -2561,102 +2372,6 @@ gen7_render_initialize(VADriverContextP ctx) */ #define ALIGNMENT 64 -static void -gen8_render_initialize(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - dri_bo *bo; - int size; - unsigned int end_offset; - - /* VERTEX BUFFER */ - dri_bo_unreference(render_state->vb.vertex_buffer); - bo = dri_bo_alloc(i965->intel.bufmgr, - "vertex buffer", - 4096, - 4096); - assert(bo); - render_state->vb.vertex_buffer = bo; - - /* WM */ - dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "surface state & binding table", - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, - 4096); - assert(bo); - render_state->wm.surface_state_binding_table_bo = bo; - - render_state->curbe_size = 256; - - render_state->wm.sampler_count = 0; - - render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state); - - render_state->cc_state_size = sizeof(struct gen6_color_calc_state); - - render_state->cc_viewport_size = sizeof(struct i965_cc_viewport); - - render_state->blend_state_size = sizeof(struct gen8_global_blend_state) + - 16 * sizeof(struct gen8_blend_state_rt); - - render_state->sf_clip_size = 1024; - - render_state->scissor_size = 1024; - - size = ALIGN(render_state->curbe_size, ALIGNMENT) + - 
ALIGN(render_state->sampler_size, ALIGNMENT) + - ALIGN(render_state->cc_viewport_size, ALIGNMENT) + - ALIGN(render_state->cc_state_size, ALIGNMENT) + - ALIGN(render_state->blend_state_size, ALIGNMENT) + - ALIGN(render_state->sf_clip_size, ALIGNMENT) + - ALIGN(render_state->scissor_size, ALIGNMENT); - - dri_bo_unreference(render_state->dynamic_state.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "dynamic_state", - size, - 4096); - - render_state->dynamic_state.bo = bo; - - end_offset = 0; - render_state->dynamic_state.end_offset = 0; - - /* Constant buffer offset */ - render_state->curbe_offset = end_offset; - end_offset += ALIGN(render_state->curbe_size, ALIGNMENT); - - /* Sampler_state */ - render_state->sampler_offset = end_offset; - end_offset += ALIGN(render_state->sampler_size, ALIGNMENT); - - /* CC_VIEWPORT_state */ - render_state->cc_viewport_offset = end_offset; - end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT); - - /* CC_STATE_state */ - render_state->cc_state_offset = end_offset; - end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT); - - /* Blend_state */ - render_state->blend_state_offset = end_offset; - end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT); - - /* SF_CLIP_state */ - render_state->sf_clip_offset = end_offset; - end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT); - - /* SCISSOR_state */ - render_state->scissor_offset = end_offset; - end_offset += ALIGN(render_state->scissor_size, ALIGNMENT); - - /* update the end offset of dynamic_state */ - render_state->dynamic_state.end_offset = end_offset; - -} - static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2733,39 +2448,6 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); } -static void -gen8_render_sampler(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct gen8_sampler_state *sampler_state; - int i; - 
unsigned char *cc_ptr; - - assert(render_state->wm.sampler_count > 0); - assert(render_state->wm.sampler_count <= MAX_SAMPLERS); - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->sampler_offset; - - sampler_state = (struct gen8_sampler_state *) cc_ptr; - - for (i = 0; i < render_state->wm.sampler_count; i++) { - memset(sampler_state, 0, sizeof(*sampler_state)); - sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; - sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR; - sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; - sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; - sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; - sampler_state++; - } - - dri_bo_unmap(render_state->dynamic_state.bo); -} - static void gen7_render_setup_states( @@ -2787,198 +2469,42 @@ gen7_render_setup_states( i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } + static void -gen8_render_blend_state(VADriverContextP ctx) +gen7_emit_invarient_states(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct gen8_global_blend_state *global_blend_state; - struct gen8_blend_state_rt *blend_state; - unsigned char *cc_ptr; - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); + struct intel_batchbuffer *batch = i965->batch; - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->blend_state_offset; + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); - global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + 
GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); - memset(global_blend_state, 0, render_state->blend_state_size); - /* Global blend state + blend_state for Render Target */ - blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); - blend_state->blend1.logic_op_enable = 1; - blend_state->blend1.logic_op_func = 0xc; - blend_state->blend1.pre_blend_clamp_enable = 1; + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); + ADVANCE_BATCH(batch); - dri_bo_unmap(render_state->dynamic_state.bo); + /* Set system instruction pointer */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } - -static void -gen8_render_cc_viewport(VADriverContextP ctx) +static void +gen7_emit_state_base_address(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct i965_cc_viewport *cc_viewport; - unsigned char *cc_ptr; - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->cc_viewport_offset; - - cc_viewport = (struct i965_cc_viewport *) cc_ptr; - - memset(cc_viewport, 0, sizeof(*cc_viewport)); - - cc_viewport->min_depth = -1.e35; - cc_viewport->max_depth = 1.e35; - - dri_bo_unmap(render_state->dynamic_state.bo); -} - -static void -gen8_render_color_calc_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct gen6_color_calc_state *color_calc_state; - unsigned char *cc_ptr; - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - 
render_state->cc_state_offset; - - color_calc_state = (struct gen6_color_calc_state *) cc_ptr; - - memset(color_calc_state, 0, sizeof(*color_calc_state)); - color_calc_state->constant_r = 1.0; - color_calc_state->constant_g = 0.0; - color_calc_state->constant_b = 1.0; - color_calc_state->constant_a = 1.0; - dri_bo_unmap(render_state->dynamic_state.bo); -} - -static void -gen8_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface, - unsigned int flags) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - unsigned short *constant_buffer; - unsigned char *cc_ptr; - float *color_balance_base; - float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; - float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ - float hue = (float)i965->hue_attrib->value / 180 * PI; - float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; - float *yuv_to_rgb; - unsigned int color_flag; - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->curbe_offset; - - constant_buffer = (unsigned short *) cc_ptr; - - if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); - - *constant_buffer = 2; - } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; - else - *constant_buffer = 0; - } - - if (i965->contrast_attrib->value == DEFAULT_CONTRAST && - i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && - i965->hue_attrib->value == DEFAULT_HUE && - i965->saturation_attrib->value == DEFAULT_SATURATION) - constant_buffer[1] = 1; /* skip color balance transformation */ - else - constant_buffer[1] = 0; - - color_balance_base = (float *)constant_buffer + 4; - *color_balance_base++ = contrast; - 
*color_balance_base++ = brightness; - *color_balance_base++ = cos(hue) * contrast * saturation; - *color_balance_base++ = sin(hue) * contrast * saturation; - - color_flag = flags & VA_SRC_COLOR_MASK; - yuv_to_rgb = (float *)constant_buffer + 8; - if (color_flag == VA_SRC_BT709) - memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); - else if (color_flag == VA_SRC_SMPTE_240) - memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); - else - memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); - - dri_bo_unmap(render_state->dynamic_state.bo); -} - -static void -gen8_render_setup_states( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect, - unsigned int flags -) -{ - i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, obj_surface, flags); - gen8_render_sampler(ctx); - gen8_render_cc_viewport(ctx); - gen8_render_color_calc_state(ctx); - gen8_render_blend_state(ctx); - gen8_render_upload_constants(ctx, obj_surface, flags); - i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); -} - -static void -gen7_emit_invarient_states(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 1); - OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2)); - OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | - GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); - OUT_BATCH(batch, 1); - ADVANCE_BATCH(batch); - - /* Set system instruction pointer */ - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, CMD_STATE_SIP | 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - 
-static void -gen7_emit_state_base_address(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; + struct intel_batchbuffer *batch = i965->batch; struct i965_render_state *render_state = &i965->render_state; OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2)); @@ -2993,45 +2519,6 @@ gen7_emit_state_base_address(VADriverContextP ctx) OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ } -static void -gen8_emit_state_base_address(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - - BEGIN_BATCH(batch, 16); - OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); - OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /*DW4 */ - OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ - OUT_BATCH(batch, 0); - - /*DW6*/ - /* Dynamic state base address */ - OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, - 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - - /*DW8*/ - OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ - OUT_BATCH(batch, 0); - - /*DW10 */ - /* Instruction base address */ - OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - - /*DW12 */ - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */ - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ - ADVANCE_BATCH(batch); -} 
- static void gen7_emit_viewport_state_pointers(VADriverContextP ctx) { @@ -3136,25 +2623,6 @@ gen7_emit_cc_state_pointers(VADriverContextP ctx) ADVANCE_BATCH(batch); } -static void -gen8_emit_cc_state_pointers(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); - OUT_BATCH(batch, (render_state->cc_state_offset + 1)); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); - OUT_BATCH(batch, (render_state->blend_state_offset + 1)); - ADVANCE_BATCH(batch); - -} - static void gen7_emit_sampler_state_pointers(VADriverContextP ctx) { @@ -3529,899 +2997,89 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } + static void -gen8_emit_vertices(VADriverContextP ctx) +gen7_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2)); - OUT_BATCH(batch, - (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) | - (0 << GEN8_VB0_MOCS_SHIFT) | - GEN7_VB0_ADDRESS_MODIFYENABLE | - ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); - OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 12 * 4); - ADVANCE_BATCH(batch); + gen7_render_initialize(ctx); + gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + i965_clear_dest_region(ctx); + gen7_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} - /* Topology in 3D primitive is 
overrided by VF_TOPOLOGY command */ - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2)); - OUT_BATCH(batch, - _3DPRIM_RECTLIST); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 7); - OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2)); - OUT_BATCH(batch, - GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); - OUT_BATCH(batch, 3); /* vertex count per instance */ - OUT_BATCH(batch, 0); /* start vertex offset */ - OUT_BATCH(batch, 1); /* single instance */ - OUT_BATCH(batch, 0); /* start instance location */ - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); +static void +gen7_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_blend_state *blend_state; + + dri_bo_unmap(render_state->cc.state); + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + blend_state = render_state->cc.blend->virtual; + memset(blend_state, 0, sizeof(*blend_state)); + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.blend_enable = 1; + blend_state->blend1.post_blend_clamp_enable = 1; + blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + dri_bo_unmap(render_state->cc.blend); +} + +static void +gen7_subpicture_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + i965_render_dest_surface_state(ctx, 0); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); + i965_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen7_render_color_calc_state(ctx); + gen7_subpicture_render_blend_state(ctx); + gen7_render_depth_stencil_state(ctx); + i965_subpic_render_upload_constants(ctx, 
obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } static void -gen8_emit_vertex_element_state(VADriverContextP ctx) +gen7_render_put_subpicture( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; - /* - * The VUE layout - * dword 0-3: pad (0, 0, 0. 0) - * dword 4-7: position (x, y, 1.0, 1.0), - * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0) - */ - - /* Set up our vertex elements, sourced from the single vertex buffer. */ - OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2)); + assert(obj_subpic); + gen7_render_initialize(ctx); + gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); + gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); + intel_batchbuffer_flush(batch); +} - /* Element state 0. These are 4 dwords of 0 required for the VUE format. - * We don't really know or care what they do. 
- */ - - OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | - GEN8_VE0_VALID | - (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | - (0 << VE0_OFFSET_SHIFT)); - OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | - (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | - (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | - (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); - - /* offset 8: X, Y -> {x, y, 1.0, 1.0} */ - OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | - GEN8_VE0_VALID | - (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | - (8 << VE0_OFFSET_SHIFT)); - OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | - (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | - (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | - (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); - - /* offset 0: u,v -> {U, V, 1.0, 1.0} */ - OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | - GEN8_VE0_VALID | - (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | - (0 << VE0_OFFSET_SHIFT)); - OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | - (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | - (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | - (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); -} - -static void -gen8_emit_vs_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - /* disable VS constant buffer */ - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* CS Buffer 0 */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* CS Buffer 1 */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* CS Buffer 2 */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* CS Buffer 3 */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - 
BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2)); - OUT_BATCH(batch, 0); /* without VS kernel */ - OUT_BATCH(batch, 0); - /* VS shader dispatch flag */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW6. VS shader GRF and URB buffer definition */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); /* pass-through */ - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - -} - -/* - * URB layout on GEN8 - * ---------------------------------------- - * | PS Push Constants (8KB) | VS entries | - * ---------------------------------------- - */ -static void -gen8_emit_urb(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - unsigned int num_urb_entries = 64; - - /* The minimum urb entries is 64 */ - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Size is 8Kbs and base address is 0Kb */ - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); - /* Size is 8Kbs and base address is 0Kb */ - OUT_BATCH(batch, - (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | - (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); - ADVANCE_BATCH(batch); - - 
BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); - OUT_BATCH(batch, - (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | - (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | - (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2)); - OUT_BATCH(batch, - (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | - (5 << GEN7_URB_STARTING_ADDRESS_SHIFT)); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2)); - OUT_BATCH(batch, - (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | - (6 << GEN7_URB_STARTING_ADDRESS_SHIFT)); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2)); - OUT_BATCH(batch, - (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | - (7 << GEN7_URB_STARTING_ADDRESS_SHIFT)); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_bypass_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - /* bypass GS */ - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 10); - OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2)); - /* GS shader address */ - OUT_BATCH(batch, 0); /* without GS kernel */ - OUT_BATCH(batch, 0); - /* DW3. GS shader dispatch flag */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW6. 
GS shader GRF and URB offset/length */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); /* pass-through */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* disable HS */ - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2)); - OUT_BATCH(batch, 0); - /*DW2. HS pass-through */ - OUT_BATCH(batch, 0); - /*DW3. HS shader address */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /*DW5. HS shader flag. 
URB offset/length and so on */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Disable TE */ - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Disable DS */ - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2)); - /* DW1. DS shader pointer */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW3-5. DS shader dispatch flag.*/ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW8. 
DS shader output URB */ - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Disable STREAMOUT */ - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_invarient_states(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 1); - OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2)); - OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | - GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ - ADVANCE_BATCH(batch); - - /* Update 3D Multisample pattern */ - BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); - OUT_BATCH(batch, 1); - ADVANCE_BATCH(batch); - - /* Set system instruction pointer */ - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, CMD_STATE_SIP | 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_clip_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); - OUT_BATCH(batch, 0); - 
OUT_BATCH(batch, 0); /* pass-through */ - OUT_BATCH(batch, 0); -} - -static void -gen8_emit_sf_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2)); - OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2)); - OUT_BATCH(batch, - (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) | - (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) | - (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | - (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | - (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* SBE for backend setup */ - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_wm_state(VADriverContextP ctx, int kernel) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - unsigned int num_samples = 0; - unsigned int max_threads; - - max_threads = render_state->max_wm_threads - 2; - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2)); - OUT_BATCH(batch, - (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE)); - ADVANCE_BATCH(batch); - - - if (kernel == PS_KERNEL) { - 
BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); - OUT_BATCH(batch, - GEN8_PS_BLEND_HAS_WRITEABLE_RT); - ADVANCE_BATCH(batch); - } else if (kernel == PS_SUBPIC_KERNEL) { - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); - OUT_BATCH(batch, - (GEN8_PS_BLEND_HAS_WRITEABLE_RT | - GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE | - (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) | - (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) | - (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) | - (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT))); - ADVANCE_BATCH(batch); - } - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2)); - OUT_BATCH(batch, - GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 11); - OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2)); - OUT_BATCH(batch, URB_CS_ENTRY_SIZE); - OUT_BATCH(batch, 0); - /*DW3-4. Constant buffer 0 */ - OUT_BATCH(batch, render_state->curbe_offset); - OUT_BATCH(batch, 0); - - /*DW5-10. Constant buffer 1-3 */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 12); - OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2)); - /* PS shader address */ - OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset); - - OUT_BATCH(batch, 0); - /* DW3. PS shader flag .Binding table cnt/sample cnt */ - OUT_BATCH(batch, - (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | - (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - /* DW4-5. Scatch space */ - OUT_BATCH(batch, 0); /* scratch space base offset */ - OUT_BATCH(batch, 0); - /* DW6. PS shader threads. */ - OUT_BATCH(batch, - ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples | - GEN7_PS_PUSH_CONSTANT_ENABLE | - GEN7_PS_16_DISPATCH_ENABLE); - /* DW7. 
PS shader GRF */ - OUT_BATCH(batch, - (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); - OUT_BATCH(batch, 0); /* kernel 1 pointer */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); /* kernel 2 pointer */ - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); - OUT_BATCH(batch, BINDING_TABLE_OFFSET); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_depth_buffer_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 8); - OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2)); - OUT_BATCH(batch, - (I965_DEPTHFORMAT_D32_FLOAT << 18) | - (I965_SURFACE_NULL << 29)); - /* DW2-3. Depth Buffer Address */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW4-7. Surface structure */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Update the Hier Depth buffer */ - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - /* Update the stencil buffer */ - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_depth_stencil_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void 
-gen8_emit_wm_hz_op(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - BEGIN_BATCH(batch, 5); - OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_viewport_state_pointers(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); - OUT_BATCH(batch, render_state->cc_viewport_offset); - ADVANCE_BATCH(batch); - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - -static void -gen8_emit_sampler_state_pointers(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - struct i965_render_state *render_state = &i965->render_state; - - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); - OUT_BATCH(batch, render_state->sampler_offset); - ADVANCE_BATCH(batch); -} - - -static void -gen8_render_emit_states(VADriverContextP ctx, int kernel) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - intel_batchbuffer_start_atomic(batch, 0x1000); - intel_batchbuffer_emit_mi_flush(batch); - gen8_emit_invarient_states(ctx); - gen8_emit_state_base_address(ctx); - gen8_emit_viewport_state_pointers(ctx); - gen8_emit_urb(ctx); - gen8_emit_cc_state_pointers(ctx); - gen8_emit_sampler_state_pointers(ctx); - gen8_emit_wm_hz_op(ctx); - gen8_emit_bypass_state(ctx); - gen8_emit_vs_state(ctx); - gen8_emit_clip_state(ctx); - gen8_emit_sf_state(ctx); - 
gen8_emit_depth_stencil_state(ctx); - gen8_emit_wm_state(ctx, kernel); - gen8_emit_depth_buffer_state(ctx); - gen7_emit_drawing_rectangle(ctx); - gen8_emit_vertex_element_state(ctx); - gen8_emit_vertices(ctx); - intel_batchbuffer_end_atomic(batch); -} - -static void -gen7_render_put_surface( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect, - unsigned int flags -) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - gen7_render_initialize(ctx); - gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); - i965_clear_dest_region(ctx); - gen7_render_emit_states(ctx, PS_KERNEL); - intel_batchbuffer_flush(batch); -} - -static void -gen8_render_put_surface( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect, - unsigned int flags -) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - - gen8_render_initialize(ctx); - gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); - gen8_clear_dest_region(ctx); - gen8_render_emit_states(ctx, PS_KERNEL); - intel_batchbuffer_flush(batch); -} - -static void -gen7_subpicture_render_blend_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct gen6_blend_state *blend_state; - - dri_bo_unmap(render_state->cc.state); - dri_bo_map(render_state->cc.blend, 1); - assert(render_state->cc.blend->virtual); - blend_state = render_state->cc.blend->virtual; - memset(blend_state, 0, sizeof(*blend_state)); - blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; - blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; - blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD; - blend_state->blend0.blend_enable = 1; - 
blend_state->blend1.post_blend_clamp_enable = 1; - blend_state->blend1.pre_blend_clamp_enable = 1; - blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ - dri_bo_unmap(render_state->cc.blend); -} - -static void -gen8_subpicture_render_blend_state(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct gen8_global_blend_state *global_blend_state; - struct gen8_blend_state_rt *blend_state; - unsigned char *cc_ptr; - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->blend_state_offset; - - global_blend_state = (struct gen8_global_blend_state*) cc_ptr; - - memset(global_blend_state, 0, render_state->blend_state_size); - /* Global blend state + blend_state for Render Target */ - blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); - blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD; - blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; - blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; - blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD; - blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; - blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; - blend_state->blend0.colorbuf_blend = 1; - blend_state->blend1.post_blend_clamp_enable = 1; - blend_state->blend1.pre_blend_clamp_enable = 1; - blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ - - dri_bo_unmap(render_state->dynamic_state.bo); -} - -static void -gen7_subpicture_render_setup_states( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect -) -{ - i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, obj_surface); - i965_render_sampler(ctx); - 
i965_render_cc_viewport(ctx); - gen7_render_color_calc_state(ctx); - gen7_subpicture_render_blend_state(ctx); - gen7_render_depth_stencil_state(ctx); - i965_subpic_render_upload_constants(ctx, obj_surface); - i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); -} - -static void -gen8_subpic_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - float *constant_buffer; - float global_alpha = 1.0; - unsigned int index = obj_surface->subpic_render_idx; - struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; - unsigned char *cc_ptr; - - if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { - global_alpha = obj_subpic->global_alpha; - } - - - dri_bo_map(render_state->dynamic_state.bo, 1); - assert(render_state->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + - render_state->curbe_offset; - - constant_buffer = (float *) cc_ptr; - *constant_buffer = global_alpha; - - dri_bo_unmap(render_state->dynamic_state.bo); -} - -static void -gen8_subpicture_render_setup_states( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect -) -{ - i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, obj_surface); - gen8_render_sampler(ctx); - gen8_render_cc_viewport(ctx); - gen8_render_color_calc_state(ctx); - gen8_subpicture_render_blend_state(ctx); - gen8_subpic_render_upload_constants(ctx, obj_surface); - i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); -} - -static void -gen7_render_put_subpicture( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect -) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - unsigned int index 
= obj_surface->subpic_render_idx; - struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; - - assert(obj_subpic); - gen7_render_initialize(ctx); - gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); - gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); - intel_batchbuffer_flush(batch); -} - -static void -gen8_render_put_subpicture( - VADriverContextP ctx, - struct object_surface *obj_surface, - const VARectangle *src_rect, - const VARectangle *dst_rect -) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = i965->batch; - unsigned int index = obj_surface->subpic_render_idx; - struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; - - assert(obj_subpic); - gen8_render_initialize(ctx); - gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); - gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); - intel_batchbuffer_flush(batch); -} void intel_render_put_surface( @@ -4474,80 +3132,6 @@ intel_render_put_subpicture( render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } -static bool -gen8_render_init(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - int i, kernel_size; - unsigned int kernel_offset, end_offset; - unsigned char *kernel_ptr; - struct i965_kernel *kernel; - - render_state->render_put_surface = gen8_render_put_surface; - render_state->render_put_subpicture = gen8_render_put_subpicture; - - if (IS_GEN8(i965->intel.device_id)) { - memcpy(render_state->render_kernels, render_kernels_gen8, - sizeof(render_state->render_kernels)); - } - - kernel_size = 4096; - - for (i = 0; i < NUM_RENDER_KERNEL; i++) { - kernel = &render_state->render_kernels[i]; - - if (!kernel->size) - continue; - - kernel_size += 
kernel->size; - } - - render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, - "kernel shader", - kernel_size, - 0x1000); - if (render_state->instruction_state.bo == NULL) { - WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); - return false; - } - - assert(render_state->instruction_state.bo); - - render_state->instruction_state.bo_size = kernel_size; - render_state->instruction_state.end_offset = 0; - end_offset = 0; - - dri_bo_map(render_state->instruction_state.bo, 1); - kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual); - for (i = 0; i < NUM_RENDER_KERNEL; i++) { - kernel = &render_state->render_kernels[i]; - kernel_offset = end_offset; - kernel->kernel_offset = kernel_offset; - - if (!kernel->size) - continue; - - memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); - - end_offset += ALIGN(kernel->size, ALIGNMENT); - } - - render_state->instruction_state.end_offset = end_offset; - - dri_bo_unmap(render_state->instruction_state.bo); - - - if (IS_GEN8(i965->intel.device_id)) { - render_state->max_wm_threads = 64; - } else { - /* should never get here !!! 
*/ - assert(0); - } - - return true; -} - bool i965_render_init(VADriverContextP ctx) @@ -4629,41 +3213,6 @@ i965_render_init(VADriverContextP ctx) return true; } -static void -gen8_render_terminate(VADriverContextP ctx) -{ - int i; - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - - dri_bo_unreference(render_state->vb.vertex_buffer); - render_state->vb.vertex_buffer = NULL; - - dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); - render_state->wm.surface_state_binding_table_bo = NULL; - - if (render_state->instruction_state.bo) { - dri_bo_unreference(render_state->instruction_state.bo); - render_state->instruction_state.bo = NULL; - } - - if (render_state->dynamic_state.bo) { - dri_bo_unreference(render_state->dynamic_state.bo); - render_state->dynamic_state.bo = NULL; - } - - if (render_state->indirect_state.bo) { - dri_bo_unreference(render_state->indirect_state.bo); - render_state->indirect_state.bo = NULL; - } - - if (render_state->draw_region) { - dri_bo_unreference(render_state->draw_region->bo); - free(render_state->draw_region); - render_state->draw_region = NULL; - } -} - void i965_render_terminate(VADriverContextP ctx) { diff --git a/src/i965_render.h b/src/i965_render.h index 3bb3d3e5..a1f2f8ff 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -157,4 +157,8 @@ struct gen8_surface_state; void gen8_render_set_surface_scs(struct gen8_surface_state *ss); +extern bool gen8_render_init(VADriverContextP ctx); + +extern void gen8_render_terminate(VADriverContextP ctx); + #endif /* _I965_RENDER_H_ */ -- cgit v1.2.1 From 8e665f172f23acd3a73c387ab10a0eec35655209 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:08 +0800 Subject: Use the XXX_post_processing as callback function for post-processing Signed-off-by: Zhao Yakui (cherry picked from commit b7da102c3d237ac5553f8c8ada1bb155e5b8ea75) --- src/i965_post_processing.c | 21 +++++++++++++-------- 
src/i965_post_processing.h | 9 +++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index bbcba202..5935a9a6 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5640,14 +5640,15 @@ i965_post_processing_internal( VAStatus va_status; struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN8(i965->intel.device_id)) - va_status = gen8_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); - else if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) - va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); - else - va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); - + if (pp_context && pp_context->intel_post_processing) { + va_status = (pp_context->intel_post_processing)(ctx, pp_context, + src_surface, src_rect, + dst_surface, dst_rect, + pp_index, filter_param); + } else { + va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + } + return va_status; } @@ -6385,6 +6386,8 @@ gen8_post_processing_context_init(VADriverContextP ctx, pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; } + pp_context->intel_post_processing = gen8_post_processing; + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); if (IS_GEN8(i965->intel.device_id)) @@ -6479,12 +6482,14 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; assert(pp_context->urb.cs_start + pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + pp_context->intel_post_processing = ironlake_post_processing; } else { pp_context->vfe_gpu_state.max_num_threads = 60; pp_context->vfe_gpu_state.num_urb_entries = 59; pp_context->vfe_gpu_state.gpgpu_mode = 0; 
pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + pp_context->intel_post_processing = gen6_post_processing; } diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index e525a1aa..e76e9c51 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -526,6 +526,15 @@ struct i965_post_processing_context int idrt_size; unsigned int curbe_offset; int curbe_size; + + VAStatus (*intel_post_processing)(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param); }; struct i965_proc_context -- cgit v1.2.1 From 5cd2a559ec618c9e628fa695f15a0e90450824f2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:08 +0800 Subject: Add the seperated file for Video post-processing on BDW Signed-off-by: Zhao Yakui (cherry picked from commit 6e1baecded9d23b32daa8e34828b6a5d32a27c46) Conflicts: src/i965_post_processing.c --- src/Makefile.am | 1 + src/gen8_post_processing.c | 1491 ++++++++++++++++++++++++++++++++++++++++++++ src/i965_post_processing.c | 1336 +-------------------------------------- src/i965_post_processing.h | 9 + 4 files changed, 1529 insertions(+), 1308 deletions(-) create mode 100644 src/gen8_post_processing.c diff --git a/src/Makefile.am b/src/Makefile.am index 61f35293..28334626 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -75,6 +75,7 @@ source_c = \ i965_media_mpeg2.c \ i965_gpe_utils.c \ i965_post_processing.c \ + gen8_post_processing.c \ i965_render.c \ gen8_render.c \ intel_batchbuffer.c \ diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c new file mode 100644 index 00000000..78f5a837 --- /dev/null +++ b/src/gen8_post_processing.c @@ -0,0 +1,1491 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Xiang Haihao + * Zhao Yakui + * + */ + +#include +#include +#include +#include + +#include "intel_batchbuffer.h" +#include "intel_driver.h" +#include "i965_defines.h" +#include "i965_structs.h" +#include "i965_drv_video.h" +#include "i965_post_processing.h" +#include "i965_render.h" +#include "intel_media.h" + +#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \ + IS_GEN6((ctx)->intel.device_id) || \ + IS_GEN7((ctx)->intel.device_id) || \ + IS_GEN8((ctx)->intel.device_id)) + + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 + +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES) + +#define GPU_ASM_BLOCK_WIDTH 16 +#define GPU_ASM_BLOCK_HEIGHT 8 +#define GPU_ASM_X_OFFSET_ALIGNMENT 4 + +#define VA_STATUS_SUCCESS_1 0xFFFFFFFE + +static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param); + +static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param); + +/* TODO: Modify the shader and then compile it again. 
+ * Currently it is derived from Haswell*/ +static const uint32_t pp_null_gen8[][4] = { +}; + +static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl3.g8b" +}; + +static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pl2.g8b" +}; + +static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pl3.g8b" +}; + +static const uint32_t pp_nv12_scaling_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_avs_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_dndi_gen8[][4] = { +// #include "shaders/post_processing/gen7/dndi.g75b" +}; + +static const uint32_t pp_nv12_dn_gen8[][4] = { +// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b" +}; +static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pa.g8b" +}; +static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pa.g8b" +}; +static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pl2.g8b" +}; +static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pl3.g8b" +}; +static const uint32_t pp_pa_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pa.g8b" +}; +static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" +}; +static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_rgbx.g8b" +}; + +static struct pp_module pp_modules_gen8[] = { + { + { + "NULL module (for testing)", + PP_NULL, + pp_null_gen8, + sizeof(pp_null_gen8), + NULL, + 
}, + + pp_null_initialize, + }, + + { + { + "NV12_NV12", + PP_NV12_LOAD_SAVE_N12, + pp_nv12_load_save_nv12_gen8, + sizeof(pp_nv12_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12_PL3", + PP_NV12_LOAD_SAVE_PL3, + pp_nv12_load_save_pl3_gen8, + sizeof(pp_nv12_load_save_pl3_gen8), + NULL, + }, + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_NV12", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_nv12_gen8, + sizeof(pp_pl3_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PL3", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_pl3_gen8, + sizeof(pp_pl3_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 Scaling module", + PP_NV12_SCALING, + pp_nv12_scaling_gen8, + sizeof(pp_nv12_scaling_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 AVS module", + PP_NV12_AVS, + pp_nv12_avs_gen8, + sizeof(pp_nv12_avs_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 DNDI module", + PP_NV12_DNDI, + pp_nv12_dndi_gen8, + sizeof(pp_nv12_dndi_gen8), + NULL, + }, + + pp_null_initialize, + }, + + { + { + "NV12 DN module", + PP_NV12_DN, + pp_nv12_dn_gen8, + sizeof(pp_nv12_dn_gen8), + NULL, + }, + + pp_null_initialize, + }, + { + { + "NV12_PA module", + PP_NV12_LOAD_SAVE_PA, + pp_nv12_load_save_pa_gen8, + sizeof(pp_nv12_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PA module", + PP_PL3_LOAD_SAVE_PA, + pp_pl3_load_save_pa_gen8, + sizeof(pp_pl3_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_NV12 module", + PP_PA_LOAD_SAVE_NV12, + pp_pa_load_save_nv12_gen8, + sizeof(pp_pa_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_PL3 module", + PP_PA_LOAD_SAVE_PL3, + pp_pa_load_save_pl3_gen8, + sizeof(pp_pa_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + 
pp_pa_load_save_pa_gen8, + sizeof(pp_pa_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "RGBX_NV12 module", + PP_RGBX_LOAD_SAVE_NV12, + pp_rgbx_load_save_nv12_gen8, + sizeof(pp_rgbx_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12_RGBX module", + PP_NV12_LOAD_SAVE_RGBX, + pp_nv12_load_save_rgbx_gen8, + sizeof(pp_nv12_load_save_rgbx_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, +}; + +static int +pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) +{ + int fourcc; + + if (surface->type == I965_SURFACE_TYPE_IMAGE) { + struct object_image *obj_image = (struct object_image *)surface->base; + fourcc = obj_image->image.format.fourcc; + } else { + struct object_surface *obj_surface = (struct object_surface *)surface->base; + fourcc = obj_surface->fourcc; + } + + return fourcc; +} + +static void +gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + + +static void +gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int pitch, int format, + int index, int is_target) +{ + struct 
gen8_surface_state *ss; + dri_bo *ss_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss_bo = pp_context->surface_state_binding_table.bo; + assert(ss_bo); + + dri_bo_map(ss_bo, True); + assert(ss_bo->virtual); + ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + ss->ss8.base_addr = surf_bo->offset + surf_bo_offset; + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + ss->ss3.pitch = pitch - 1; + + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + + gen8_pp_set_surface_tiling(ss, tiling); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + surf_bo); + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); +} + + +static void +gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int wpitch, + int xoffset, int yoffset, + int format, int interleave_chroma, + int index) +{ + struct gen8_surface_state2 *ss2; + dri_bo *ss2_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss2_bo = pp_context->surface_state_binding_table.bo; + assert(ss2_bo); + + dri_bo_map(ss2_bo, True); + assert(ss2_bo->virtual); + ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss2, 0, sizeof(*ss2)); + ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset; + ss2->ss1.cbcr_pixel_offset_v_direction = 0; + ss2->ss1.width = width - 1; + ss2->ss1.height = height - 1; + 
ss2->ss2.pitch = wpitch - 1; + ss2->ss2.interleave_chroma = interleave_chroma; + ss2->ss2.surface_format = format; + ss2->ss3.x_offset_for_cb = xoffset; + ss2->ss3.y_offset_for_cb = yoffset; + gen8_pp_set_surface2_tiling(ss2, tiling); + dri_bo_emit_reloc(ss2_bo, + I915_GEM_DOMAIN_RENDER, 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6), + surf_bo); + ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss2_bo); +} + +static void +gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *surface, + int base_index, int is_target, + int *width, int *height, int *pitch, int *offset) +{ + struct object_surface *obj_surface; + struct object_image *obj_image; + dri_bo *bo; + int fourcc = pp_get_surface_fourcc(ctx, surface); + const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; + const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 
1 : 2; + int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); + int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); + int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || + fourcc == VA_FOURCC('R', 'G', 'B', 'X') || + fourcc == VA_FOURCC('B', 'G', 'R', 'A') || + fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + + if (surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)surface->base; + bo = obj_surface->bo; + width[0] = obj_surface->orig_width; + height[0] = obj_surface->orig_height; + pitch[0] = obj_surface->width; + offset[0] = 0; + + if (packed_yuv) { + if (is_target) + width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ + else + width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ + + } else if (rgbx_format) { + if (is_target) + width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ + } + + width[1] = obj_surface->cb_cr_width; + height[1] = obj_surface->cb_cr_height; + pitch[1] = obj_surface->cb_cr_pitch; + offset[1] = obj_surface->y_cb_offset * obj_surface->width; + + width[2] = obj_surface->cb_cr_width; + height[2] = obj_surface->cb_cr_height; + pitch[2] = obj_surface->cb_cr_pitch; + offset[2] = obj_surface->y_cr_offset * obj_surface->width; + } else { + obj_image = (struct object_image *)surface->base; + bo = obj_image->bo; + width[0] = obj_image->image.width; + height[0] = obj_image->image.height; + pitch[0] = obj_image->image.pitches[0]; + offset[0] = obj_image->image.offsets[0]; + + if (rgbx_format) { + if (is_target) + width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ + } else if (packed_yuv) { + if (is_target) + width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */ + else + width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels 
*/ + } else if (interleaved_uv) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height / 2; + pitch[1] = obj_image->image.pitches[1]; + offset[1] = obj_image->image.offsets[1]; + } else { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height / 2; + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height / 2; + pitch[2] = obj_image->image.pitches[V]; + offset[2] = obj_image->image.offsets[V]; + } + } + + if (is_target) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, 0, + width[0] / 4, height[0], pitch[0], + I965_SURFACEFORMAT_R8_UINT, + base_index, 1); + if (rgbx_format) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || + (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + /* It is stored as MSB: X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; + } + } + if (!packed_yuv && !rgbx_format) { + if (interleaved_uv) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + } + } else { + int format0 = SURFACE_FORMAT_Y8_UNORM; + + switch (fourcc) { + case VA_FOURCC('Y', 'U', 'Y', '2'): + format0 = SURFACE_FORMAT_YCRCB_NORMAL; + break; + + case VA_FOURCC('U', 'Y', 'V', 'Y'): + format0 = SURFACE_FORMAT_YCRCB_SWAPY; + break; + + default: + break; + } + if (rgbx_format) { + struct gen7_pp_static_parameter *pp_static_parameter 
= pp_context->pp_static_parameter; + /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ + format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; + pp_static_parameter->grf2.src_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || + (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + pp_static_parameter->grf2.src_avs_rgb_swap = 1; + } + } + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[0], + width[0], height[0], pitch[0], + 0, 0, + format0, 0, + base_index); + + if (!packed_yuv && !rgbx_format) { + if (interleaved_uv) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); + } + } + } +} + +static int +pp_null_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_y_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) +{ + return 0; +} + +static VAStatus +pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param) +{ + /* private function & data */ + pp_context->pp_x_steps = pp_null_x_steps; + pp_context->pp_y_steps = pp_null_y_steps; + pp_context->private_context = NULL; + pp_context->pp_set_block_parameter = pp_null_set_block_parameter; + + dst_surface->flags = src_surface->flags; + + return VA_STATUS_SUCCESS; +} + +static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect) +{ + 
int i, dst_width_adjust; + /* x offset of dest surface must be dword aligned. + * so we have to extend dst surface on left edge, and mask out pixels not interested + */ + if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) { + pp_context->block_horizontal_mask_left = 0; + for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; iblock_horizontal_mask_left |= 1<block_horizontal_mask_left = 0xffff; + } + + dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; + if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){ + pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1; + } + else { + pp_context->block_horizontal_mask_right = 0xffff; + } + + if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){ + pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1; + } + else { + pp_context->block_vertical_mask_bottom = 0xff; + } + +} + +static int +gen7_pp_avs_x_steps(void *private_context) +{ + struct pp_avs_context *pp_avs_context = private_context; + + return pp_avs_context->dest_w / 16; +} + +static int +gen7_pp_avs_y_steps(void *private_context) +{ + struct pp_avs_context *pp_avs_context = private_context; + + return pp_avs_context->dest_h / 16; +} + +static int +gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) +{ + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; + struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; + + pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x; + pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y; + pp_inline_parameter->grf7.constant_0 = 0xffffffff; + pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w; + + return 0; +} + +static void gen7_update_src_surface_uv_offset(VADriverContextP ctx, + struct 
i965_post_processing_context *pp_context, + const struct i965_surface *surface) +{ + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + int fourcc = pp_get_surface_fourcc(ctx, surface); + + if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) { + pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0; + pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1; + pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3; + } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1; + pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0; + pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2; + } +} + +static VAStatus +gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param) +{ +/* TODO: Add the sampler_8x8 state */ + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + struct gen8_sampler_8x8_avs *sampler_8x8; + struct i965_sampler_8x8_coefficient *sampler_8x8_state; + int i; + int width[3], height[3], pitch[3], offset[3]; + int src_width, src_height; + unsigned char *cc_ptr; + + memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter)); + + /* source surface */ + gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + width, height, pitch, offset); + src_height = height[0]; + src_width = width[0]; + + /* destination surface */ + gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + width, height, pitch, offset); + + /* sampler 8x8 state */ + dri_bo_map(pp_context->dynamic_state.bo, True); + 
assert(pp_context->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->sampler_offset; + /* Currently only one gen8 sampler_8x8 is initialized */ + sampler_8x8 = (struct gen8_sampler_8x8_avs *) cc_ptr; + memset(sampler_8x8, 0, sizeof(*sampler_8x8)); + + sampler_8x8->dw0.gain_factor = 44; + sampler_8x8->dw0.weak_edge_threshold = 1; + sampler_8x8->dw0.strong_edge_threshold = 8; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw0.r3x_coefficient = 5; + */ + sampler_8x8->dw0.r3x_coefficient = 27; + sampler_8x8->dw0.r3c_coefficient = 5; + + sampler_8x8->dw2.global_noise_estimation = 255; + sampler_8x8->dw2.non_edge_weight = 1; + sampler_8x8->dw2.regular_weight = 2; + sampler_8x8->dw2.strong_edge_weight = 7; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw2.r5x_coefficient = 7; + * sampler_8x8->dw2.r5cx_coefficient = 7; + * sampler_8x8->dw2.r5c_coefficient = 7; + */ + sampler_8x8->dw2.r5x_coefficient = 9; + sampler_8x8->dw2.r5cx_coefficient = 8; + sampler_8x8->dw2.r5c_coefficient = 3; + + sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */ + sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */ + sampler_8x8->dw3.sat_max = 0x1f; + sampler_8x8->dw3.hue_max = 14; + /* The 8tap filter will determine whether the adaptive Filter is + * applied for all channels(dw153). + * If the 8tap filter is disabled, the adaptive filter should be disabled. + * Only when 8tap filter is enabled, it can be enabled or not. 
+ */ + sampler_8x8->dw3.enable_8tap_filter = 3; + sampler_8x8->dw3.ief4_smooth_enable = 0; + + sampler_8x8->dw4.s3u = 0; + sampler_8x8->dw4.diamond_margin = 4; + sampler_8x8->dw4.vy_std_enable = 0; + sampler_8x8->dw4.umid = 110; + sampler_8x8->dw4.vmid = 154; + + sampler_8x8->dw5.diamond_dv = 0; + sampler_8x8->dw5.diamond_th = 35; + sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */ + sampler_8x8->dw5.hs_margin = 3; + sampler_8x8->dw5.diamond_du = 2; + + sampler_8x8->dw6.y_point1 = 46; + sampler_8x8->dw6.y_point2 = 47; + sampler_8x8->dw6.y_point3 = 254; + sampler_8x8->dw6.y_point4 = 255; + + sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */ + + sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */ + sampler_8x8->dw8.p0l = 46; + sampler_8x8->dw8.p1l = 216; + + sampler_8x8->dw9.p2l = 236; + sampler_8x8->dw9.p3l = 236; + sampler_8x8->dw9.b0l = 133; + sampler_8x8->dw9.b1l = 130; + + sampler_8x8->dw10.b2l = 130; + sampler_8x8->dw10.b3l = 130; + /* s0l = -5 / 256. 
s2.8 */ + sampler_8x8->dw10.s0l = 1029; /* s0l = 0 */ + sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */ + + sampler_8x8->dw11.s1l = 0; + sampler_8x8->dw11.s2l = 0; + + sampler_8x8->dw12.s3l = 0; + sampler_8x8->dw12.p0u = 46; + sampler_8x8->dw12.p1u = 66; + sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */ + + sampler_8x8->dw13.p2u = 130; + sampler_8x8->dw13.p3u = 236; + sampler_8x8->dw13.b0u = 143; + sampler_8x8->dw13.b1u = 163; + + sampler_8x8->dw14.b2u = 200; + sampler_8x8->dw14.b3u = 140; + sampler_8x8->dw14.s0u = 256; /* s0u = 0 */ + + sampler_8x8->dw15.s1u = 113; /* s1u = 0 */ + sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */ + + sampler_8x8_state = sampler_8x8->coefficients; + + for (i = 0; i < 17; i++) { + float coff; + coff = i; + coff = coff / 16; + + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + /* for Y channel, currently ignore */ + sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0; + /* for U/V channel, 0.25 */ + sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0; + 
sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00; + sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0; + sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw7.table_1y_filter_c4 = + intel_format_convert(coff, 1, 6,0); + sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0; + sampler_8x8_state++; + } + + sampler_8x8->dw152.default_sharpness_level = 0; + sampler_8x8->dw153.adaptive_filter_for_all_channel = 1; + sampler_8x8->dw153.bypass_y_adaptive_filtering = 1; + sampler_8x8->dw153.bypass_x_adaptive_filtering = 1; + + dri_bo_unmap(pp_context->dynamic_state.bo); + + + /* private function & data */ + pp_context->pp_x_steps = gen7_pp_avs_x_steps; + pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; + pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; + + pp_avs_context->dest_x = dst_rect->x; + pp_avs_context->dest_y = dst_rect->y; + pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); + pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); + pp_avs_context->src_w = src_rect->width; + pp_avs_context->src_h = src_rect->height; + pp_avs_context->horiz_range = (float)src_rect->width / src_width; + + int dw = (pp_avs_context->src_w - 1) / 16 + 1; + dw = MAX(dw, dst_rect->width); + + pp_static_parameter->grf1.pointer_to_inline_parameter = 7; + pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ + 
pp_static_parameter->grf2.avs_wa_width = src_width; + pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); + pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); + + pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; + pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - + (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; + pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - + (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + + gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); + + dst_surface->flags = src_surface->flags; + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_pp_initialize( + VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param +) +{ + VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + int bo_size; + unsigned int end_offset; + struct pp_module *pp_module; + int static_param_size, inline_param_size; + + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES, + 4096); + assert(bo); + pp_context->surface_state_binding_table.bo = bo; + + pp_context->idrt.num_interface_descriptors = 0; + + pp_context->sampler_size = 2 * 4096; + + bo_size = 4096 + pp_context->curbe_size + pp_context->sampler_size + + pp_context->idrt_size; + + 
dri_bo_unreference(pp_context->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "dynamic_state", + bo_size, + 4096); + + assert(bo); + pp_context->dynamic_state.bo = bo; + pp_context->dynamic_state.bo_size = bo_size; + + end_offset = 0; + pp_context->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + pp_context->curbe_offset = ALIGN(end_offset, 64); + end_offset = pp_context->curbe_offset + pp_context->curbe_size; + + /* Interface descriptor offset */ + pp_context->idrt_offset = ALIGN(end_offset, 64); + end_offset = pp_context->idrt_offset + pp_context->idrt_size; + + /* Sampler state offset */ + pp_context->sampler_offset = ALIGN(end_offset, 64); + end_offset = pp_context->sampler_offset + pp_context->sampler_size; + + /* update the end offset of dynamic_state */ + pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64); + + static_param_size = sizeof(struct gen7_pp_static_parameter); + inline_param_size = sizeof(struct gen7_pp_inline_parameter); + + memset(pp_context->pp_static_parameter, 0, static_param_size); + memset(pp_context->pp_inline_parameter, 0, inline_param_size); + + assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES); + pp_context->current_pp = pp_index; + pp_module = &pp_context->pp_modules[pp_index]; + + if (pp_module->initialize) + va_status = pp_module->initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + filter_param); + else + va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + + calculate_boundary_block_mask(pp_context, dst_rect); + + return va_status; +} + +static void +gen8_pp_interface_descriptor_table(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct gen8_interface_descriptor_data *desc; + dri_bo *bo; + int pp_index = pp_context->current_pp; + unsigned char *cc_ptr; + + bo = pp_context->dynamic_state.bo; + + dri_bo_map(bo, 1); + assert(bo->virtual); + cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset; + + desc = (struct 
gen8_interface_descriptor_data *) cc_ptr + + pp_context->idrt.num_interface_descriptors; + + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = + pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc */ + desc->desc2.single_program_flow = 1; + desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754; + desc->desc3.sampler_count = 0; /* 1 - 4 samplers used */ + desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5; + desc->desc4.binding_table_entry_count = 0; + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + + desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */ + + dri_bo_unmap(bo); + pp_context->idrt.num_interface_descriptors++; +} + + +static void +gen8_pp_upload_constants(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + unsigned char *constant_buffer; + int param_size; + + assert(sizeof(struct gen7_pp_static_parameter) == 192); + + param_size = sizeof(struct gen7_pp_static_parameter); + + dri_bo_map(pp_context->dynamic_state.bo, 1); + assert(pp_context->dynamic_state.bo->virtual); + constant_buffer = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->curbe_offset; + + memcpy(constant_buffer, pp_context->pp_static_parameter, param_size); + dri_bo_unmap(pp_context->dynamic_state.bo); + return; +} + +static void +gen8_pp_states_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + gen8_pp_interface_descriptor_table(ctx, pp_context); + gen8_pp_upload_constants(ctx, pp_context); +} + +static void +gen6_pp_pipeline_select(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_state_base_address(VADriverContextP ctx, + struct 
i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + /* DW1 Generate state address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4. Surface state address */ + OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + /* DW6. Dynamic state address */ + OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW8. Indirect object address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW10. Instruction base address */ + OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_vfe_state(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | + pp_context->vfe_gpu_state.num_urb_entries << 8); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->vfe_gpu_state.urb_entry_size) << 16 | + /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.curbe_allocation_size)); + /* CURBE Allocation Size, in 256 bits unit */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static 
void +gen8_interface_descriptor_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 6); + + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data)); + OUT_BATCH(batch, pp_context->idrt_offset); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_curbe_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int param_size = 64; + + if (IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_static_parameter); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + param_size); + OUT_BATCH(batch, pp_context->curbe_offset); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_object_walker(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = pp_context->batch; + int x, x_steps, y, y_steps; + int param_size, command_length_in_dws, extra_cmd_in_dws; + dri_bo *command_buffer; + unsigned int *command_ptr; + + param_size = sizeof(struct gen7_pp_inline_parameter); + if (IS_GEN8(i965->intel.device_id)) + param_size = sizeof(struct gen7_pp_inline_parameter); + + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); + command_length_in_dws = 6 + (param_size >> 2); + extra_cmd_in_dws = 2; + command_buffer = dri_bo_alloc(i965->intel.bufmgr, + "command objects buffer", + (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64, + 4096); + + 
dri_bo_map(command_buffer, 1); + command_ptr = command_buffer->virtual; + + for (y = 0; y < y_steps; y++) { + for (x = 0; x < x_steps; x++) { + if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { + + *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + memcpy(command_ptr, pp_context->pp_inline_parameter, param_size); + command_ptr += (param_size >> 2); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + } + } + } + + if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0) + *command_ptr++ = 0; + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(command_buffer); + + if (IS_GEN8(i965->intel.device_id)) { + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } + + dri_bo_unreference(command_buffer); + + /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END + * will cause control to pass back to ring buffer + */ + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); + intel_batchbuffer_start_atomic(batch, 0x1000); +} + +static void +gen8_pp_pipeline_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen6_pp_pipeline_select(ctx, pp_context); + gen8_pp_state_base_address(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen8_pp_curbe_load(ctx, pp_context); + gen8_interface_descriptor_load(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen8_pp_object_walker(ctx, pp_context); + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus +gen8_post_processing( + VADriverContextP ctx, + struct 
i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param +) +{ + VAStatus va_status; + + va_status = gen8_pp_initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + pp_index, + filter_param); + + if (va_status == VA_STATUS_SUCCESS) { + gen8_pp_states_setup(ctx, pp_context); + gen8_pp_pipeline_setup(ctx, pp_context); + } + + return va_status; +} + +void +gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context) +{ + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + pp_context->surface_state_binding_table.bo = NULL; + + dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); + pp_context->pp_dndi_context.stmm_bo = NULL; + + dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); + pp_context->pp_dn_context.stmm_bo = NULL; + + if (pp_context->instruction_state.bo) { + dri_bo_unreference(pp_context->instruction_state.bo); + pp_context->instruction_state.bo = NULL; + } + + if (pp_context->indirect_state.bo) { + dri_bo_unreference(pp_context->indirect_state.bo); + pp_context->indirect_state.bo = NULL; + } + + if (pp_context->dynamic_state.bo) { + dri_bo_unreference(pp_context->dynamic_state.bo); + pp_context->dynamic_state.bo = NULL; + } + + free(pp_context->pp_static_parameter); + free(pp_context->pp_inline_parameter); + pp_context->pp_static_parameter = NULL; + pp_context->pp_inline_parameter = NULL; +} + +#define VPP_CURBE_ALLOCATION_SIZE 32 + +void +gen8_post_processing_context_init(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + struct intel_batchbuffer *batch) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct pp_module *pp_module; + + { + pp_context->vfe_gpu_state.max_num_threads = 60; + 
pp_context->vfe_gpu_state.num_urb_entries = 59; + pp_context->vfe_gpu_state.gpgpu_mode = 0; + pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; + pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + } + + pp_context->intel_post_processing = gen8_post_processing; + + assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); + + if (IS_GEN8(i965->intel.device_id)) + memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); + else { + /* should never get here !!! */ + assert(0); + } + + kernel_size = 4096 ; + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + kernel_size += pp_module->kernel.size; + } + } + + pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (pp_context->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n"); + return; + } + + assert(pp_context->instruction_state.bo); + + + pp_context->instruction_state.bo_size = kernel_size; + pp_context->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(pp_context->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual); + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + kernel_offset = ALIGN(end_offset, 64); + pp_module->kernel.kernel_offset = kernel_offset; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + + memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size); + end_offset = kernel_offset + pp_module->kernel.size; + } + } + + pp_context->instruction_state.end_offset = ALIGN(end_offset, 64); + + dri_bo_unmap(pp_context->instruction_state.bo); + + /* static & inline parameters */ + if (IS_GEN8(i965->intel.device_id)) { + pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); + 
pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); + } + + pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; + pp_context->pp_dndi_context.current_out_obj_surface = NULL; + pp_context->pp_dndi_context.frame_order = -1; + pp_context->batch = batch; + + pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data); + pp_context->curbe_size = 256; +} diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 5935a9a6..8d53676d 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -699,13 +699,6 @@ static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_pos const VARectangle *dst_rect, void *filter_param); -static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param); - static struct pp_module pp_modules_gen7[] = { { { @@ -1151,260 +1144,6 @@ static struct pp_module pp_modules_gen75[] = { }; -/* TODO: Modify the shader and then compile it again. 
- * Currently it is derived from Haswell*/ -static const uint32_t pp_null_gen8[][4] = { -}; - -static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" -}; - -static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_pl3.g8b" -}; - -static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen8/pl3_to_pl2.g8b" -}; - -static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen8/pl3_to_pl3.g8b" -}; - -static const uint32_t pp_nv12_scaling_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" -}; - -static const uint32_t pp_nv12_avs_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" -}; - -static const uint32_t pp_nv12_dndi_gen8[][4] = { -// #include "shaders/post_processing/gen7/dndi.g75b" -}; - -static const uint32_t pp_nv12_dn_gen8[][4] = { -// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b" -}; -static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_pa.g8b" -}; -static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { -#include "shaders/post_processing/gen8/pl3_to_pa.g8b" -}; -static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen8/pa_to_pl2.g8b" -}; -static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { -#include "shaders/post_processing/gen8/pa_to_pl3.g8b" -}; -static const uint32_t pp_pa_load_save_pa_gen8[][4] = { -#include "shaders/post_processing/gen8/pa_to_pa.g8b" -}; -static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" -}; -static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { -#include "shaders/post_processing/gen8/pl2_to_rgbx.g8b" -}; - - -static struct pp_module pp_modules_gen8[] = { - { - { - "NULL module (for testing)", - PP_NULL, - pp_null_gen8, - sizeof(pp_null_gen8), - NULL, 
- }, - - pp_null_initialize, - }, - - { - { - "NV12_NV12", - PP_NV12_LOAD_SAVE_N12, - pp_nv12_load_save_nv12_gen8, - sizeof(pp_nv12_load_save_nv12_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12_PL3", - PP_NV12_LOAD_SAVE_PL3, - pp_nv12_load_save_pl3_gen8, - sizeof(pp_nv12_load_save_pl3_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PL3_NV12", - PP_PL3_LOAD_SAVE_N12, - pp_pl3_load_save_nv12_gen8, - sizeof(pp_pl3_load_save_nv12_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PL3_PL3", - PP_PL3_LOAD_SAVE_N12, - pp_pl3_load_save_pl3_gen8, - sizeof(pp_pl3_load_save_pl3_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12 Scaling module", - PP_NV12_SCALING, - pp_nv12_scaling_gen8, - sizeof(pp_nv12_scaling_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12 AVS module", - PP_NV12_AVS, - pp_nv12_avs_gen8, - sizeof(pp_nv12_avs_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12 DNDI module", - PP_NV12_DNDI, - pp_nv12_dndi_gen8, - sizeof(pp_nv12_dndi_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12 DN module", - PP_NV12_DN, - pp_nv12_dn_gen8, - sizeof(pp_nv12_dn_gen8), - NULL, - }, - - pp_null_initialize, - }, - { - { - "NV12_PA module", - PP_NV12_LOAD_SAVE_PA, - pp_nv12_load_save_pa_gen8, - sizeof(pp_nv12_load_save_pa_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PL3_PA module", - PP_PL3_LOAD_SAVE_PA, - pp_pl3_load_save_pa_gen8, - sizeof(pp_pl3_load_save_pa_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PA_NV12 module", - PP_PA_LOAD_SAVE_NV12, - pp_pa_load_save_nv12_gen8, - sizeof(pp_pa_load_save_nv12_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PA_PL3 module", - PP_PA_LOAD_SAVE_PL3, - pp_pa_load_save_pl3_gen8, - sizeof(pp_pa_load_save_pl3_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "PA_PA module", - 
PP_PA_LOAD_SAVE_PA, - pp_pa_load_save_pa_gen8, - sizeof(pp_pa_load_save_pa_gen8), - NULL, - }, - - pp_null_initialize, - }, - - { - { - "RGBX_NV12 module", - PP_RGBX_LOAD_SAVE_NV12, - pp_rgbx_load_save_nv12_gen8, - sizeof(pp_rgbx_load_save_nv12_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - - { - { - "NV12_RGBX module", - PP_NV12_LOAD_SAVE_RGBX, - pp_nv12_load_save_rgbx_gen8, - sizeof(pp_nv12_load_save_rgbx_gen8), - NULL, - }, - - gen8_pp_plx_avs_initialize, - }, - -}; - - static int pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) { @@ -1494,25 +1233,6 @@ gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling) } } -static void -gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - ss->ss0.tiled_surface = 0; - ss->ss0.tile_walk = 0; - break; - case I915_TILING_X: - ss->ss0.tiled_surface = 1; - ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: - ss->ss0.tiled_surface = 1; - ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; - break; - } -} - static void gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling) { @@ -1532,25 +1252,6 @@ gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling) } } -static void -gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - ss->ss2.tiled_surface = 0; - ss->ss2.tile_walk = 0; - break; - case I915_TILING_X: - ss->ss2.tiled_surface = 1; - ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: - ss->ss2.tiled_surface = 1; - ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; - break; - } -} - static void ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context) { @@ -1990,117 +1691,35 @@ gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con dri_bo_unmap(ss2_bo); } -static void 
-gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - dri_bo *surf_bo, unsigned long surf_bo_offset, - int width, int height, int pitch, int format, - int index, int is_target) +static void +pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *surface, + int base_index, int is_target, + int *width, int *height, int *pitch, int *offset) { - struct gen8_surface_state *ss; - dri_bo *ss_bo; - unsigned int tiling; - unsigned int swizzle; - - dri_bo_get_tiling(surf_bo, &tiling, &swizzle); - ss_bo = pp_context->surface_state_binding_table.bo; - assert(ss_bo); - - dri_bo_map(ss_bo, True); - assert(ss_bo->virtual); - ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); - memset(ss, 0, sizeof(*ss)); - ss->ss0.surface_type = I965_SURFACE_2D; - ss->ss0.surface_format = format; - ss->ss8.base_addr = surf_bo->offset + surf_bo_offset; - ss->ss2.width = width - 1; - ss->ss2.height = height - 1; - ss->ss3.pitch = pitch - 1; - - /* Always set 1(align 4 mode) per B-spec */ - ss->ss0.vertical_alignment = 1; - ss->ss0.horizontal_alignment = 1; - - gen8_pp_set_surface_tiling(ss, tiling); - gen8_render_set_surface_scs(ss); - dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_RENDER, is_target ? 
I915_GEM_DOMAIN_RENDER : 0, - surf_bo_offset, - SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), - surf_bo); - ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); - dri_bo_unmap(ss_bo); -} - - -static void -gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - dri_bo *surf_bo, unsigned long surf_bo_offset, - int width, int height, int wpitch, - int xoffset, int yoffset, - int format, int interleave_chroma, - int index) -{ - struct gen8_surface_state2 *ss2; - dri_bo *ss2_bo; - unsigned int tiling; - unsigned int swizzle; - - dri_bo_get_tiling(surf_bo, &tiling, &swizzle); - ss2_bo = pp_context->surface_state_binding_table.bo; - assert(ss2_bo); - - dri_bo_map(ss2_bo, True); - assert(ss2_bo->virtual); - ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index)); - memset(ss2, 0, sizeof(*ss2)); - ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset; - ss2->ss1.cbcr_pixel_offset_v_direction = 0; - ss2->ss1.width = width - 1; - ss2->ss1.height = height - 1; - ss2->ss2.pitch = wpitch - 1; - ss2->ss2.interleave_chroma = interleave_chroma; - ss2->ss2.surface_format = format; - ss2->ss3.x_offset_for_cb = xoffset; - ss2->ss3.y_offset_for_cb = yoffset; - gen8_pp_set_surface2_tiling(ss2, tiling); - dri_bo_emit_reloc(ss2_bo, - I915_GEM_DOMAIN_RENDER, 0, - surf_bo_offset, - SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6), - surf_bo); - ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); - dri_bo_unmap(ss2_bo); -} - -static void -pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *surface, - int base_index, int is_target, - int *width, int *height, int *pitch, int *offset) -{ - struct object_surface *obj_surface; - struct object_image *obj_image; - dri_bo *bo; - int fourcc = 
pp_get_surface_fourcc(ctx, surface); - const int Y = 0; - const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1; - const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2; - const int UV = 1; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); - int scale_factor_of_1st_plane_width_in_byte = 1; - - if (surface->type == I965_SURFACE_TYPE_SURFACE) { - obj_surface = (struct object_surface *)surface->base; - bo = obj_surface->bo; - width[0] = obj_surface->orig_width; - height[0] = obj_surface->orig_height; - pitch[0] = obj_surface->width; - offset[0] = 0; + struct object_surface *obj_surface; + struct object_image *obj_image; + dri_bo *bo; + int fourcc = pp_get_surface_fourcc(ctx, surface); + const int Y = 0; + const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1; + const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 
1 : 2; + const int UV = 1; + int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); + int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); + int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || + fourcc == VA_FOURCC('R', 'G', 'B', 'X') || + fourcc == VA_FOURCC('B', 'G', 'R', 'A') || + fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + int scale_factor_of_1st_plane_width_in_byte = 1; + + if (surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)surface->base; + bo = obj_surface->bo; + width[0] = obj_surface->orig_width; + height[0] = obj_surface->orig_height; + pitch[0] = obj_surface->width; + offset[0] = 0; if (full_packed_format) { scale_factor_of_1st_plane_width_in_byte = 4; @@ -2358,182 +1977,6 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc } } -static void -gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *surface, - int base_index, int is_target, - int *width, int *height, int *pitch, int *offset) -{ - struct object_surface *obj_surface; - struct object_image *obj_image; - dri_bo *bo; - int fourcc = pp_get_surface_fourcc(ctx, surface); - const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; - const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 
1 : 2; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); - - if (surface->type == I965_SURFACE_TYPE_SURFACE) { - obj_surface = (struct object_surface *)surface->base; - bo = obj_surface->bo; - width[0] = obj_surface->orig_width; - height[0] = obj_surface->orig_height; - pitch[0] = obj_surface->width; - offset[0] = 0; - - if (packed_yuv) { - if (is_target) - width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ - - } else if (rgbx_format) { - if (is_target) - width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - } - - width[1] = obj_surface->cb_cr_width; - height[1] = obj_surface->cb_cr_height; - pitch[1] = obj_surface->cb_cr_pitch; - offset[1] = obj_surface->y_cb_offset * obj_surface->width; - - width[2] = obj_surface->cb_cr_width; - height[2] = obj_surface->cb_cr_height; - pitch[2] = obj_surface->cb_cr_pitch; - offset[2] = obj_surface->y_cr_offset * obj_surface->width; - } else { - obj_image = (struct object_image *)surface->base; - bo = obj_image->bo; - width[0] = obj_image->image.width; - height[0] = obj_image->image.height; - pitch[0] = obj_image->image.pitches[0]; - offset[0] = obj_image->image.offsets[0]; - - if (rgbx_format) { - if (is_target) - width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ - } else if (packed_yuv) { - if (is_target) - width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels 
*/ - } else if (interleaved_uv) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[1]; - offset[1] = obj_image->image.offsets[1]; - } else { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[U]; - offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height / 2; - pitch[2] = obj_image->image.pitches[V]; - offset[2] = obj_image->image.offsets[V]; - } - } - - if (is_target) { - gen8_pp_set_surface_state(ctx, pp_context, - bo, 0, - width[0] / 4, height[0], pitch[0], - I965_SURFACEFORMAT_R8_UINT, - base_index, 1); - if (rgbx_format) { - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - /* the format is MSB: X-B-G-R */ - pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { - /* It is stored as MSB: X-R-G-B */ - pp_static_parameter->grf2.save_avs_rgb_swap = 1; - } - } - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 2, height[1], pitch[1], - I965_SURFACEFORMAT_R8G8_SINT, - base_index + 1, 1); - } else { - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 4, height[1], pitch[1], - I965_SURFACEFORMAT_R8_SINT, - base_index + 1, 1); - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[2], - width[2] / 4, height[2], pitch[2], - I965_SURFACEFORMAT_R8_SINT, - base_index + 2, 1); - } - } - } else { - int format0 = SURFACE_FORMAT_Y8_UNORM; - - switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): - format0 = SURFACE_FORMAT_YCRCB_NORMAL; - break; - - case VA_FOURCC('U', 'Y', 'V', 'Y'): - format0 = SURFACE_FORMAT_YCRCB_SWAPY; - break; - - default: - break; - } - if (rgbx_format) { - struct gen7_pp_static_parameter *pp_static_parameter 
= pp_context->pp_static_parameter; - /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ - format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; - pp_static_parameter->grf2.src_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { - pp_static_parameter->grf2.src_avs_rgb_swap = 1; - } - } - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[0], - width[0], height[0], pitch[0], - 0, 0, - format0, 0, - base_index); - - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8B8_UNORM, 0, - base_index + 1); - } else { - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 1); - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[2], - width[2], height[2], pitch[2], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 2); - } - } - } -} - static int pp_null_x_steps(void *private_context) { @@ -3507,232 +2950,6 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con return VA_STATUS_SUCCESS; } -static VAStatus -gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param) -{ -/* TODO: Add the sampler_8x8 state */ - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct gen8_sampler_8x8_avs *sampler_8x8; - struct i965_sampler_8x8_coefficient *sampler_8x8_state; - int i; - int width[3], height[3], pitch[3], offset[3]; - int src_width, src_height; - unsigned char *cc_ptr; - - memset(pp_static_parameter, 0, sizeof(struct 
gen7_pp_static_parameter)); - - /* source surface */ - gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, - width, height, pitch, offset); - src_height = height[0]; - src_width = width[0]; - - /* destination surface */ - gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, - width, height, pitch, offset); - - /* sampler 8x8 state */ - dri_bo_map(pp_context->dynamic_state.bo, True); - assert(pp_context->dynamic_state.bo->virtual); - - cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual + - pp_context->sampler_offset; - /* Currently only one gen8 sampler_8x8 is initialized */ - sampler_8x8 = (struct gen8_sampler_8x8_avs *)cc_ptr; - memset(sampler_8x8, 0, sizeof(*sampler_8x8)); - - sampler_8x8->dw0.gain_factor = 44; - sampler_8x8->dw0.weak_edge_threshold = 1; - sampler_8x8->dw0.strong_edge_threshold = 8; - /* Use the value like that on Ivy instead of default - * sampler_8x8->dw0.r3x_coefficient = 5; - */ - sampler_8x8->dw0.r3x_coefficient = 27; - sampler_8x8->dw0.r3c_coefficient = 5; - - sampler_8x8->dw2.global_noise_estimation = 255; - sampler_8x8->dw2.non_edge_weight = 1; - sampler_8x8->dw2.regular_weight = 2; - sampler_8x8->dw2.strong_edge_weight = 7; - /* Use the value like that on Ivy instead of default - * sampler_8x8->dw2.r5x_coefficient = 7; - * sampler_8x8->dw2.r5cx_coefficient = 7; - * sampler_8x8->dw2.r5c_coefficient = 7; - */ - sampler_8x8->dw2.r5x_coefficient = 9; - sampler_8x8->dw2.r5cx_coefficient = 8; - sampler_8x8->dw2.r5c_coefficient = 3; - - sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */ - sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */ - sampler_8x8->dw3.sat_max = 0x1f; - sampler_8x8->dw3.hue_max = 14; - /* The 8tap filter will determine whether the adaptive Filter is - * applied for all channels(dw153). - * If the 8tap filter is disabled, the adaptive filter should be disabled. 
- * Only when 8tap filter is enabled, it can be enabled or not - */ - sampler_8x8->dw3.enable_8tap_filter = 3; - sampler_8x8->dw3.ief4_smooth_enable = 0; - - sampler_8x8->dw4.s3u = 0; - sampler_8x8->dw4.diamond_margin = 4; - sampler_8x8->dw4.vy_std_enable = 0; - sampler_8x8->dw4.umid = 110; - sampler_8x8->dw4.vmid = 154; - - sampler_8x8->dw5.diamond_dv = 0; - sampler_8x8->dw5.diamond_th = 35; - sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */ - sampler_8x8->dw5.hs_margin = 3; - sampler_8x8->dw5.diamond_du = 2; - - sampler_8x8->dw6.y_point1 = 46; - sampler_8x8->dw6.y_point2 = 47; - sampler_8x8->dw6.y_point3 = 254; - sampler_8x8->dw6.y_point4 = 255; - - sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */ - - sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */ - sampler_8x8->dw8.p0l = 46; - sampler_8x8->dw8.p1l = 216; - - sampler_8x8->dw9.p2l = 236; - sampler_8x8->dw9.p3l = 236; - sampler_8x8->dw9.b0l = 133; - sampler_8x8->dw9.b1l = 130; - - sampler_8x8->dw10.b2l = 130; - sampler_8x8->dw10.b3l = 130; - /* s0l = -5 / 256. 
s2.8 */ - sampler_8x8->dw10.s0l = 1029; /* s0l = 0 */ - sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */ - - sampler_8x8->dw11.s1l = 0; - sampler_8x8->dw11.s2l = 0; - - sampler_8x8->dw12.s3l = 0; - sampler_8x8->dw12.p0u = 46; - sampler_8x8->dw12.p1u = 66; - sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */ - - sampler_8x8->dw13.p2u = 130; - sampler_8x8->dw13.p3u = 236; - sampler_8x8->dw13.b0u = 143; - sampler_8x8->dw13.b1u = 163; - - sampler_8x8->dw14.b2u = 200; - sampler_8x8->dw14.b3u = 140; - sampler_8x8->dw14.s0u = 256; /* s0u = 0 */ - - sampler_8x8->dw15.s1u = 113; /* s1u = 0 */ - sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */ - - sampler_8x8_state = sampler_8x8->coefficients; - - for (i = 0; i < 17; i++) { - float coff; - coff = i; - coff = coff / 16; - - memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); - /* for Y channel, currently ignore */ - sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0; - sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0; - sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0; - sampler_8x8_state->dw0.table_0x_filter_c3 = - intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->dw1.table_0x_filter_c4 = - intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0; - sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0; - sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0; - sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0; - sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0; - sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0; - sampler_8x8_state->dw2.table_0y_filter_c3 = - intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->dw3.table_0y_filter_c4 = - intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0; - sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0; - sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0; - /* for U/V channel, 0.25 */ - sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0; - sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0; - 
sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0; - sampler_8x8_state->dw4.table_1x_filter_c3 = - intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->dw5.table_1x_filter_c4 = - intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00; - sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0; - sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0; - sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0; - sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0; - sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0; - sampler_8x8_state->dw6.table_1y_filter_c3 = - intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->dw7.table_1y_filter_c4 = - intel_format_convert(coff, 1, 6,0); - sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0; - sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0; - sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0; - sampler_8x8_state++; - } - - sampler_8x8->dw152.default_sharpness_level = 0; - sampler_8x8->dw153.adaptive_filter_for_all_channel = 1; - sampler_8x8->dw153.bypass_y_adaptive_filtering = 1; - sampler_8x8->dw153.bypass_x_adaptive_filtering = 1; - - dri_bo_unmap(pp_context->dynamic_state.bo); - - - /* private function & data */ - pp_context->pp_x_steps = gen7_pp_avs_x_steps; - pp_context->pp_y_steps = gen7_pp_avs_y_steps; - pp_context->private_context = &pp_context->pp_avs_context; - pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; - - pp_avs_context->dest_x = dst_rect->x; - pp_avs_context->dest_y = dst_rect->y; - pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); - pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); - pp_avs_context->src_w = src_rect->width; - pp_avs_context->src_h = src_rect->height; - pp_avs_context->horiz_range = (float)src_rect->width / src_width; - - int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, dst_rect->width); - - pp_static_parameter->grf1.pointer_to_inline_parameter = 7; - pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ - 
pp_static_parameter->grf2.avs_wa_width = src_width; - pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); - - pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; - pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; - pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - - (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; - - gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); - - dst_surface->flags = src_surface->flags; - - return VA_STATUS_SUCCESS; -} - static int pp_dndi_x_steps(void *private_context) { @@ -4905,94 +4122,6 @@ gen6_pp_initialize( } -static VAStatus -gen8_pp_initialize( - VADriverContextP ctx, - struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - int pp_index, - void * filter_param -) -{ - VAStatus va_status; - struct i965_driver_data *i965 = i965_driver_data(ctx); - dri_bo *bo; - int bo_size; - unsigned int end_offset; - struct pp_module *pp_module; - int static_param_size, inline_param_size; - - dri_bo_unreference(pp_context->surface_state_binding_table.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "surface state & binding table", - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES, - 4096); - assert(bo); - pp_context->surface_state_binding_table.bo = bo; - - pp_context->idrt.num_interface_descriptors = 0; - - pp_context->sampler_size = 2 * 4096; - - bo_size = 4096 + 
pp_context->curbe_size + pp_context->sampler_size - + pp_context->idrt_size; - - dri_bo_unreference(pp_context->dynamic_state.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "dynamic_state", - bo_size, - 4096); - - assert(bo); - pp_context->dynamic_state.bo = bo; - pp_context->dynamic_state.bo_size = bo_size; - - end_offset = 0; - pp_context->dynamic_state.end_offset = 0; - - /* Constant buffer offset */ - pp_context->curbe_offset = ALIGN(end_offset, 64); - end_offset = pp_context->curbe_offset + pp_context->curbe_size; - - /* Interface descriptor offset */ - pp_context->idrt_offset = ALIGN(end_offset, 64); - end_offset = pp_context->idrt_offset + pp_context->idrt_size; - - /* Sampler state offset */ - pp_context->sampler_offset = ALIGN(end_offset, 64); - end_offset = pp_context->sampler_offset + pp_context->sampler_size; - - /* update the end offset of dynamic_state */ - pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64); - - static_param_size = sizeof(struct gen7_pp_static_parameter); - inline_param_size = sizeof(struct gen7_pp_inline_parameter); - - memset(pp_context->pp_static_parameter, 0, static_param_size); - memset(pp_context->pp_inline_parameter, 0, inline_param_size); - - assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES); - pp_context->current_pp = pp_index; - pp_module = &pp_context->pp_modules[pp_index]; - - if (pp_module->initialize) - va_status = pp_module->initialize(ctx, pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - filter_param); - else - va_status = VA_STATUS_ERROR_UNIMPLEMENTED; - - calculate_boundary_block_mask(pp_context, dst_rect); - - return va_status; -} - static void gen6_pp_interface_descriptor_table(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5039,41 +4168,6 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, pp_context->idrt.num_interface_descriptors++; } -static void -gen8_pp_interface_descriptor_table(VADriverContextP ctx, - struct i965_post_processing_context 
*pp_context) -{ - struct gen8_interface_descriptor_data *desc; - dri_bo *bo; - int pp_index = pp_context->current_pp; - unsigned char *cc_ptr; - - bo = pp_context->dynamic_state.bo; - - dri_bo_map(bo, 1); - assert(bo->virtual); - cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset; - - desc = (struct gen8_interface_descriptor_data *) cc_ptr + - pp_context->idrt.num_interface_descriptors; - - memset(desc, 0, sizeof(*desc)); - desc->desc0.kernel_start_pointer = - pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc */ - desc->desc2.single_program_flow = 1; - desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754; - desc->desc3.sampler_count = 0; /* 1 - 4 samplers used */ - desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5; - desc->desc4.binding_table_entry_count = 0; - desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); - desc->desc5.constant_urb_entry_read_offset = 0; - - desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */ - - dri_bo_unmap(bo); - pp_context->idrt.num_interface_descriptors++; -} - static void gen6_pp_upload_constants(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5106,35 +4200,6 @@ gen6_pp_states_setup(VADriverContextP ctx, gen6_pp_upload_constants(ctx, pp_context); } -static void -gen8_pp_upload_constants(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - unsigned char *constant_buffer; - int param_size; - - assert(sizeof(struct gen7_pp_static_parameter) == 192); - - param_size = sizeof(struct gen7_pp_static_parameter); - - dri_bo_map(pp_context->dynamic_state.bo, 1); - assert(pp_context->dynamic_state.bo->virtual); - constant_buffer = (unsigned char *) pp_context->dynamic_state.bo->virtual + - pp_context->curbe_offset; - - memcpy(constant_buffer, pp_context->pp_static_parameter, param_size); - dri_bo_unmap(pp_context->dynamic_state.bo); - return; -} - -static void -gen8_pp_states_setup(VADriverContextP ctx, - struct 
i965_post_processing_context *pp_context) -{ - gen8_pp_interface_descriptor_table(ctx, pp_context); - gen8_pp_upload_constants(ctx, pp_context); -} - static void gen6_pp_pipeline_select(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5166,41 +4231,6 @@ gen6_pp_state_base_address(VADriverContextP ctx, ADVANCE_BATCH(batch); } -static void -gen8_pp_state_base_address(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct intel_batchbuffer *batch = pp_context->batch; - - BEGIN_BATCH(batch, 16); - OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); - /* DW1 Generate state address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - /* DW4. Surface state address */ - OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ - OUT_BATCH(batch, 0); - /* DW6. Dynamic state address */ - OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, - 0, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - - /* DW8. Indirect object address */ - OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - - /* DW10. 
Instruction base address */ - OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0); - - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); - OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(batch); -} - static void gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5225,31 +4255,6 @@ gen6_pp_vfe_state(VADriverContextP ctx, ADVANCE_BATCH(batch); } -static void -gen8_pp_vfe_state(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct intel_batchbuffer *batch = pp_context->batch; - - BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, - (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | - pp_context->vfe_gpu_state.num_urb_entries << 8); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, - (pp_context->vfe_gpu_state.urb_entry_size) << 16 | - /* URB Entry Allocation Size, in 256 bits unit */ - (pp_context->vfe_gpu_state.curbe_allocation_size)); - /* CURBE Allocation Size, in 256 bits unit */ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); -} - static void gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -5294,25 +4299,6 @@ gen6_interface_descriptor_load(VADriverContextP ctx, ADVANCE_BATCH(batch); } -static void -gen8_interface_descriptor_load(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct intel_batchbuffer *batch = pp_context->batch; - - BEGIN_BATCH(batch, 6); - - OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); - OUT_BATCH(batch, 0); - - OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, - pp_context->idrt.num_interface_descriptors * sizeof(struct 
gen8_interface_descriptor_data)); - OUT_BATCH(batch, pp_context->idrt_offset); - ADVANCE_BATCH(batch); -} - static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) { struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; @@ -5451,117 +4437,6 @@ gen6_pp_pipeline_setup(VADriverContextP ctx, intel_batchbuffer_end_atomic(batch); } -static void -gen8_pp_curbe_load(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct intel_batchbuffer *batch = pp_context->batch; - struct i965_driver_data *i965 = i965_driver_data(ctx); - int param_size = 64; - - if (IS_GEN8(i965->intel.device_id)) - param_size = sizeof(struct gen7_pp_static_parameter); - - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, - param_size); - OUT_BATCH(batch, pp_context->curbe_offset); - ADVANCE_BATCH(batch); -} - -static void -gen8_pp_object_walker(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = pp_context->batch; - int x, x_steps, y, y_steps; - int param_size, command_length_in_dws, extra_cmd_in_dws; - dri_bo *command_buffer; - unsigned int *command_ptr; - - param_size = sizeof(struct gen7_pp_inline_parameter); - if (IS_GEN8(i965->intel.device_id)) - param_size = sizeof(struct gen7_pp_inline_parameter); - - x_steps = pp_context->pp_x_steps(pp_context->private_context); - y_steps = pp_context->pp_y_steps(pp_context->private_context); - command_length_in_dws = 6 + (param_size >> 2); - extra_cmd_in_dws = 2; - command_buffer = dri_bo_alloc(i965->intel.bufmgr, - "command objects buffer", - (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64, - 4096); - - dri_bo_map(command_buffer, 1); - command_ptr = command_buffer->virtual; - - for (y = 0; y < y_steps; y++) { - for (x = 0; x < 
x_steps; x++) { - if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { - - *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); - *command_ptr++ = 0; - *command_ptr++ = 0; - *command_ptr++ = 0; - *command_ptr++ = 0; - *command_ptr++ = 0; - memcpy(command_ptr, pp_context->pp_inline_parameter, param_size); - command_ptr += (param_size >> 2); - - *command_ptr++ = CMD_MEDIA_STATE_FLUSH; - *command_ptr++ = 0; - } - } - } - - if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0) - *command_ptr++ = 0; - - *command_ptr++ = MI_BATCH_BUFFER_END; - *command_ptr++ = 0; - - dri_bo_unmap(command_buffer); - - if (IS_GEN8(i965->intel.device_id)) { - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); - OUT_RELOC(batch, command_buffer, - I915_GEM_DOMAIN_COMMAND, 0, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - } - - dri_bo_unreference(command_buffer); - - /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END - * will cause control to pass back to ring buffer - */ - intel_batchbuffer_end_atomic(batch); - intel_batchbuffer_flush(batch); - intel_batchbuffer_start_atomic(batch, 0x1000); -} - -static void -gen8_pp_pipeline_setup(VADriverContextP ctx, - struct i965_post_processing_context *pp_context) -{ - struct intel_batchbuffer *batch = pp_context->batch; - - intel_batchbuffer_start_atomic(batch, 0x1000); - intel_batchbuffer_emit_mi_flush(batch); - gen6_pp_pipeline_select(ctx, pp_context); - gen8_pp_state_base_address(ctx, pp_context); - gen8_pp_vfe_state(ctx, pp_context); - gen8_pp_curbe_load(ctx, pp_context); - gen8_interface_descriptor_load(ctx, pp_context); - gen8_pp_vfe_state(ctx, pp_context); - gen8_pp_object_walker(ctx, pp_context); - intel_batchbuffer_end_atomic(batch); -} - static VAStatus gen6_post_processing( VADriverContextP ctx, @@ -5595,36 +4470,6 @@ gen6_post_processing( return va_status; } -static VAStatus -gen8_post_processing( - VADriverContextP ctx, - struct 
i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - int pp_index, - void * filter_param -) -{ - VAStatus va_status; - - va_status = gen8_pp_initialize(ctx, pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - pp_index, - filter_param); - - if (va_status == VA_STATUS_SUCCESS) { - gen8_pp_states_setup(ctx, pp_context); - gen8_pp_pipeline_setup(ctx, pp_context); - } - - return va_status; -} - static VAStatus i965_post_processing_internal( VADriverContextP ctx, @@ -6267,39 +5112,6 @@ i965_image_processing(VADriverContextP ctx, return status; } -static void -gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context) -{ - dri_bo_unreference(pp_context->surface_state_binding_table.bo); - pp_context->surface_state_binding_table.bo = NULL; - - dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); - pp_context->pp_dndi_context.stmm_bo = NULL; - - dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); - pp_context->pp_dn_context.stmm_bo = NULL; - - if (pp_context->instruction_state.bo) { - dri_bo_unreference(pp_context->instruction_state.bo); - pp_context->instruction_state.bo = NULL; - } - - if (pp_context->indirect_state.bo) { - dri_bo_unreference(pp_context->indirect_state.bo); - pp_context->indirect_state.bo = NULL; - } - - if (pp_context->dynamic_state.bo) { - dri_bo_unreference(pp_context->dynamic_state.bo); - pp_context->dynamic_state.bo = NULL; - } - - free(pp_context->pp_static_parameter); - free(pp_context->pp_inline_parameter); - pp_context->pp_static_parameter = NULL; - pp_context->pp_inline_parameter = NULL; -} - static void i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context) { @@ -6366,98 +5178,6 @@ i965_post_processing_terminate(VADriverContextP ctx) #define VPP_CURBE_ALLOCATION_SIZE 32 - -static void -gen8_post_processing_context_init(VADriverContextP 
ctx, - struct i965_post_processing_context *pp_context, - struct intel_batchbuffer *batch) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - int i, kernel_size; - unsigned int kernel_offset, end_offset; - unsigned char *kernel_ptr; - struct pp_module *pp_module; - - { - pp_context->vfe_gpu_state.max_num_threads = 60; - pp_context->vfe_gpu_state.num_urb_entries = 59; - pp_context->vfe_gpu_state.gpgpu_mode = 0; - pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; - pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; - } - - pp_context->intel_post_processing = gen8_post_processing; - - assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); - - if (IS_GEN8(i965->intel.device_id)) - memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); - else { - /* should never get here !!! */ - assert(0); - } - - kernel_size = 4096 ; - - for (i = 0; i < NUM_PP_MODULES; i++) { - pp_module = &pp_context->pp_modules[i]; - - if (pp_module->kernel.bin && pp_module->kernel.size) { - kernel_size += pp_module->kernel.size; - } - } - - pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, - "kernel shader", - kernel_size, - 0x1000); - if (pp_context->instruction_state.bo == NULL) { - WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n"); - return; - } - - assert(pp_context->instruction_state.bo); - - - pp_context->instruction_state.bo_size = kernel_size; - pp_context->instruction_state.end_offset = 0; - end_offset = 0; - - dri_bo_map(pp_context->instruction_state.bo, 1); - kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual); - - for (i = 0; i < NUM_PP_MODULES; i++) { - pp_module = &pp_context->pp_modules[i]; - - kernel_offset = ALIGN(end_offset, 64); - pp_module->kernel.kernel_offset = kernel_offset; - - if (pp_module->kernel.bin && pp_module->kernel.size) { - - memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size); - end_offset = kernel_offset 
+ pp_module->kernel.size; - } - } - - pp_context->instruction_state.end_offset = ALIGN(end_offset, 64); - - dri_bo_unmap(pp_context->instruction_state.bo); - - /* static & inline parameters */ - if (IS_GEN8(i965->intel.device_id)) { - pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); - pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); - } - - pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; - pp_context->pp_dndi_context.current_out_obj_surface = NULL; - pp_context->pp_dndi_context.frame_order = -1; - pp_context->batch = batch; - - pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data); - pp_context->curbe_size = 256; -} - static void i965_post_processing_context_init(VADriverContextP ctx, struct i965_post_processing_context *pp_context, diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index e76e9c51..29b8cdc2 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -575,4 +575,13 @@ i965_post_processing_terminate(VADriverContextP ctx); bool i965_post_processing_init(VADriverContextP ctx); + +extern void +gen8_post_processing_context_init(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + struct intel_batchbuffer *batch); + +extern void +gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context); + #endif /* __I965_POST_PROCESSING_H__ */ -- cgit v1.2.1 From a5259c1c32bdd60582a79f7cffaff468fd43ac30 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 14 Mar 2014 15:16:17 +0800 Subject: Add the support of create surface based on YV16 format Reviewed-by: Wind Yuan Tested-by: Wind Yuan Signed-off-by: Zhao Yakui (cherry picked from commit 12e7421ce1ed2627270dcb281af4d760afeb7209) --- src/i965_drv_video.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 8c5894cc..6bd6c300 100755 
--- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -776,7 +776,8 @@ i965_surface_native_memory(VADriverContextP ctx, // todo, should we disable tiling for 422 format? if (expected_fourcc == VA_FOURCC('I', '4', '2', '0') || expected_fourcc == VA_FOURCC('I', 'Y', 'U', 'V') || - expected_fourcc == VA_FOURCC('Y', 'V', '1', '2')) + expected_fourcc == VA_FOURCC('Y', 'V', '1', '2') || + expected_fourcc == VA_FOURCC('Y', 'V', '1', '6')) tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surface, tiling, expected_fourcc, get_sampling_from_fourcc(expected_fourcc)); @@ -923,6 +924,19 @@ i965_suface_external_memory(VADriverContextP ctx, break; + case VA_FOURCC('Y', 'V', '1', '6'): + assert(memory_attibute->num_planes == 3); + assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + + obj_surface->subsampling = SUBSAMPLE_YUV422H; + obj_surface->y_cr_offset = memory_attibute->offsets[1] / obj_surface->width; + obj_surface->y_cb_offset = memory_attibute->offsets[2] / obj_surface->width; + obj_surface->cb_cr_width = obj_surface->orig_width / 2; + obj_surface->cb_cr_height = obj_surface->orig_height; + obj_surface->cb_cr_pitch = memory_attibute->pitches[1]; + + break; + case VA_FOURCC('4', '2', '2', 'V'): assert(memory_attibute->num_planes == 3); assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); @@ -3004,6 +3018,15 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + obj_surface->height / 2; break; + case VA_FOURCC('Y', 'V', '1', '6'): + obj_surface->cb_cr_width = obj_surface->orig_width / 2; + obj_surface->cb_cr_height = obj_surface->orig_height; + obj_surface->y_cr_offset = obj_surface->height; + obj_surface->y_cb_offset = obj_surface->y_cr_offset + ALIGN(obj_surface->cb_cr_height, 32) / 2; + obj_surface->cb_cr_pitch = obj_surface->width / 2; + region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32); + break; + case VA_FOURCC('Y', 'V', '1', '2'): case VA_FOURCC('I', '4', '2', '0'): if 
(fourcc == VA_FOURCC('Y', 'V', '1', '2')) { @@ -3297,6 +3320,7 @@ get_sampling_from_fourcc(unsigned int fourcc) case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): case VA_FOURCC('4', '2', '2', 'H'): + case VA_FOURCC('Y', 'V', '1', '6'): surface_sampling = SUBSAMPLE_YUV422H; break; case VA_FOURCC('4', '2', '2', 'V'): -- cgit v1.2.1 From ae834b37273211cffa77c57ea6dc24abd0db7b31 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 14 Mar 2014 15:16:20 +0800 Subject: Export the surface attribute based on YV16 for VPP on Gen7+ Reviewed-by: Wind Yuan Tested-by: Wind Yuan Signed-off-by: Zhao Yakui (cherry picked from commit 94f415b29ee197f66281370801a9c3bd4240c928) --- src/i965_drv_video.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6bd6c300..3470ba91 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4648,6 +4648,12 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '6'); + i++; } } } else if (IS_GEN8(i965->intel.device_id)) { @@ -4758,6 +4764,12 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; attribs[i].value.value.i = VA_FOURCC('B', 'G', 'R', 'X'); i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '6'); + i++; } } } -- cgit v1.2.1 From b329b70baea3a489a14fe80830f6178d3b8ab089 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 14 
Mar 2014 15:16:24 +0800 Subject: Add the support of derive image from YV16 surface Reviewed-by: Wind Yuan Tested-by: Wind Yuan Signed-off-by: Zhao Yakui (cherry picked from commit 7d5172de91336db2e627c0011404231b6b64b211) --- src/i965_drv_video.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 3470ba91..4a183568 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3167,6 +3167,16 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[2] = w_pitch * obj_surface->y_cb_offset; break; + case VA_FOURCC('Y', 'V', '1', '6'): + image->num_planes = 3; + image->pitches[0] = w_pitch; /* Y */ + image->offsets[0] = 0; + image->pitches[1] = obj_surface->cb_cr_pitch; /* V */ + image->offsets[1] = w_pitch * obj_surface->y_cr_offset; + image->pitches[2] = obj_surface->cb_cr_pitch; /* U */ + image->offsets[2] = w_pitch * obj_surface->y_cb_offset; + break; + case VA_FOURCC('N', 'V', '1', '2'): image->num_planes = 2; image->pitches[0] = w_pitch; /* Y */ -- cgit v1.2.1 From 071e5f9abebad6e44e052074b6fc0a1bf62a39be Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 14 Mar 2014 15:16:26 +0800 Subject: Add the csc conversion from YV16 to NV12 V1->V2: Follow Zhiwen's comment to handle the scenario of CSC conversion from YV16 to NV12 when the source is YV16 image instead of YV16 surface. 
Reviewed-by: Wind Yuan Tested-by: Wind Yuan Signed-off-by: Zhao Yakui (cherry picked from commit 2b5fad11a5c12d3c6ffbef15c02449a3b4e90b98) --- src/gen8_post_processing.c | 8 ++++++++ src/i965_post_processing.c | 23 +++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 78f5a837..84d4864e 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -470,8 +470,10 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('Y', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('Y', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2; int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); @@ -538,6 +540,12 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; + if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height; + } } } diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 8d53676d..6713e056 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1702,8 +1702,12 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int Y = 0; - const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 
2 : 1; - const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2; + const int U = ((fourcc == VA_FOURCC('Y', 'V', '1', '2')) || + (fourcc == VA_FOURCC('Y', 'V', '1', '6'))) + ? 2 : 1; + const int V = ((fourcc == VA_FOURCC('Y', 'V', '1', '2')) || + (fourcc == VA_FOURCC('Y', 'V', '1', '6'))) + ? 1 : 2; const int UV = 1; int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); @@ -1770,6 +1774,12 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[2]; offset[2] = obj_image->image.offsets[2]; + if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height; + } } } @@ -1813,8 +1823,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('V', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || + fourcc == VA_FOURCC('V', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 
1 : 2; int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); @@ -1880,6 +1892,12 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; + if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height; + } } } @@ -5069,6 +5087,7 @@ i965_image_processing(VADriverContextP ctx, case VA_FOURCC('4', '2', '2', 'V'): case VA_FOURCC('4', '1', '1', 'P'): case VA_FOURCC('4', '4', '4', 'P'): + case VA_FOURCC('Y', 'V', '1', '6'): status = i965_image_pl3_processing(ctx, src_surface, src_rect, -- cgit v1.2.1 From 257198d36dd8d4c8fbdef36aa319c2ddf7a9b50d Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 18 Mar 2014 16:15:15 +0800 Subject: VPP: Fix the typo error of "VV16" It should be "YV16" instead of "VV16". Thank Gwenole for capturing this typo error which is caused by the commit 2b5fad11a5c12d3c6ffbef15c02449a3b4e90b98. 
Signed-off-by: Zhao Yakui (cherry picked from commit abd77ff2014322d152d723a3e8b1cba1e41b0a5f) --- src/gen8_post_processing.c | 2 +- src/i965_post_processing.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 84d4864e..3abe287f 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -540,7 +540,7 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 6713e056..c4029e16 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1774,7 +1774,7 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[2]; offset[2] = obj_image->image.offsets[2]; - if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; @@ -1823,10 +1823,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('V', 'V', '1', '6') || + fourcc == VA_FOURCC('Y', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('V', 'V', '1', '6') || + fourcc == VA_FOURCC('Y', 'V', '1', '6') || fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 
1 : 2; int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); @@ -1892,7 +1892,7 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC('V', 'V', '1', '6')) { + if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; -- cgit v1.2.1 From f9a8e572693e099f2902e8dc62e9f59664cfc6da Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Fri, 14 Mar 2014 13:43:33 +0800 Subject: va: User specified tiling and stride support. It is done by two VASurfaceAttrib: * one is buffer attribute described by VASurfaceAttribExternalBufferDescriptor. it covers strides and tiling or not. * another is buffer type to indicate that the buffer is allocated by va driver. 
VASurfaceAttribMemoryType:VA_SURFACE_ATTRIB_MEM_TYPE_VA Signed-off-by: Zhao Halley Reviewed-by: Zhao Yakui (cherry picked from commit 55e63685dc040e3855868b4d7ccb0ac8e1f66690) --- src/i965_drv_video.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/i965_drv_video.h | 4 +++ src/intel_driver.h | 1 + 3 files changed, 83 insertions(+), 4 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 4a183568..f4963dc5 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -983,6 +983,40 @@ i965_suface_external_memory(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +/* byte-per-pixel of the first plane */ +static int +bpp_1stplane_by_fourcc(unsigned int fourcc) +{ + switch (fourcc) { + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_ARGB: + case VA_FOURCC_XRGB: + case VA_FOURCC_ABGR: + case VA_FOURCC_XBGR: + case VA_FOURCC_AYUV: + return 4; + + case VA_FOURCC_UYVY: + case VA_FOURCC_YUY2: + return 2; + + case VA_FOURCC_YV12: + case VA_FOURCC_IMC3: + case VA_FOURCC_IYUV: + case VA_FOURCC_NV12: + case VA_FOURCC_NV11: + case VA_FOURCC('Y', 'V', '1', '6'): + return 1; + + default: + assert(0); + return 0; + } +} + static VAStatus i965_CreateSurfaces2( VADriverContextP ctx, @@ -1018,6 +1052,8 @@ i965_CreateSurfaces2( memory_type = I965_SURFACE_MEM_GEM_FLINK; /* flinked GEM handle */ else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME) memory_type = I965_SURFACE_MEM_DRM_PRIME; /* drm prime fd */ + else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_VA) + memory_type = I965_SURFACE_MEM_NATIVE; /* va native memory, to be allocated */ } if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) && @@ -1051,6 +1087,9 @@ i965_CreateSurfaces2( obj_surface->status = VASurfaceReady; obj_surface->orig_width = width; obj_surface->orig_height = height; + obj_surface->user_disable_tiling = false; + obj_surface->user_h_stride_set = false; + 
obj_surface->user_v_stride_set = false; obj_surface->subpic_render_idx = 0; for(j = 0; j < I965_MAX_SUBPIC_SUM; j++){ @@ -1070,6 +1109,34 @@ i965_CreateSurfaces2( switch (memory_type) { case I965_SURFACE_MEM_NATIVE: + if (memory_attibute) { + if (!(memory_attibute->flags & VA_SURFACE_EXTBUF_DESC_ENABLE_TILING)) + obj_surface->user_disable_tiling = true; + + if (memory_attibute->pixel_format) { + if (expected_fourcc) + assert(memory_attibute->pixel_format == expected_fourcc); + else + expected_fourcc = memory_attibute->pixel_format; + } + assert(expected_fourcc); + if (memory_attibute->pitches[0]) { + int bpp_1stplane = bpp_1stplane_by_fourcc(expected_fourcc); + assert(bpp_1stplane); + obj_surface->width = memory_attibute->pitches[0]/bpp_1stplane; + obj_surface->user_h_stride_set = true; + assert(IS_ALIGNED(obj_surface->width, 16)); + assert(obj_surface->width >= width); + + if (memory_attibute->offsets[1]) { + assert(!memory_attibute->offsets[0]); + obj_surface->height = memory_attibute->offsets[1]/memory_attibute->pitches[0]; + obj_surface->user_v_stride_set = true; + assert(IS_ALIGNED(obj_surface->height, 16)); + assert(obj_surface->height >= height); + } + } + } i965_surface_native_memory(ctx, obj_surface, format, @@ -2863,13 +2930,20 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->x_cb_offset = 0; /* X offset is always 0 */ obj_surface->x_cr_offset = 0; - if (tiled) { + if ((tiled && !obj_surface->user_disable_tiling)) { assert(fourcc != VA_FOURCC('I', '4', '2', '0') && fourcc != VA_FOURCC('I', 'Y', 'U', 'V') && fourcc != VA_FOURCC('Y', 'V', '1', '2')); + if (obj_surface->user_h_stride_set) { + assert(IS_ALIGNED(obj_surface->width, 128)); + } else + obj_surface->width = ALIGN(obj_surface->orig_width, 128); + + if (obj_surface->user_v_stride_set) { + assert(IS_ALIGNED(obj_surface->height, 32)); + } else + obj_surface->height = ALIGN(obj_surface->orig_height, 32); - obj_surface->width = ALIGN(obj_surface->orig_width, 128); - obj_surface->height 
= ALIGN(obj_surface->orig_height, 32); region_height = obj_surface->height; switch (fourcc) { @@ -3072,7 +3146,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->size = ALIGN(region_width * region_height, 0x1000); - if (tiled) { + if ((tiled && !obj_surface->user_disable_tiling)) { uint32_t tiling_mode = I915_TILING_Y; /* always uses Y-tiled format */ unsigned long pitch; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 69b98707..6ff03697 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -228,6 +228,10 @@ struct object_surface int cb_cr_width; int cb_cr_height; int cb_cr_pitch; + /* user specified attributes see: VASurfaceAttribExternalBuffers/VA_SURFACE_ATTRIB_MEM_TYPE_VA */ + uint32_t user_disable_tiling : 1; + uint32_t user_h_stride_set : 1; + uint32_t user_v_stride_set : 1; }; struct object_buffer diff --git a/src/intel_driver.h b/src/intel_driver.h index eae4d12f..abb396b0 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -67,6 +67,7 @@ struct intel_batchbuffer; #define ALIGN(i, n) (((i) + (n) - 1) & ~((n) - 1)) +#define IS_ALIGNED(i, n) (((i) & ((n)-1)) == 0) #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) -- cgit v1.2.1 From 5f8dc26fb260a027cb829a04d10014e653d4db71 Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Fri, 14 Mar 2014 17:12:41 +0800 Subject: clean up some assert in i965_drv_video.c a return value is expected when assert is disabled. 
Signed-off-by: Zhao Halley (cherry picked from commit 12c81227fd92fe028100af0cb32cc17b7f698b3d) --- src/i965_drv_video.c | 375 ++++++++++++++++++++++++--------------------------- src/i965_drv_video.h | 2 +- src/intel_driver.h | 7 + 3 files changed, 187 insertions(+), 197 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index f4963dc5..b7f8f303 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -367,7 +367,7 @@ va_enc_packed_type_to_idx(int packed_type) if (packed_type & VAEncPackedHeaderMiscMask) { idx = I965_PACKED_MISC_HEADER_BASE; packed_type = (~VAEncPackedHeaderMiscMask & packed_type); - assert(packed_type > 0); + ASSERT_RET(packed_type > 0, 0); idx += (packed_type - 1); } else { idx = I965_PACKED_HEADER_BASE; @@ -387,12 +387,12 @@ va_enc_packed_type_to_idx(int packed_type) default: /* Should not get here */ - assert(0); + ASSERT_RET(0, 0); break; } } - assert(idx < 4); + ASSERT_RET(idx < 4, 0); return idx; } @@ -437,7 +437,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx, } /* If the assert fails then I965_MAX_PROFILES needs to be bigger */ - assert(i <= I965_MAX_PROFILES); + ASSERT_RET(i <= I965_MAX_PROFILES, VA_STATUS_ERROR_OPERATION_FAILED); *num_profiles = i; return VA_STATUS_SUCCESS; @@ -503,7 +503,7 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, } /* If the assert fails then I965_MAX_ENTRYPOINTS needs to be bigger */ - assert(n <= I965_MAX_ENTRYPOINTS); + ASSERT_RET(n <= I965_MAX_ENTRYPOINTS, VA_STATUS_ERROR_OPERATION_FAILED); *num_entrypoints = n; return n > 0 ? 
VA_STATUS_SUCCESS : VA_STATUS_ERROR_UNSUPPORTED_PROFILE; } @@ -733,7 +733,7 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); *profile = obj_config->profile; *entrypoint = obj_config->entrypoint; *num_attribs = obj_config->num_attribs; @@ -799,9 +799,9 @@ i965_suface_external_memory(VADriverContextP ctx, index > memory_attibute->num_buffers) return VA_STATUS_ERROR_INVALID_PARAMETER; - assert(obj_surface->orig_width == memory_attibute->width); - assert(obj_surface->orig_height == memory_attibute->height); - assert(memory_attibute->num_planes >= 1); + ASSERT_RET(obj_surface->orig_width == memory_attibute->width, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->orig_height == memory_attibute->height, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->num_planes >= 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->fourcc = memory_attibute->pixel_format; obj_surface->width = memory_attibute->pitches[0]; @@ -817,8 +817,8 @@ i965_suface_external_memory(VADriverContextP ctx, switch (obj_surface->fourcc) { case VA_FOURCC('N', 'V', '1', '2'): - assert(memory_attibute->num_planes == 2); - assert(memory_attibute->pitches[0] == memory_attibute->pitches[1]); + ASSERT_RET(memory_attibute->num_planes == 2, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[0] == memory_attibute->pitches[1], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cb_offset = obj_surface->height; @@ -831,8 +831,8 @@ i965_suface_external_memory(VADriverContextP ctx, case VA_FOURCC('Y', 'V', '1', '2'): case VA_FOURCC('I', 'M', 'C', '1'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == 
memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cr_offset = obj_surface->height; @@ -846,8 +846,8 @@ i965_suface_external_memory(VADriverContextP ctx, case VA_FOURCC('I', '4', '2', '0'): case VA_FOURCC('I', 'Y', 'U', 'V'): case VA_FOURCC('I', 'M', 'C', '3'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cb_offset = obj_surface->height; @@ -860,7 +860,7 @@ i965_suface_external_memory(VADriverContextP ctx, case VA_FOURCC('Y', 'U', 'Y', '2'): case VA_FOURCC('U', 'Y', 'V', 'Y'): - assert(memory_attibute->num_planes == 1); + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = 0; @@ -875,7 +875,7 @@ i965_suface_external_memory(VADriverContextP ctx, case VA_FOURCC('R', 'G', 'B', 'X'): case VA_FOURCC('B', 'G', 'R', 'A'): case VA_FOURCC('B', 'G', 'R', 'X'): - assert(memory_attibute->num_planes == 1); + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_RGBX; obj_surface->y_cb_offset = 0; @@ -887,7 +887,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; case VA_FOURCC('Y', '8', '0', '0'): /* monochrome surface */ - assert(memory_attibute->num_planes == 1); + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV400; obj_surface->y_cb_offset = 0; @@ -899,8 +899,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; case VA_FOURCC('4', '1', '1', 'P'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == 
memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV411; obj_surface->y_cb_offset = 0; @@ -912,8 +912,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; case VA_FOURCC('4', '2', '2', 'H'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = obj_surface->height; @@ -938,8 +938,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; case VA_FOURCC('4', '2', '2', 'V'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = obj_surface->height; @@ -951,8 +951,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; case VA_FOURCC('4', '4', '4', 'P'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV444; obj_surface->y_cb_offset = obj_surface->height; @@ -1012,7 +1012,7 @@ bpp_1stplane_by_fourcc(unsigned int fourcc) return 1; default: - assert(0); + ASSERT_RET(0, 0); return 0; } } @@ -1039,14 +1039,14 @@ 
i965_CreateSurfaces2( for (i = 0; i < num_attribs && attrib_list; i++) { if ((attrib_list[i].type == VASurfaceAttribPixelFormat) && (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypeInteger); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER); expected_fourcc = attrib_list[i].value.value.i; } if ((attrib_list[i].type == VASurfaceAttribMemoryType) && (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypeInteger); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER); if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM) memory_type = I965_SURFACE_MEM_GEM_FLINK; /* flinked GEM handle */ @@ -1058,7 +1058,7 @@ i965_CreateSurfaces2( if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) && (attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypePointer); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypePointer, VA_STATUS_ERROR_INVALID_PARAMETER); memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p; } } @@ -1115,25 +1115,25 @@ i965_CreateSurfaces2( if (memory_attibute->pixel_format) { if (expected_fourcc) - assert(memory_attibute->pixel_format == expected_fourcc); + ASSERT_RET(memory_attibute->pixel_format == expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); else expected_fourcc = memory_attibute->pixel_format; } - assert(expected_fourcc); + ASSERT_RET(expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); if (memory_attibute->pitches[0]) { int bpp_1stplane = bpp_1stplane_by_fourcc(expected_fourcc); - assert(bpp_1stplane); + ASSERT_RET(bpp_1stplane, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->width = memory_attibute->pitches[0]/bpp_1stplane; obj_surface->user_h_stride_set = true; - assert(IS_ALIGNED(obj_surface->width, 
16)); - assert(obj_surface->width >= width); + ASSERT_RET(IS_ALIGNED(obj_surface->width, 16), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->width >= width, VA_STATUS_ERROR_INVALID_PARAMETER); if (memory_attibute->offsets[1]) { - assert(!memory_attibute->offsets[0]); + ASSERT_RET(!memory_attibute->offsets[0], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->height = memory_attibute->offsets[1]/memory_attibute->pitches[0]; obj_surface->user_v_stride_set = true; - assert(IS_ALIGNED(obj_surface->height, 16)); - assert(obj_surface->height >= height); + ASSERT_RET(IS_ALIGNED(obj_surface->height, 16), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->height >= height, VA_STATUS_ERROR_INVALID_PARAMETER); } } } @@ -1198,7 +1198,7 @@ i965_DestroySurfaces(VADriverContextP ctx, for (i = num_surfaces; i--; ) { struct object_surface *obj_surface = SURFACE(surface_list[i]); - assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); i965_destroy_surface(&i965->surface_heap, (struct object_base *)obj_surface); } @@ -1353,7 +1353,7 @@ i965_DestroySubpicture(VADriverContextP ctx, if (!obj_subpic) return VA_STATUS_ERROR_INVALID_SUBPICTURE; - assert(obj_subpic->obj_image); + ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE); i965_destroy_subpic(&i965->subpic_heap, (struct object_base *)obj_subpic); return VA_STATUS_SUCCESS; } @@ -1424,7 +1424,7 @@ i965_AssociateSubpicture(VADriverContextP ctx, if (!obj_subpic) return VA_STATUS_ERROR_INVALID_SUBPICTURE; - assert(obj_subpic->obj_image); + ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE); obj_subpic->src_rect.x = src_x; obj_subpic->src_rect.y = src_y; @@ -1714,7 +1714,7 @@ i965_DestroyContext(VADriverContextP ctx, VAContextID context) struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_context *obj_context = CONTEXT(context); - assert(obj_context); + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); if 
(i965->current_context_id == context) i965->current_context_id = VA_INVALID_ID; @@ -1877,10 +1877,7 @@ i965_BufferSetNumElements(VADriverContextP ctx, struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_SUCCESS; - assert(obj_buffer); - - if (!obj_buffer) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); if ((num_elements < 0) || (num_elements > obj_buffer->max_num_elements)) { @@ -1904,12 +1901,9 @@ i965_MapBuffer(VADriverContextP ctx, struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; - assert(obj_buffer && obj_buffer->buffer_store); - assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer); - assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer)); - - if (!obj_buffer || !obj_buffer->buffer_store) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_INVALID_BUFFER); if (NULL != obj_buffer->buffer_store->bo) { unsigned int tiling, swizzle; @@ -1921,7 +1915,7 @@ i965_MapBuffer(VADriverContextP ctx, else dri_bo_map(obj_buffer->buffer_store->bo, 1); - assert(obj_buffer->buffer_store->bo->virtual); + ASSERT_RET(obj_buffer->buffer_store->bo->virtual, VA_STATUS_ERROR_OPERATION_FAILED); *pbuf = obj_buffer->buffer_store->bo->virtual; if (obj_buffer->type == VAEncCodedBufferType) { @@ -1947,7 +1941,7 @@ i965_MapBuffer(VADriverContextP ctx, delimiter3 = MPEG2_DELIMITER3; delimiter4 = MPEG2_DELIMITER4; } else { - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); } for (i = 0; i < obj_buffer->size_element - I965_CODEDBUFFER_HEADER_SIZE - 3 - 0x1000; i++) { @@ -1989,12 +1983,9 @@ i965_UnmapBuffer(VADriverContextP ctx, 
VABufferID buf_id) if ((buf_id & OBJECT_HEAP_OFFSET_MASK) != BUFFER_ID_OFFSET) return VA_STATUS_ERROR_INVALID_BUFFER; - assert(obj_buffer && obj_buffer->buffer_store); - assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer); - assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer)); - - if (!obj_buffer || !obj_buffer->buffer_store) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_OPERATION_FAILED); + ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_OPERATION_FAILED); if (NULL != obj_buffer->buffer_store->bo) { unsigned int tiling, swizzle; @@ -2021,10 +2012,7 @@ i965_DestroyBuffer(VADriverContextP ctx, VABufferID buffer_id) struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_buffer *obj_buffer = BUFFER(buffer_id); - assert(obj_buffer); - - if (!obj_buffer) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_destroy_buffer(&i965->buffer_heap, (struct object_base *)obj_buffer); @@ -2043,18 +2031,10 @@ i965_BeginPicture(VADriverContextP ctx, VAStatus vaStatus; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; - - assert(obj_surface); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); obj_config = obj_context->obj_config; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); switch (obj_config->profile) { case VAProfileMPEG2Simple: @@ -2087,8 +2067,7 @@ i965_BeginPicture(VADriverContextP ctx, break; default: - assert(0); - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); break; } @@ -2193,10 
+2172,7 @@ i965_decoder_render_picture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); @@ -2266,8 +2242,8 @@ i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx, { struct encode_state *encode = &obj_context->codec_state.encode; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_release_buffer_store(&encode->packed_header_param[type_index]); i965_reference_buffer_store(&encode->packed_header_param[type_index], obj_buffer->buffer_store); @@ -2282,8 +2258,8 @@ i965_encoder_render_packed_header_data_buffer(VADriverContextP ctx, { struct encode_state *encode = &obj_context->codec_state.encode; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_release_buffer_store(&encode->packed_header_data[type_index]); i965_reference_buffer_store(&encode->packed_header_data[type_index], obj_buffer->buffer_store); @@ -2298,8 +2274,8 @@ i965_encoder_render_misc_parameter_buffer(VADriverContextP ctx, struct encode_state *encode = &obj_context->codec_state.encode; VAEncMiscParameterBuffer *param = NULL; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); param = 
(VAEncMiscParameterBuffer *)obj_buffer->buffer_store->buffer; @@ -2323,10 +2299,7 @@ i965_encoder_render_picture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); for (i = 0; i < num_buffers; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); @@ -2372,11 +2345,12 @@ i965_encoder_render_picture(VADriverContextP ctx, { struct encode_state *encode = &obj_context->codec_state.encode; - assert(encode->last_packed_header_type == VAEncPackedHeaderSequence || + ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence || encode->last_packed_header_type == VAEncPackedHeaderPicture || encode->last_packed_header_type == VAEncPackedHeaderSlice || (((encode->last_packed_header_type & VAEncPackedHeaderMiscMask) == VAEncPackedHeaderMiscMask) && - ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0))); + ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0)), + VA_STATUS_ERROR_ENCODING_ERROR); vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, obj_context, obj_buffer, @@ -2415,10 +2389,7 @@ i965_proc_render_picture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); @@ -2452,16 +2423,13 @@ i965_RenderPicture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; obj_context = CONTEXT(context); - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); if (num_buffers <= 0) return VA_STATUS_ERROR_INVALID_PARAMETER; obj_config = obj_context->obj_config; - 
assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); if (VAEntrypointVideoProc == obj_config->entrypoint) { vaStatus = i965_proc_render_picture(ctx, context, buffers, num_buffers); @@ -2481,18 +2449,14 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) struct object_context *obj_context = CONTEXT(context); struct object_config *obj_config; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; - + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); obj_config = obj_context->obj_config; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); if (obj_context->codec_type == CODEC_PROC) { - assert(VAEntrypointVideoProc == obj_config->entrypoint); + ASSERT_RET(VAEntrypointVideoProc == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT); } else if (obj_context->codec_type == CODEC_ENC) { - assert(VAEntrypointEncSlice == obj_config->entrypoint); + ASSERT_RET(VAEntrypointEncSlice == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT); if (!(obj_context->codec_state.encode.pic_param || obj_context->codec_state.encode.pic_param_ext)) { @@ -2523,7 +2487,7 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) } } - assert(obj_context->hw_context->run); + ASSERT_RET(obj_context->hw_context->run, VA_STATUS_ERROR_OPERATION_FAILED); return obj_context->hw_context->run(ctx, obj_config->profile, &obj_context->codec_state, obj_context->hw_context); } @@ -2534,7 +2498,7 @@ i965_SyncSurface(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(render_target); - assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); if(obj_surface->bo) drm_intel_bo_wait_rendering(obj_surface->bo); @@ -2550,7 +2514,7 @@ i965_QuerySurfaceStatus(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(render_target); - 
assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); if (obj_surface->bo) { if (drm_intel_bo_busy(obj_surface->bo)){ @@ -2910,7 +2874,7 @@ i965_CreateImage(VADriverContextP ctx, return va_status; } -void +VAStatus i965_check_alloc_surface_bo(VADriverContextP ctx, struct object_surface *obj_surface, int tiled, @@ -2921,26 +2885,27 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, int region_width, region_height; if (obj_surface->bo) { - assert(obj_surface->fourcc); - assert(obj_surface->fourcc == fourcc); - assert(obj_surface->subsampling == subsampling); - return; + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->fourcc == fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->subsampling == subsampling, VA_STATUS_ERROR_INVALID_SURFACE); + return VA_STATUS_SUCCESS; } obj_surface->x_cb_offset = 0; /* X offset is always 0 */ obj_surface->x_cr_offset = 0; if ((tiled && !obj_surface->user_disable_tiling)) { - assert(fourcc != VA_FOURCC('I', '4', '2', '0') && + ASSERT_RET(fourcc != VA_FOURCC('I', '4', '2', '0') && fourcc != VA_FOURCC('I', 'Y', 'U', 'V') && - fourcc != VA_FOURCC('Y', 'V', '1', '2')); + fourcc != VA_FOURCC('Y', 'V', '1', '2'), + VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); if (obj_surface->user_h_stride_set) { - assert(IS_ALIGNED(obj_surface->width, 128)); + ASSERT_RET(IS_ALIGNED(obj_surface->width, 128), VA_STATUS_ERROR_INVALID_PARAMETER); } else obj_surface->width = ALIGN(obj_surface->orig_width, 128); if (obj_surface->user_v_stride_set) { - assert(IS_ALIGNED(obj_surface->height, 32)); + ASSERT_RET(IS_ALIGNED(obj_surface->height, 32), VA_STATUS_ERROR_INVALID_PARAMETER); } else obj_surface->height = ALIGN(obj_surface->orig_height, 32); @@ -3070,7 +3035,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, default: /* Never get here */ - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); break; } } else { @@ -3139,7 +3104,7 @@ 
i965_check_alloc_surface_bo(VADriverContextP ctx, default: /* Never get here */ - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); break; } } @@ -3170,6 +3135,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->fourcc = fourcc; obj_surface->subsampling = subsampling; assert(obj_surface->bo); + return VA_STATUS_SUCCESS; } VAStatus i965_DeriveImage(VADriverContextP ctx, @@ -3194,10 +3160,13 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, unsigned int fourcc = VA_FOURCC('Y', 'V', '1', '2'); i965_guess_surface_format(ctx, surface, &fourcc, &is_tiled); int sampling = get_sampling_from_fourcc(fourcc); - i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); } - assert(obj_surface->fourcc); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); w_pitch = obj_surface->width; @@ -3430,7 +3399,7 @@ get_sampling_from_fourcc(unsigned int fourcc) break; default: /* Never get here */ - assert(0); + ASSERT_RET(0, 0); break; } @@ -3452,7 +3421,7 @@ memcpy_pic(uint8_t *dst, unsigned int dst_stride, } } -static void +static VAStatus get_image_i420(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) @@ -3462,11 +3431,12 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2; const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 
2 : 1; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; - assert(obj_surface->fourcc); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3475,7 +3445,7 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Dest VA image has either I420 or YV12 format. Source VA surface alway has I420 format */ @@ -3511,18 +3481,21 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus get_image_nv12(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) { uint8_t *dst[2], *src[2]; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; assert(obj_surface->fourcc); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -3533,7 +3506,7 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have NV12 format */ dst[0] = image_data + obj_image->image.offsets[0]; @@ -3559,18 +3532,21 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) { uint8_t *dst, *src; unsigned int tiling, swizzle; + VAStatus va_status = 
VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; assert(obj_surface->fourcc); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -3581,7 +3557,7 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have YUYV format */ dst = image_data + obj_image->image.offsets[0]; @@ -3598,6 +3574,8 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } static VAStatus @@ -3611,6 +3589,7 @@ i965_sw_getimage(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; + VAStatus va_status = VA_STATUS_SUCCESS; struct object_surface *obj_surface = SURFACE(surface); if (!obj_surface) @@ -3632,7 +3611,6 @@ i965_sw_getimage(VADriverContextP ctx, if (obj_surface->fourcc != obj_image->image.format.fourcc) return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; - VAStatus va_status; void *image_data = NULL; va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); @@ -3669,7 +3647,10 @@ i965_sw_getimage(VADriverContextP ctx, break; } - i965_UnmapBuffer(ctx, obj_image->image.buf); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = i965_UnmapBuffer(ctx, obj_image->image.buf); return va_status; } @@ -3685,7 +3666,7 @@ i965_hw_getimage(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_surface src_surface; struct i965_surface dst_surface; - VAStatus va_status; + VAStatus va_status = VA_STATUS_SUCCESS; VARectangle rect; struct object_surface *obj_surface = SURFACE(surface); struct object_image *obj_image = IMAGE(image); @@ -3742,7 +3723,7 @@ i965_GetImage(VADriverContextP ctx, VAImageID image) { struct 
i965_driver_data * const i965 = i965_driver_data(ctx); - VAStatus va_status; + VAStatus va_status = VA_STATUS_SUCCESS; if (HAS_ACCELERATED_GETIMAGE(i965)) va_status = i965_hw_getimage(ctx, @@ -3760,7 +3741,7 @@ i965_GetImage(VADriverContextP ctx, return va_status; } -static void +static VAStatus put_image_i420(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3771,13 +3752,13 @@ put_image_i420(struct object_surface *obj_surface, const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2; const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 2 : 1; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; - if (!obj_surface->bo) - return; + ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE); - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3786,7 +3767,7 @@ put_image_i420(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Dest VA image has either I420 or YV12 format. 
Source VA surface alway has I420 format */ @@ -3822,9 +3803,11 @@ put_image_i420(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus put_image_nv12(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3832,13 +3815,14 @@ put_image_nv12(struct object_surface *obj_surface, { uint8_t *dst[2], *src[2]; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3847,7 +3831,7 @@ put_image_nv12(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have NV12 format */ dst[0] = (uint8_t *)obj_surface->bo->virtual; @@ -3873,9 +3857,11 @@ put_image_nv12(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus put_image_yuy2(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3883,13 +3869,12 @@ put_image_yuy2(struct object_surface *obj_surface, { uint8_t *dst, *src; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; - if (!obj_surface->bo) - return; - - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - 
assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3898,7 +3883,7 @@ put_image_yuy2(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have YUY2 format */ dst = (uint8_t *)obj_surface->bo->virtual; @@ -3915,6 +3900,8 @@ put_image_yuy2(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } @@ -3933,13 +3920,12 @@ i965_sw_putimage(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(surface); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - struct object_image *obj_image = IMAGE(image); - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; + VAStatus va_status = VA_STATUS_SUCCESS; + void *image_data = NULL; + + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_image, VA_STATUS_ERROR_INVALID_IMAGE); if (src_x < 0 || src_y < 0) return VA_STATUS_ERROR_INVALID_PARAMETER; @@ -3964,7 +3950,7 @@ i965_sw_putimage(VADriverContextP ctx, else { /* VA is surface not used for decoding, use same VA image format */ - i965_check_alloc_surface_bo( + va_status = i965_check_alloc_surface_bo( ctx, obj_surface, 0, /* XXX: don't use tiled surface */ @@ -3972,8 +3958,8 @@ i965_sw_putimage(VADriverContextP ctx, get_sampling_from_fourcc (obj_image->image.format.fourcc)); } - VAStatus va_status; - void *image_data = NULL; + if (va_status != VA_STATUS_SUCCESS) + return va_status; 
va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); if (va_status != VA_STATUS_SUCCESS) @@ -3992,20 +3978,22 @@ i965_sw_putimage(VADriverContextP ctx, switch (obj_image->image.format.fourcc) { case VA_FOURCC('Y','V','1','2'): case VA_FOURCC('I','4','2','0'): - put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; case VA_FOURCC('N','V','1','2'): - put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; case VA_FOURCC('Y','U','Y','2'): - put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; default: va_status = VA_STATUS_ERROR_OPERATION_FAILED; break; } + if (va_status != VA_STATUS_SUCCESS) + return va_status; - i965_UnmapBuffer(ctx, obj_image->image.buf); + va_status = i965_UnmapBuffer(ctx, obj_image->image.buf); return va_status; } @@ -4029,11 +4017,8 @@ i965_hw_putimage(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; VARectangle src_rect, dst_rect; - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - if (!obj_image || !obj_image->bo) - return VA_STATUS_ERROR_INVALID_IMAGE; + ASSERT_RET(obj_surface,VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_image && obj_image->bo, VA_STATUS_ERROR_INVALID_IMAGE); if (src_x < 0 || src_y < 0 || @@ -4059,7 +4044,7 @@ i965_hw_putimage(VADriverContextP ctx, surface_sampling); } - assert(obj_surface->fourcc); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); src_surface.base = (struct object_base *)obj_image; src_surface.type = I965_SURFACE_TYPE_IMAGE; @@ -4182,10 +4167,7 @@ i965_BufferInfo( i965 = i965_driver_data(ctx); obj_buffer = BUFFER(buf_id); - assert(obj_buffer); - - if (!obj_buffer) - return 
VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); *type = obj_buffer->type; *size = obj_buffer->size_element; @@ -4214,15 +4196,15 @@ i965_LockSurface( struct object_surface *obj_surface = NULL; VAImage tmpImage; - assert(fourcc); - assert(luma_stride); - assert(chroma_u_stride); - assert(chroma_v_stride); - assert(luma_offset); - assert(chroma_u_offset); - assert(chroma_v_offset); - assert(buffer_name); - assert(buffer); + ASSERT_RET(fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(luma_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_u_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_v_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(luma_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_u_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_v_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(buffer_name, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(buffer, VA_STATUS_ERROR_INVALID_PARAMETER); tmpImage.image_id = VA_INVALID_ID; @@ -5118,9 +5100,10 @@ VAStatus i965_QueryVideoProcPipelineCaps( } else if (base->type == VAProcFilterDeinterlacing) { VAProcFilterParameterBufferDeinterlacing *deint = (VAProcFilterParameterBufferDeinterlacing *)base; - assert(deint->algorithm == VAProcDeinterlacingBob || + ASSERT_RET(deint->algorithm == VAProcDeinterlacingBob || deint->algorithm == VAProcDeinterlacingMotionAdaptive || - deint->algorithm == VAProcDeinterlacingMotionCompensated); + deint->algorithm == VAProcDeinterlacingMotionCompensated, + VA_STATUS_ERROR_INVALID_PARAMETER); if (deint->algorithm == VAProcDeinterlacingMotionAdaptive || deint->algorithm == VAProcDeinterlacingMotionCompensated); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 6ff03697..994dc1b6 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -373,7 +373,7 @@ i965_driver_data(VADriverContextP ctx) return (struct i965_driver_data *)(ctx->pDriverData); } -void 
+VAStatus i965_check_alloc_surface_bo(VADriverContextP ctx, struct object_surface *obj_surface, int tiled, diff --git a/src/intel_driver.h b/src/intel_driver.h index abb396b0..3c74ed3d 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -76,6 +76,13 @@ struct intel_batchbuffer; #define True 1 #define False 0 +#define ASSERT_RET(value, fail_ret) do { \ + if (!(value)) { \ + assert(0); \ + return fail_ret; \ + } \ + } while (0) + #define SET_BLOCKED_SIGSET() do { \ sigset_t bl_mask; \ sigfillset(&bl_mask); \ -- cgit v1.2.1 From a48fc63de5aad630427d4f800efc6b436d6885be Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Fri, 21 Mar 2014 01:56:39 -0600 Subject: Fix for check i965_check_alloc_surface_bo ret Signed-off-by: Zhao Halley Reviewed-by: Zhao Yakui (cherry picked from commit 41da810decbb2d64843b95384fc87f7a29152c88) --- src/i965_drv_video.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index b7f8f303..d9fe1367 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3161,11 +3161,10 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, i965_guess_surface_format(ctx, surface, &fourcc, &is_tiled); int sampling = get_sampling_from_fourcc(fourcc); va_status = i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); + if (va_status != VA_STATUS_SUCCESS) + return va_status; } - if (va_status != VA_STATUS_SUCCESS) - return va_status; - ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); w_pitch = obj_surface->width; -- cgit v1.2.1 From 1ea8d643e3cde59babf46114f083bdf7e78eaaeb Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 24 Mar 2014 09:48:50 +0800 Subject: Use the VA_FOURCC_XXXX to replace the VA_FOURCC(X,X,X,X) in i965_drv_video This is helpful to avoid the typo error when using VA_FOURCC(A, B, C, D). 
Signed-off-by: Zhao Yakui (cherry picked from commit ab3e02d63fe672e3f81631f2beb5bc2b7ab17af0) --- src/Makefile.am | 1 + src/i965_drv_video.c | 396 +++++++++++++++++++++++++-------------------------- src/i965_drv_video.h | 1 + src/i965_fourcc.h | 36 +++++ 4 files changed, 236 insertions(+), 198 deletions(-) create mode 100644 src/i965_fourcc.h diff --git a/src/Makefile.am b/src/Makefile.am index 28334626..806ee4ee 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -121,6 +121,7 @@ source_h = \ object_heap.h \ sysdeps.h \ va_backend_compat.h \ + i965_fourcc.h \ $(NULL) i965_drv_video_la_LTLIBRARIES = i965_drv_video.la diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index d9fe1367..0dcac01c 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -143,19 +143,19 @@ typedef struct { static const i965_image_format_map_t i965_image_formats_map[I965_MAX_IMAGE_FORMATS + 1] = { { I965_SURFACETYPE_YUV, - { VA_FOURCC('Y','V','1','2'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_YV12, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('I','4','2','0'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_I420, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('N','V','1','2'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_NV12, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('Y','U','Y','2'), VA_LSB_FIRST, 16, } }, + { VA_FOURCC_YUY2, VA_LSB_FIRST, 16, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('U','Y','V','Y'), VA_LSB_FIRST, 16, } }, + { VA_FOURCC_UYVY, VA_LSB_FIRST, 16, } }, { I965_SURFACETYPE_RGBA, - { VA_FOURCC('R','G','B','X'), VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } }, + { VA_FOURCC_RGBX, VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } }, { I965_SURFACETYPE_RGBA, - { VA_FOURCC('B','G','R','X'), VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } }, + { VA_FOURCC_BGRX, VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } }, }; /* List of supported subpicture formats */ @@ -173,23 +173,23 @@ typedef struct { 
static const i965_subpic_format_map_t i965_subpic_formats_map[I965_MAX_SUBPIC_FORMATS + 1] = { { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P4A4_UNORM, - { VA_FOURCC('I','A','4','4'), VA_MSB_FIRST, 8, }, + { VA_FOURCC_IA44, VA_MSB_FIRST, 8, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A4P4_UNORM, - { VA_FOURCC('A','I','4','4'), VA_MSB_FIRST, 8, }, + { VA_FOURCC_AI44, VA_MSB_FIRST, 8, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P8A8_UNORM, - { VA_FOURCC('I','A','8','8'), VA_MSB_FIRST, 16, }, + { VA_FOURCC_IA88, VA_MSB_FIRST, 16, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A8P8_UNORM, - { VA_FOURCC('A','I','8','8'), VA_MSB_FIRST, 16, }, + { VA_FOURCC_AI88, VA_MSB_FIRST, 16, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_B8G8R8A8_UNORM, - { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, + { VA_FOURCC_BGRA, VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_R8G8B8A8_UNORM, - { VA_FOURCC('R','G','B','A'), VA_LSB_FIRST, 32, + { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }, COMMON_SUBPICTURE_FLAGS }, }; @@ -774,10 +774,10 @@ i965_surface_native_memory(VADriverContextP ctx, return VA_STATUS_SUCCESS; // todo, should we disable tiling for 422 format? 
- if (expected_fourcc == VA_FOURCC('I', '4', '2', '0') || - expected_fourcc == VA_FOURCC('I', 'Y', 'U', 'V') || - expected_fourcc == VA_FOURCC('Y', 'V', '1', '2') || - expected_fourcc == VA_FOURCC('Y', 'V', '1', '6')) + if (expected_fourcc == VA_FOURCC_I420 || + expected_fourcc == VA_FOURCC_IYUV || + expected_fourcc == VA_FOURCC_YV12 || + expected_fourcc == VA_FOURCC_YV16) tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surface, tiling, expected_fourcc, get_sampling_from_fourcc(expected_fourcc)); @@ -816,7 +816,7 @@ i965_suface_external_memory(VADriverContextP ctx, obj_surface->x_cr_offset = 0; switch (obj_surface->fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: ASSERT_RET(memory_attibute->num_planes == 2, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[0] == memory_attibute->pitches[1], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -829,8 +829,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', 'M', 'C', '1'): + case VA_FOURCC_YV12: + case VA_FOURCC_IMC1: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -843,9 +843,9 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'Y', 'U', 'V'): - case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC_I420: + case VA_FOURCC_IYUV: + case VA_FOURCC_IMC3: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -858,8 +858,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); 
obj_surface->subsampling = SUBSAMPLE_YUV422H; @@ -871,10 +871,10 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_RGBX; @@ -886,7 +886,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', '8', '0', '0'): /* monochrome surface */ + case VA_FOURCC_Y800: /* monochrome surface */ ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV400; @@ -898,7 +898,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '1', '1', 'P'): + case VA_FOURCC_411P: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -911,7 +911,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'H'): + case VA_FOURCC_422H: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -924,7 +924,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YV16: assert(memory_attibute->num_planes == 3); assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); @@ -937,7 +937,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'V'): + case VA_FOURCC_422V: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], 
VA_STATUS_ERROR_INVALID_PARAMETER); @@ -950,7 +950,7 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '4', '4', 'P'): + case VA_FOURCC_444P: ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); @@ -1008,7 +1008,7 @@ bpp_1stplane_by_fourcc(unsigned int fourcc) case VA_FOURCC_IYUV: case VA_FOURCC_NV12: case VA_FOURCC_NV11: - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YV16: return 1; default: @@ -1241,7 +1241,7 @@ i965_guess_surface_format(VADriverContextP ctx, struct object_context *obj_context = NULL; struct object_config *obj_config = NULL; - *fourcc = VA_FOURCC('Y', 'V', '1', '2'); + *fourcc = VA_FOURCC_YV12; *is_tiled = 0; if (i965->current_context_id == VA_INVALID_ID) @@ -1261,7 +1261,7 @@ i965_guess_surface_format(VADriverContextP ctx, if (IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id) || IS_GEN8(i965->intel.device_id)) { - *fourcc = VA_FOURCC('N', 'V', '1', '2'); + *fourcc = VA_FOURCC_NV12; *is_tiled = 1; return; } @@ -1269,12 +1269,12 @@ i965_guess_surface_format(VADriverContextP ctx, switch (obj_config->profile) { case VAProfileMPEG2Simple: case VAProfileMPEG2Main: - *fourcc = VA_FOURCC('I', '4', '2', '0'); + *fourcc = VA_FOURCC_I420; *is_tiled = 0; break; default: - *fourcc = VA_FOURCC('N', 'V', '1', '2'); + *fourcc = VA_FOURCC_NV12; *is_tiled = 0; break; } @@ -2750,8 +2750,8 @@ i965_CreateImage(VADriverContextP ctx, awidth = ALIGN(width, 64); - if ((format->fourcc == VA_FOURCC('Y','V','1','2')) || - (format->fourcc == VA_FOURCC('I','4','2','0'))) { + if ((format->fourcc == VA_FOURCC_YV12) || + (format->fourcc == VA_FOURCC_I420)) { if (awidth % 128 != 0) { awidth = ALIGN(width, 128); } @@ -2766,8 +2766,8 @@ i965_CreateImage(VADriverContextP ctx, memset(image->component_order, 0, sizeof(image->component_order)); switch (format->fourcc) { - case 
VA_FOURCC('I','A','4','4'): - case VA_FOURCC('A','I','4','4'): + case VA_FOURCC_IA44: + case VA_FOURCC_AI44: image->num_planes = 1; image->pitches[0] = awidth; image->offsets[0] = 0; @@ -2778,8 +2778,8 @@ i965_CreateImage(VADriverContextP ctx, image->component_order[1] = 'G'; image->component_order[2] = 'B'; break; - case VA_FOURCC('I','A','8','8'): - case VA_FOURCC('A','I','8','8'): + case VA_FOURCC_IA88: + case VA_FOURCC_AI88: image->num_planes = 1; image->pitches[0] = awidth * 2; image->offsets[0] = 0; @@ -2790,18 +2790,18 @@ i965_CreateImage(VADriverContextP ctx, image->component_order[1] = 'G'; image->component_order[2] = 'B'; break; - case VA_FOURCC('A','R','G','B'): - case VA_FOURCC('A','B','G','R'): - case VA_FOURCC('B','G','R','A'): - case VA_FOURCC('R','G','B','A'): - case VA_FOURCC('B','G','R','X'): - case VA_FOURCC('R','G','B','X'): + case VA_FOURCC_ARGB: + case VA_FOURCC_ABGR: + case VA_FOURCC_BGRA: + case VA_FOURCC_RGBA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: image->num_planes = 1; image->pitches[0] = awidth * 4; image->offsets[0] = 0; image->data_size = image->offsets[0] + image->pitches[0] * aheight; break; - case VA_FOURCC('Y','V','1','2'): + case VA_FOURCC_YV12: image->num_planes = 3; image->pitches[0] = awidth; image->offsets[0] = 0; @@ -2811,7 +2811,7 @@ i965_CreateImage(VADriverContextP ctx, image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; - case VA_FOURCC('I','4','2','0'): + case VA_FOURCC_I420: image->num_planes = 3; image->pitches[0] = awidth; image->offsets[0] = 0; @@ -2821,7 +2821,7 @@ i965_CreateImage(VADriverContextP ctx, image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: image->num_planes = 2; image->pitches[0] = awidth; image->offsets[0] = 0; @@ -2829,8 +2829,8 @@ i965_CreateImage(VADriverContextP ctx, image->offsets[1] = size; image->data_size = size + 2 * size2; break; - case VA_FOURCC('Y','U','Y','2'): - case 
VA_FOURCC('U','Y','V','Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: image->num_planes = 1; image->pitches[0] = awidth * 2; image->offsets[0] = 0; @@ -2895,9 +2895,9 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->x_cr_offset = 0; if ((tiled && !obj_surface->user_disable_tiling)) { - ASSERT_RET(fourcc != VA_FOURCC('I', '4', '2', '0') && - fourcc != VA_FOURCC('I', 'Y', 'U', 'V') && - fourcc != VA_FOURCC('Y', 'V', '1', '2'), + ASSERT_RET(fourcc != VA_FOURCC_I420 && + fourcc != VA_FOURCC_IYUV && + fourcc != VA_FOURCC_YV12, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); if (obj_surface->user_h_stride_set) { ASSERT_RET(IS_ALIGNED(obj_surface->width, 128), VA_STATUS_ERROR_INVALID_PARAMETER); @@ -2912,7 +2912,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2924,7 +2924,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('I', 'M', 'C', '1'): + case VA_FOURCC_IMC1: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2936,7 +2936,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC_IMC3: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2948,7 +2948,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'H'): + case VA_FOURCC_422H: assert(subsampling == SUBSAMPLE_YUV422H); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2960,7 +2960,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', 
'2', '2', 'V'): + case VA_FOURCC_422V: assert(subsampling == SUBSAMPLE_YUV422V); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width; @@ -2972,7 +2972,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '1', '1', 'P'): + case VA_FOURCC_411P: assert(subsampling == SUBSAMPLE_YUV411); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 4; @@ -2984,7 +2984,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '4', '4', 'P'): + case VA_FOURCC_444P: assert(subsampling == SUBSAMPLE_YUV444); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width; @@ -2996,7 +2996,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('Y', '8', '0', '0'): + case VA_FOURCC_Y800: assert(subsampling == SUBSAMPLE_YUV400); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = 0; @@ -3008,8 +3008,8 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: assert(subsampling == SUBSAMPLE_YUV422H); obj_surface->width = ALIGN(obj_surface->orig_width * 2, 128); obj_surface->cb_cr_pitch = obj_surface->width; @@ -3022,10 +3022,10 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: assert(subsampling == SUBSAMPLE_RGBX); obj_surface->width = ALIGN(obj_surface->orig_width * 4, 128); @@ -3048,7 +3048,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: obj_surface->y_cb_offset = 
obj_surface->height; obj_surface->y_cr_offset = obj_surface->height; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -3057,7 +3057,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + obj_surface->height / 2; break; - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YV16: obj_surface->cb_cr_width = obj_surface->orig_width / 2; obj_surface->cb_cr_height = obj_surface->orig_height; obj_surface->y_cr_offset = obj_surface->height; @@ -3066,9 +3066,9 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32); break; - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - if (fourcc == VA_FOURCC('Y', 'V', '1', '2')) { + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + if (fourcc == VA_FOURCC_YV12) { obj_surface->y_cr_offset = obj_surface->height; obj_surface->y_cb_offset = obj_surface->height + obj_surface->height / 4; } else { @@ -3082,8 +3082,8 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + obj_surface->height / 2; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: obj_surface->width = ALIGN(obj_surface->orig_width * 2, 16); obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; @@ -3093,10 +3093,10 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_width = obj_surface->width; region_height = obj_surface->height; break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: obj_surface->width = ALIGN(obj_surface->orig_width * 4, 16); region_width = obj_surface->width; region_height = obj_surface->height; @@ -3157,7 +3157,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, if (!obj_surface->bo) { unsigned int 
is_tiled = 0; - unsigned int fourcc = VA_FOURCC('Y', 'V', '1', '2'); + unsigned int fourcc = VA_FOURCC_YV12; i965_guess_surface_format(ctx, surface, &fourcc, &is_tiled); int sampling = get_sampling_from_fourcc(fourcc); va_status = i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); @@ -3199,7 +3199,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->format.bits_per_pixel = 12; switch (image->format.fourcc) { - case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC_YV12: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3209,7 +3209,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[2] = w_pitch * obj_surface->y_cb_offset; break; - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YV16: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3219,7 +3219,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[2] = w_pitch * obj_surface->y_cb_offset; break; - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: image->num_planes = 2; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3227,7 +3227,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[1] = w_pitch * obj_surface->y_cb_offset; break; - case VA_FOURCC('I', '4', '2', '0'): + case VA_FOURCC_I420: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3236,16 +3236,16 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->pitches[2] = obj_surface->cb_cr_pitch; /* V */ image->offsets[2] = w_pitch * obj_surface->y_cr_offset; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: image->num_planes = 1; image->pitches[0] = obj_surface->width; /* Y, width is aligned already */ image->offsets[0] = 0; break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 
'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: image->num_planes = 1; image->pitches[0] = obj_surface->width; break; @@ -3361,39 +3361,39 @@ get_sampling_from_fourcc(unsigned int fourcc) int surface_sampling = -1; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'Y', 'U', 'V'): - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC_NV12: + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + case VA_FOURCC_IYUV: + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: surface_sampling = SUBSAMPLE_YUV420; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): - case VA_FOURCC('4', '2', '2', 'H'): - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: + case VA_FOURCC_422H: + case VA_FOURCC_YV16: surface_sampling = SUBSAMPLE_YUV422H; break; - case VA_FOURCC('4', '2', '2', 'V'): + case VA_FOURCC_422V: surface_sampling = SUBSAMPLE_YUV422V; break; - case VA_FOURCC('4', '4', '4', 'P'): + case VA_FOURCC_444P: surface_sampling = SUBSAMPLE_YUV444; break; - case VA_FOURCC('4', '1', '1', 'P'): + case VA_FOURCC_411P: surface_sampling = SUBSAMPLE_YUV411; break; - case VA_FOURCC('Y', '8', '0', '0'): + case VA_FOURCC_Y800: surface_sampling = SUBSAMPLE_YUV400; break; - case VA_FOURCC('R','G','B','A'): - case VA_FOURCC('R','G','B','X'): - case VA_FOURCC('B','G','R','A'): - case VA_FOURCC('B','G','R','X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: surface_sampling = SUBSAMPLE_RGBX; break; default: @@ -3623,20 +3623,20 @@ i965_sw_getimage(VADriverContextP ctx, rect.height = height; switch (obj_image->image.format.fourcc) { - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: /* I420 is native format for MPEG-2 decoded surfaces */ if 
(render_state->interleaved_uv) goto operation_failed; get_image_i420(obj_image, image_data, obj_surface, &rect); break; - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: /* NV12 is native format for H.264 decoded surfaces */ if (!render_state->interleaved_uv) goto operation_failed; get_image_nv12(obj_image, image_data, obj_surface, &rect); break; - case VA_FOURCC('Y','U','Y','2'): + case VA_FOURCC_YUY2: /* YUY2 is the format supported by overlay plane */ get_image_yuy2(obj_image, image_data, obj_surface, &rect); break; @@ -3975,14 +3975,14 @@ i965_sw_putimage(VADriverContextP ctx, dest_rect.height = dest_height; switch (obj_image->image.format.fourcc) { - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: va_status = put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: va_status = put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; - case VA_FOURCC('Y','U','Y','2'): + case VA_FOURCC_YUY2: va_status = put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect); break; default: @@ -4341,7 +4341,7 @@ i965_GetSurfaceAttributes( if (IS_G4X(i965->intel.device_id)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attrib_list[i].value.value.i = VA_FOURCC_I420; } else { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; @@ -4349,31 +4349,31 @@ i965_GetSurfaceAttributes( } else if (IS_IRONLAKE(i965->intel.device_id)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attrib_list[i].value.value.i = VA_FOURCC_I420; } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || 
obj_config->profile == VAProfileH264High) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } else if (obj_config->profile == VAProfileNone) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } else { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } } else if (IS_GEN6(i965->intel.device_id)) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } else if (IS_GEN7(i965->intel.device_id) || IS_GEN8(i965->intel.device_id)) { if (obj_config->profile == VAProfileJPEGBaseline) attrib_list[i].value.value.i = 0; /* internal format */ else - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } } else { if (IS_G4X(i965->intel.device_id)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) { + if (attrib_list[i].value.value.i != VA_FOURCC_I420) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } @@ -4384,27 +4384,27 @@ i965_GetSurfaceAttributes( } else if (IS_IRONLAKE(i965->intel.device_id)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) { + if (attrib_list[i].value.value.i != VA_FOURCC_I420) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || obj_config->profile == VAProfileH264High) { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= 
~VA_SURFACE_ATTRIB_SETTABLE; } } else if (obj_config->profile == VAProfileNone) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: + case VA_FOURCC_YUY2: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: break; default: attrib_list[i].value.value.i = 0; @@ -4419,14 +4419,14 @@ i965_GetSurfaceAttributes( if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: + case VA_FOURCC_YUY2: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: break; default: attrib_list[i].value.value.i = 0; @@ -4434,7 +4434,7 @@ i965_GetSurfaceAttributes( break; } } else { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } @@ -4444,9 +4444,9 @@ i965_GetSurfaceAttributes( if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): + case 
VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: break; default: attrib_list[i].value.value.i = 0; @@ -4458,7 +4458,7 @@ i965_GetSurfaceAttributes( attrib_list[i].value.value.i = 0; /* JPEG decoding always uses an internal format */ attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } else { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } @@ -4529,7 +4529,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; } } else if (IS_IRONLAKE(i965->intel.device_id)) { @@ -4539,7 +4539,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; break; @@ -4550,20 +4550,20 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; case VAProfileNone: attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; 
attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; break; @@ -4576,45 +4576,45 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */ attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_YV12; i++; if (obj_config->entrypoint == VAEntrypointVideoProc) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + attribs[i].value.value.i = VA_FOURCC_YUY2; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = 
VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + attribs[i].value.value.i = VA_FOURCC_RGBA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + attribs[i].value.value.i = VA_FOURCC_RGBX; i++; } } @@ -4624,49 +4624,49 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '1'); + attribs[i].value.value.i = VA_FOURCC_IMC1; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', '8', '0', '0'); + attribs[i].value.value.i = VA_FOURCC_Y800; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '1', '1', 'P'); + attribs[i].value.value.i = VA_FOURCC_411P; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'H'); + attribs[i].value.value.i = VA_FOURCC_422H; i++; 
attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'V'); + attribs[i].value.value.i = VA_FOURCC_422V; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '4', '4', 'P'); + attribs[i].value.value.i = VA_FOURCC_444P; i++; } else { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; } } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ @@ -4674,50 +4674,50 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_YV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE 
| VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; if (obj_config->entrypoint == VAEntrypointVideoProc) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + attribs[i].value.value.i = VA_FOURCC_YUY2; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + attribs[i].value.value.i = VA_FOURCC_RGBA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + attribs[i].value.value.i = VA_FOURCC_RGBX; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '6'); + attribs[i].value.value.i = VA_FOURCC_YV16; i++; } } @@ -4727,49 +4727,49 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '1'); + attribs[i].value.value.i = VA_FOURCC_IMC1; i++; attribs[i].type = 
VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', '8', '0', '0'); + attribs[i].value.value.i = VA_FOURCC_Y800; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '1', '1', 'P'); + attribs[i].value.value.i = VA_FOURCC_411P; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'H'); + attribs[i].value.value.i = VA_FOURCC_422H; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'V'); + attribs[i].value.value.i = VA_FOURCC_422V; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '4', '4', 'P'); + attribs[i].value.value.i = VA_FOURCC_444P; i++; } else { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; } } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ @@ -4778,62 +4778,62 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | 
VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_YV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; if (obj_config->entrypoint == VAEntrypointVideoProc) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + attribs[i].value.value.i = VA_FOURCC_YUY2; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + attribs[i].value.value.i = VA_FOURCC_RGBA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + attribs[i].value.value.i = VA_FOURCC_RGBX; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; 
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('B', 'G', 'R', 'A'); + attribs[i].value.value.i = VA_FOURCC_BGRA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('B', 'G', 'R', 'X'); + attribs[i].value.value.i = VA_FOURCC_BGRX; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '6'); + attribs[i].value.value.i = VA_FOURCC_YV16; i++; } } diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 994dc1b6..535402fb 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -40,6 +40,7 @@ #include "i965_mutext.h" #include "object_heap.h" #include "intel_driver.h" +#include "i965_fourcc.h" #define I965_MAX_PROFILES 20 #define I965_MAX_ENTRYPOINTS 5 diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h new file mode 100644 index 00000000..510c0441 --- /dev/null +++ b/src/i965_fourcc.h @@ -0,0 +1,36 @@ +#ifndef _I965_FOURCC_H_ +#define _I965_FOURCC_H_ + +#ifndef VA_FOURCC_YV16 +#define VA_FOURCC_YV16 VA_FOURCC('Y','V','1','6') +#endif + +#ifndef VA_FOURCC_I420 +#define VA_FOURCC_I420 VA_FOURCC('I','4','2','0') +#endif + +/* + * VA_FOURCC_IA44 is an exception because the va.h already + * defines the AI44 as VA_FOURCC('I', 'A', '4', '4'). 
+ */ +#ifndef VA_FOURCC_IA44 +#define VA_FOURCC_IA44 VA_FOURCC('A','I','4','4') +#endif + +#ifndef VA_FOURCC_IA88 +#define VA_FOURCC_IA88 VA_FOURCC('I','A','8','8') +#endif + +#ifndef VA_FOURCC_AI88 +#define VA_FOURCC_AI88 VA_FOURCC('A','I','8','8') +#endif + +#ifndef VA_FOURCC_IMC1 +#define VA_FOURCC_IMC1 VA_FOURCC('I','M','C','1') +#endif + +#ifndef VA_FOURCC_YVY2 +#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2') +#endif + +#endif /* _I965_FOURCC_H_ */ -- cgit v1.2.1 From 947cb535e30506922c22e84343c652b436048eaf Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 24 Mar 2014 09:49:06 +0800 Subject: VPP: Use the VA_FOURCC_ABCD constant to replace the VA_FOURCC(A,B,C,D) This is helpful to avoid the typo error when using VA_FOURCC(A, B, C, D). Signed-off-by: Zhao Yakui (cherry picked from commit 0b9ed6ad9fbe46812d566fa31bf6d60739757a17) Conflicts: src/i965_post_processing.c --- src/gen75_picture_process.c | 6 +- src/gen75_vpp_gpe.c | 2 +- src/gen8_post_processing.c | 42 ++++----- src/i965_post_processing.c | 202 ++++++++++++++++++++++---------------------- 4 files changed, 126 insertions(+), 126 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index d5b5acb3..ad7d463f 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -163,7 +163,7 @@ gen75_proc_picture(VADriverContextP ctx, if (!obj_dst_surf->bo) { unsigned int is_tiled = 0; - unsigned int fourcc = VA_FOURCC('N','V','1','2'); + unsigned int fourcc = VA_FOURCC_NV12; int sampling = SUBSAMPLE_YUV420; i965_check_alloc_surface_bo(ctx, obj_dst_surf, is_tiled, fourcc, sampling); } @@ -197,8 +197,8 @@ gen75_proc_picture(VADriverContextP ctx, filter->type == VAProcFilterColorBalance){ gen75_vpp_vebox(ctx, proc_ctx); }else if(filter->type == VAProcFilterSharpening){ - if (obj_src_surf->fourcc != VA_FOURCC('N', 'V', '1', '2') || - obj_dst_surf->fourcc != VA_FOURCC('N', 'V', '1', '2')) { + if (obj_src_surf->fourcc != VA_FOURCC_NV12 || + obj_dst_surf->fourcc != 
VA_FOURCC_NV12) { status = VA_STATUS_ERROR_UNIMPLEMENTED; goto error; } diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 9cb2912f..637d2bfa 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -681,7 +681,7 @@ vpp_gpe_process_sharpening(VADriverContextP ctx, assert(obj_surf); if (obj_surf) { - i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'), + i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); vpp_gpe_ctx->surface_tmp_object = obj_surf; } diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 3abe287f..a4fbcbbf 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -469,18 +469,18 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct object_image *obj_image; dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); - const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('Y', 'V', '1', '6') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; - const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('Y', 'V', '1', '6') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + const int U = (fourcc == VA_FOURCC_YV12 || + fourcc == VA_FOURCC_YV16 || + fourcc == VA_FOURCC_IMC1) ? 2 : 1; + const int V = (fourcc == VA_FOURCC_YV12 || + fourcc == VA_FOURCC_YV16 || + fourcc == VA_FOURCC_IMC1) ? 
1 : 2; + int interleaved_uv = fourcc == VA_FOURCC_NV12; + int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); + int rgbx_format = (fourcc == VA_FOURCC_RGBA || + fourcc == VA_FOURCC_RGBX || + fourcc == VA_FOURCC_BGRA || + fourcc == VA_FOURCC_BGRX); if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; @@ -540,7 +540,7 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { + if (fourcc == VA_FOURCC_YV16) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; @@ -559,8 +559,8 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; /* the format is MSB: X-B-G-R */ pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { /* It is stored as MSB: X-R-G-B */ pp_static_parameter->grf2.save_avs_rgb_swap = 1; } @@ -589,11 +589,11 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc int format0 = SURFACE_FORMAT_Y8_UNORM; switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: format0 = SURFACE_FORMAT_YCRCB_NORMAL; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: format0 = SURFACE_FORMAT_YCRCB_SWAPY; break; @@ -605,8 +605,8 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; pp_static_parameter->grf2.src_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == 
VA_FOURCC('B', 'G', 'R', 'X'))) { + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { pp_static_parameter->grf2.src_avs_rgb_swap = 1; } } @@ -751,11 +751,11 @@ static void gen7_update_src_surface_uv_offset(VADriverContextP ctx, struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; int fourcc = pp_get_surface_fourcc(ctx, surface); - if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) { + if (fourcc == VA_FOURCC_YUY2) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3; - } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + } else if (fourcc == VA_FOURCC_UYVY) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c4029e16..136e8e34 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1485,20 +1485,20 @@ static void i965_update_src_surface_static_parameter( int fourcc = pp_get_surface_fourcc(ctx, surface); switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: pp_static_parameter->grf1.source_packed_u_offset = 1; pp_static_parameter->grf1.source_packed_v_offset = 3; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: pp_static_parameter->grf1.source_packed_y_offset = 1; pp_static_parameter->grf1.source_packed_v_offset = 2; break; - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: pp_static_parameter->grf1.source_rgb_layout = 0; break; - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: 
pp_static_parameter->grf1.source_rgb_layout = 1; break; default: @@ -1516,20 +1516,20 @@ static void i965_update_dst_surface_static_parameter( int fourcc = pp_get_surface_fourcc(ctx, surface); switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1; pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1; pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2; break; - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0; break; - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1; break; default: @@ -1702,19 +1702,19 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int Y = 0; - const int U = ((fourcc == VA_FOURCC('Y', 'V', '1', '2')) || - (fourcc == VA_FOURCC('Y', 'V', '1', '6'))) + const int U = ((fourcc == VA_FOURCC_YV12) || + (fourcc == VA_FOURCC_YV16)) ? 2 : 1; - const int V = ((fourcc == VA_FOURCC('Y', 'V', '1', '2')) || - (fourcc == VA_FOURCC('Y', 'V', '1', '6'))) + const int V = ((fourcc == VA_FOURCC_YV12) || + (fourcc == VA_FOURCC_YV16)) ? 
1 : 2; const int UV = 1; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + int interleaved_uv = fourcc == VA_FOURCC_NV12; + int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); + int full_packed_format = (fourcc == VA_FOURCC_RGBA || + fourcc == VA_FOURCC_RGBX || + fourcc == VA_FOURCC_BGRA || + fourcc == VA_FOURCC_BGRX); int scale_factor_of_1st_plane_width_in_byte = 1; if (surface->type == I965_SURFACE_TYPE_SURFACE) { @@ -1774,7 +1774,7 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[2]; offset[2] = obj_image->image.offsets[2]; - if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { + if (fourcc == VA_FOURCC_YV16) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; @@ -1822,18 +1822,18 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct object_image *obj_image; dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); - const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('Y', 'V', '1', '6') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; - const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('Y', 'V', '1', '6') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 
1 : 2; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + const int U = (fourcc == VA_FOURCC_YV12 || + fourcc == VA_FOURCC_YV16 || + fourcc == VA_FOURCC_IMC1) ? 2 : 1; + const int V = (fourcc == VA_FOURCC_YV12 || + fourcc == VA_FOURCC_YV16 || + fourcc == VA_FOURCC_IMC1) ? 1 : 2; + int interleaved_uv = fourcc == VA_FOURCC_NV12; + int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); + int rgbx_format = (fourcc == VA_FOURCC_RGBA || + fourcc == VA_FOURCC_RGBX || + fourcc == VA_FOURCC_BGRA || + fourcc == VA_FOURCC_BGRX); if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; @@ -1892,7 +1892,7 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC('Y', 'V', '1', '6')) { + if (fourcc == VA_FOURCC_YV16) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; @@ -1911,8 +1911,8 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; /* the format is MSB: X-B-G-R */ pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { /* It is stored as MSB: X-R-G-B */ pp_static_parameter->grf2.save_avs_rgb_swap = 1; } @@ -1941,11 +1941,11 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, 
struct i965_post_proc int format0 = SURFACE_FORMAT_Y8_UNORM; switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: format0 = SURFACE_FORMAT_YCRCB_NORMAL; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: format0 = SURFACE_FORMAT_YCRCB_SWAPY; break; @@ -1957,8 +1957,8 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; pp_static_parameter->grf2.src_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { pp_static_parameter->grf2.src_avs_rgb_swap = 1; } } @@ -2736,11 +2736,11 @@ static void gen7_update_src_surface_uv_offset(VADriverContextP ctx, struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; int fourcc = pp_get_surface_fourcc(ctx, surface); - if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) { + if (fourcc == VA_FOURCC_YUY2) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3; - } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + } else if (fourcc == VA_FOURCC_UYVY) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2; @@ -2942,10 +2942,10 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con if (pp_static_parameter->grf2.avs_wa_enable) { int src_fourcc = pp_get_surface_fourcc(ctx, src_surface); - if ((src_fourcc == VA_FOURCC('R', 'G', 'B', 'A')) || - (src_fourcc == VA_FOURCC('R', 'G', 'B', 'X')) || - (src_fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (src_fourcc == VA_FOURCC('B', 
'G', 'R', 'X'))) { + if ((src_fourcc == VA_FOURCC_RGBA) || + (src_fourcc == VA_FOURCC_RGBX) || + (src_fourcc == VA_FOURCC_BGRA) || + (src_fourcc == VA_FOURCC_BGRX)) { pp_static_parameter->grf2.avs_wa_enable = 0; } } @@ -3531,7 +3531,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c i965_check_alloc_surface_bo(ctx, pp_dndi_context->current_out_obj_surface, tiling != I915_TILING_NONE, - VA_FOURCC('N','V','1','2'), + VA_FOURCC_NV12, SUBSAMPLE_YUV420); } @@ -4547,7 +4547,7 @@ i965_vpp_clear_surface(VADriverContextP ctx, int region_width, region_height; /* Currently only support NV12 surface */ - if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) + if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12) return; rgb_to_yuv(color, &y, &u, &v, &a); @@ -4635,8 +4635,8 @@ i965_scaling_processing( VAStatus va_status = VA_STATUS_SUCCESS; struct i965_driver_data *i965 = i965_driver_data(ctx); - assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2')); - assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(src_surface_obj->fourcc == VA_FOURCC_NV12); + assert(dst_surface_obj->fourcc == VA_FOURCC_NV12); if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) { struct i965_surface src_surface; @@ -4687,7 +4687,7 @@ i965_post_processing( struct i965_surface dst_surface; /* Currently only support post processing for NV12 surface */ - if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) + if (obj_surface->fourcc != VA_FOURCC_NV12) return out_surface_id; _i965LockMutex(&i965->pp_mutex); @@ -4707,7 +4707,7 @@ i965_post_processing( assert(status == VA_STATUS_SUCCESS); obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); dst_surface.base = (struct 
object_base *)obj_surface; @@ -4743,7 +4743,7 @@ i965_post_processing( assert(status == VA_STATUS_SUCCESS); obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); dst_surface.base = (struct object_base *)obj_surface; @@ -4807,7 +4807,7 @@ i965_image_plx_nv12_plx_processing(VADriverContextP ctx, assert(status == VA_STATUS_SUCCESS); obj_surface = SURFACE(tmp_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); tmp_surface.base = (struct object_base *)obj_surface; tmp_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -4847,7 +4847,7 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, VAStatus vaStatus; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4884,7 +4884,7 @@ i965_image_pl3_processing(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4895,10 +4895,10 @@ i965_image_pl3_processing(VADriverContextP ctx, intel_batchbuffer_flush(pp_context->batch); break; - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_YV12: + case VA_FOURCC_I420: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4909,8 +4909,8 @@ i965_image_pl3_processing(VADriverContextP 
ctx, intel_batchbuffer_flush(pp_context->batch); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4947,7 +4947,7 @@ i965_image_pl2_processing(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4957,10 +4957,10 @@ i965_image_pl2_processing(VADriverContextP ctx, NULL); break; - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_YV12: + case VA_FOURCC_I420: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4970,8 +4970,8 @@ i965_image_pl2_processing(VADriverContextP ctx, NULL); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4981,10 +4981,10 @@ i965_image_pl2_processing(VADriverContextP ctx, NULL); break; - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5016,7 +5016,7 @@ i965_image_pl1_processing(VADriverContextP ctx, VAStatus vaStatus; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5027,7 +5027,7 @@ i965_image_pl1_processing(VADriverContextP 
ctx, intel_batchbuffer_flush(pp_context->batch); break; - case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC_YV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5038,8 +5038,8 @@ i965_image_pl1_processing(VADriverContextP ctx, intel_batchbuffer_flush(pp_context->batch); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -5079,15 +5079,15 @@ i965_image_processing(VADriverContextP ctx, _i965LockMutex(&i965->pp_mutex); switch (fourcc) { - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): - case VA_FOURCC('4', '2', '2', 'H'): - case VA_FOURCC('4', '2', '2', 'V'): - case VA_FOURCC('4', '1', '1', 'P'): - case VA_FOURCC('4', '4', '4', 'P'): - case VA_FOURCC('Y', 'V', '1', '6'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_422H: + case VA_FOURCC_422V: + case VA_FOURCC_411P: + case VA_FOURCC_444P: + case VA_FOURCC_YV16: status = i965_image_pl3_processing(ctx, src_surface, src_rect, @@ -5095,25 +5095,25 @@ i965_image_processing(VADriverContextP ctx, dst_rect); break; - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: status = i965_image_pl2_processing(ctx, src_surface, src_rect, dst_surface, dst_rect); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: status = i965_image_pl1_processing(ctx, src_surface, src_rect, dst_surface, dst_rect); break; - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: status = 
i965_image_pl1_rgbx_processing(ctx, src_surface, src_rect, @@ -5362,7 +5362,7 @@ i965_proc_picture(VADriverContextP ctx, src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3]; VASurfaceID out_surface_id = VA_INVALID_ID; - if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc != VA_FOURCC_NV12) { src_surface.base = (struct object_base *)obj_surface; src_surface.type = I965_SURFACE_TYPE_SURFACE; src_surface.flags = I965_SURFACE_FLAG_FRAME; @@ -5381,7 +5381,7 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -5459,7 +5459,7 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; status = i965_post_processing_internal(ctx, &proc_context->pp_context, @@ -5487,7 +5487,7 @@ i965_proc_picture(VADriverContextP ctx, } int csc_needed = 0; - if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC('N','V','1','2')){ + if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC_NV12){ csc_needed = 1; out_surface_id = VA_INVALID_ID; status = i965_CreateSurfaces(ctx, @@ -5500,10 +5500,10 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; struct object_surface *csc_surface = 
SURFACE(out_surface_id); assert(csc_surface); - i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)csc_surface; } else { - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; } -- cgit v1.2.1 From a618893863e780475f1b531d27d4dc0cd82c4a15 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 24 Mar 2014 09:49:09 +0800 Subject: VEBOX: Use the VA_FOURCC_ABCD constant to replace the VA_FOURCC(A,B,C,D) This is helpful to avoid the typo error when using VA_FOURCC(A, B, C, D). Signed-off-by: Zhao Yakui -- src/gen75_vpp_vebox.c | 83 ++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 42 deletions(-) (cherry picked from commit 2a31ad7e200cfb5df95b11875ee33795cdc7e343) --- src/gen75_vpp_vebox.c | 83 +++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 20fb44a9..8acf7432 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -101,8 +101,8 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; int flags = I965_PP_FLAG_AVS; - assert(src_obj_surf->fourcc == VA_FOURCC('N','V','1','2')); - assert(dst_obj_surf->fourcc == VA_FOURCC('N','V','1','2')); + assert(src_obj_surf->fourcc == VA_FOURCC_NV12); + assert(dst_obj_surf->fourcc == VA_FOURCC_NV12); VARectangle src_rect, dst_rect; src_rect.x = 0; @@ -393,11 +393,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr return; } - if(proc_ctx->fourcc_input == VA_FOURCC('R','G','B','A') && - (proc_ctx->fourcc_output == 
VA_FOURCC('N','V','1','2') || - proc_ctx->fourcc_output == VA_FOURCC('Y','V','1','2') || - proc_ctx->fourcc_output == VA_FOURCC('Y','V','Y','2') || - proc_ctx->fourcc_output == VA_FOURCC('A','Y','U','V'))) { + if(proc_ctx->fourcc_input == VA_FOURCC_RGBA && + (proc_ctx->fourcc_output == VA_FOURCC_NV12 || + proc_ctx->fourcc_output == VA_FOURCC_YV12 || + proc_ctx->fourcc_output == VA_FOURCC_YVY2 || + proc_ctx->fourcc_output == VA_FOURCC_AYUV)) { tran_coef[0] = 0.257; tran_coef[1] = 0.504; @@ -414,12 +414,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr u_coef[2] = 128 * 4; is_transform_enabled = 1; - }else if((proc_ctx->fourcc_input == VA_FOURCC('N','V','1','2') || - proc_ctx->fourcc_input == VA_FOURCC('Y','V','1','2') || - proc_ctx->fourcc_input == VA_FOURCC('Y','U','Y','2') || - proc_ctx->fourcc_input == VA_FOURCC('A','Y','U','V'))&& - proc_ctx->fourcc_output == VA_FOURCC('R','G','B','A')) { - + }else if((proc_ctx->fourcc_input == VA_FOURCC_NV12 || + proc_ctx->fourcc_input == VA_FOURCC_YV12 || + proc_ctx->fourcc_input == VA_FOURCC_YUY2 || + proc_ctx->fourcc_input == VA_FOURCC_AYUV) && + proc_ctx->fourcc_output == VA_FOURCC_RGBA) { tran_coef[0] = 1.164; tran_coef[1] = 0.000; tran_coef[2] = 1.569; @@ -755,7 +754,7 @@ void hsw_veb_resource_prepare(VADriverContextP ctx, } if(obj_surf_in->bo == NULL){ - input_fourcc = VA_FOURCC('N','V','1','2'); + input_fourcc = VA_FOURCC_NV12; input_sampling = SUBSAMPLE_YUV420; input_tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surf_in, input_tiling, input_fourcc, input_sampling); @@ -767,7 +766,7 @@ void hsw_veb_resource_prepare(VADriverContextP ctx, } if(obj_surf_out->bo == NULL){ - output_fourcc = VA_FOURCC('N','V','1','2'); + output_fourcc = VA_FOURCC_NV12; output_sampling = SUBSAMPLE_YUV420; output_tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surf_out, output_tiling, output_fourcc, output_sampling); @@ -1014,17 +1013,17 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, } /* 
convert the following format to NV12 format */ - if(obj_surf_input->fourcc == VA_FOURCC('Y','V','1','2') || - obj_surf_input->fourcc == VA_FOURCC('I','4','2','0') || - obj_surf_input->fourcc == VA_FOURCC('I','M','C','1') || - obj_surf_input->fourcc == VA_FOURCC('I','M','C','3') || - obj_surf_input->fourcc == VA_FOURCC('R','G','B','A')){ + if(obj_surf_input->fourcc == VA_FOURCC_YV12 || + obj_surf_input->fourcc == VA_FOURCC_I420 || + obj_surf_input->fourcc == VA_FOURCC_IMC1 || + obj_surf_input->fourcc == VA_FOURCC_IMC3 || + obj_surf_input->fourcc == VA_FOURCC_RGBA){ proc_ctx->format_convert_flags |= PRE_FORMAT_CONVERT; - } else if(obj_surf_input->fourcc == VA_FOURCC('A','Y','U','V') || - obj_surf_input->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surf_input->fourcc == VA_FOURCC('N','V','1','2')){ + } else if(obj_surf_input->fourcc == VA_FOURCC_AYUV || + obj_surf_input->fourcc == VA_FOURCC_YUY2 || + obj_surf_input->fourcc == VA_FOURCC_NV12){ // nothing to do here } else { /* not support other format as input */ @@ -1045,7 +1044,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_input_vebox) { proc_ctx->surface_input_vebox_object = obj_surf_input_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } @@ -1053,16 +1052,16 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, } /* create one temporary NV12 surfaces for conversion*/ - if(obj_surf_output->fourcc == VA_FOURCC('Y','V','1','2') || - obj_surf_output->fourcc == VA_FOURCC('I','4','2','0') || - obj_surf_output->fourcc == VA_FOURCC('I','M','C','1') || - obj_surf_output->fourcc == VA_FOURCC('I','M','C','3') || - obj_surf_output->fourcc == VA_FOURCC('R','G','B','A')) { + if(obj_surf_output->fourcc == VA_FOURCC_YV12 || + obj_surf_output->fourcc == VA_FOURCC_I420 || + obj_surf_output->fourcc == VA_FOURCC_IMC1 || + obj_surf_output->fourcc 
== VA_FOURCC_IMC3 || + obj_surf_output->fourcc == VA_FOURCC_RGBA) { proc_ctx->format_convert_flags |= POST_FORMAT_CONVERT; - } else if(obj_surf_output->fourcc == VA_FOURCC('A','Y','U','V') || - obj_surf_output->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surf_output->fourcc == VA_FOURCC('N','V','1','2')){ + } else if(obj_surf_output->fourcc == VA_FOURCC_AYUV || + obj_surf_output->fourcc == VA_FOURCC_YUY2 || + obj_surf_output->fourcc == VA_FOURCC_NV12){ /* Nothing to do here */ } else { /* not support other format as input */ @@ -1084,7 +1083,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_output_vebox) { proc_ctx->surface_output_vebox_object = obj_surf_output_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } } @@ -1103,7 +1102,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_output_vebox) { proc_ctx->surface_output_scaled_object = obj_surf_output_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } } @@ -1133,7 +1132,7 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, } else if(proc_ctx->format_convert_flags & POST_SCALING_CONVERT) { /* scaling, convert and copy NV12 to YV12/IMC3/IMC2/RGBA output*/ - assert(obj_surface->fourcc == VA_FOURCC('N','V','1','2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); /* first step :surface scaling */ vpp_surface_scaling(ctx,proc_ctx->surface_output_scaled_object, obj_surface); @@ -1141,13 +1140,13 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, /* second step: color format convert and copy to output */ obj_surface = proc_ctx->surface_output_object; - if(obj_surface->fourcc == VA_FOURCC('N','V','1','2') || - obj_surface->fourcc == 
VA_FOURCC('Y','V','1','2') || - obj_surface->fourcc == VA_FOURCC('I','4','2','0') || - obj_surface->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surface->fourcc == VA_FOURCC('I','M','C','1') || - obj_surface->fourcc == VA_FOURCC('I','M','C','3') || - obj_surface->fourcc == VA_FOURCC('R','G','B','A')) { + if(obj_surface->fourcc == VA_FOURCC_NV12 || + obj_surface->fourcc == VA_FOURCC_YV12 || + obj_surface->fourcc == VA_FOURCC_I420 || + obj_surface->fourcc == VA_FOURCC_YUY2 || + obj_surface->fourcc == VA_FOURCC_IMC1 || + obj_surface->fourcc == VA_FOURCC_IMC3 || + obj_surface->fourcc == VA_FOURCC_RGBA) { vpp_surface_convert(ctx, proc_ctx->surface_output_object, proc_ctx->surface_output_scaled_object); }else { assert(0); -- cgit v1.2.1 From 7b6523cb9d63e3cfcc238dfd0d5f4fc323ca59e2 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 20 Mar 2014 12:08:51 +0800 Subject: Use the VA_FOURCC_ABCD constant to replace the VA_FOURCC(A,B,C,D) This is helpful to avoid the typo error when using VA_FOURCC(A, B, C, D). 
Signed-off-by: Zhao Yakui (cherry picked from commit acea969011bceee36a57fe2c0e4ee96c0c5e79c7) --- src/gen6_mfc_common.c | 2 +- src/gen6_mfd.c | 6 +++--- src/gen75_mfc.c | 2 +- src/gen75_mfd.c | 26 +++++++++++++------------- src/gen7_mfc.c | 2 +- src/gen7_mfd.c | 26 +++++++++++++------------- src/gen8_mfc.c | 2 +- src/gen8_mfd.c | 26 +++++++++++++------------- src/gen8_render.c | 6 +++--- src/i965_avc_bsd.c | 2 +- src/i965_decoder_utils.c | 2 +- src/i965_encoder.c | 4 ++-- src/i965_gpe_utils.c | 10 +++++----- src/i965_media_mpeg2.c | 2 +- src/i965_output_wayland.c | 18 +++++++++--------- src/i965_render.c | 6 +++--- 16 files changed, 71 insertions(+), 71 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 07e2eb24..7cf9cc6f 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -554,7 +554,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, /* Setup current frame and current direct mv buffer*/ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); if ( obj_surface->private_data == NULL) { gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 17f0be2d..4a220522 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -840,7 +840,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { @@ -993,7 +993,7 @@ gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo); gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1306,7 +1306,7 @@ gen6_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 28edd40b..2bfb6c2a 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -2393,7 +2393,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 7bea1f7c..2d4e236c 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -1084,7 +1084,7 @@ 
gen75_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { @@ -1233,7 +1233,7 @@ gen75_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1569,7 +1569,7 @@ gen75_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -2171,13 +2171,13 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; - int fourcc = VA_FOURCC('I', 'M', 'C', '3'); + int fourcc = VA_FOURCC_IMC3; pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; if (pic_param->num_components == 1) { subsampling = SUBSAMPLE_YUV400; - fourcc = VA_FOURCC('Y', '8', '0', '0'); + fourcc = VA_FOURCC_Y800; } else if (pic_param->num_components == 3) { int h1 = 
pic_param->components[0].h_sampling_factor; int h2 = pic_param->components[1].h_sampling_factor; @@ -2189,31 +2189,31 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx, if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - fourcc = VA_FOURCC('I', 'M', 'C', '3'); + fourcc = VA_FOURCC_IMC3; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - fourcc = VA_FOURCC('4', '4', '4', 'P'); + fourcc = VA_FOURCC_444P; } else if (h1 == 4 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - fourcc = VA_FOURCC('4', '1', '1', 'P'); + fourcc = VA_FOURCC_411P; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h2 == 2 && h2 == 2 && h3 == 2 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else assert(0); } else { @@ -2516,7 +2516,7 @@ gen75_jpeg_wa_init(VADriverContextP ctx, obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_context->jpeg_wa_surface_object = obj_surface; if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 394665d5..78b10965 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ 
-1022,7 +1022,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index a534fb89..e91cfd30 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -758,7 +758,7 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { @@ -906,7 +906,7 @@ gen7_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1247,7 +1247,7 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface); 
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -1798,13 +1798,13 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; - int fourcc = VA_FOURCC('I', 'M', 'C', '3'); + int fourcc = VA_FOURCC_IMC3; pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; if (pic_param->num_components == 1) { subsampling = SUBSAMPLE_YUV400; - fourcc = VA_FOURCC('Y', '8', '0', '0'); + fourcc = VA_FOURCC_Y800; } else if (pic_param->num_components == 3) { int h1 = pic_param->components[0].h_sampling_factor; int h2 = pic_param->components[1].h_sampling_factor; @@ -1816,31 +1816,31 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx, if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - fourcc = VA_FOURCC('I', 'M', 'C', '3'); + fourcc = VA_FOURCC_IMC3; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - fourcc = VA_FOURCC('4', '4', '4', 'P'); + fourcc = VA_FOURCC_444P; } else if (h1 == 4 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - fourcc = VA_FOURCC('4', '1', '1', 'P'); + fourcc = VA_FOURCC_411P; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h2 == 2 && h2 == 2 && h3 == 2 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + 
fourcc = VA_FOURCC_422V; } else assert(0); } else { @@ -2141,7 +2141,7 @@ gen7_jpeg_wa_init(VADriverContextP ctx, obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_context->jpeg_wa_surface_object = obj_surface; if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 90092a13..ac421bdb 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -2293,7 +2293,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 08d9b3d0..743fd74d 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -845,7 +845,7 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { @@ -994,7 +994,7 @@ gen8_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1330,7 +1330,7 @@ gen8_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -1886,7 +1886,7 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; - int fourcc = VA_FOURCC('I', 'M', 'C', '3'); + int fourcc = VA_FOURCC_IMC3; pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; @@ -1903,31 +1903,31 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx, if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - fourcc = VA_FOURCC('I', 'M', 'C', '3'); + fourcc = VA_FOURCC_IMC3; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h1 == 
1 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - fourcc = VA_FOURCC('4', '4', '4', 'P'); + fourcc = VA_FOURCC_444P; } else if (h1 == 4 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - fourcc = VA_FOURCC('4', '1', '1', 'P'); + fourcc = VA_FOURCC_411P; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h2 == 2 && h2 == 2 && h3 == 2 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else assert(0); } @@ -2230,7 +2230,7 @@ gen8_jpeg_wa_init(VADriverContextP ctx, obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_context->jpeg_wa_surface_object = obj_surface; if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { @@ -2773,7 +2773,7 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; diff --git a/src/gen8_render.c b/src/gen8_render.c index d052cf4a..3b3fc89d 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -276,7 +276,7 @@ gen8_render_src_surfaces_state( 
gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { gen8_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, @@ -791,11 +791,11 @@ gen8_render_upload_constants(VADriverContextP ctx, constant_buffer = (unsigned short *) cc_ptr; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); + assert(obj_surface->fourcc == VA_FOURCC_Y800); *constant_buffer = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) + if (obj_surface->fourcc == VA_FOURCC_NV12) *constant_buffer = 1; else *constant_buffer = 0; diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 80a51b88..72b83074 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -450,7 +450,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 064074f1..e80749c5 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -427,7 +427,7 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, * Sometimes a dummy frame comes from the upper layer library, call i965_check_alloc_surface_bo() * to ake sure the store buffer is allocated for this reference frame */ - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); slot_found = 0; frame_idx = -1; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index e2570cff..174f882e 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -77,7 +77,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, if (!obj_surface || !obj_surface->bo) return VA_STATUS_ERROR_INVALID_PARAMETER; - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { unsigned int tiling = 0, swizzle = 0; dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -112,7 +112,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, obj_surface = SURFACE(encoder_context->input_yuv_surface); encode_state->input_yuv_object = obj_surface; assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index c97220a3..3386b092 100644 
--- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -342,7 +342,7 @@ i965_gpe_set_surface2_state(VADriverContextP ctx, unsigned int tiling, swizzle; assert(obj_surface->bo); - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); w = obj_surface->orig_width; @@ -505,7 +505,7 @@ gen7_gpe_set_surface2_state(VADriverContextP ctx, unsigned int tiling, swizzle; assert(obj_surface->bo); - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); w = obj_surface->orig_width; @@ -648,7 +648,7 @@ gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx, dri_bo *bo; int cbcr_offset; - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); bo = gpe_context->surface_state_binding_table.bo; dri_bo_map(bo, True); assert(bo->virtual); @@ -725,7 +725,7 @@ gen8_gpe_set_surface2_state(VADriverContextP ctx, unsigned int tiling, swizzle; assert(obj_surface->bo); - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); w = obj_surface->orig_width; @@ -869,7 +869,7 @@ gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, dri_bo *bo; int cbcr_offset; - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); bo = gpe_context->surface_state_binding_table.bo; dri_bo_map(bo, True); assert(bo->virtual); diff --git a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c index 1c105b30..76a7035a 100644 --- a/src/i965_media_mpeg2.c +++ b/src/i965_media_mpeg2.c @@ -515,7 +515,7 @@ i965_media_mpeg2_surface_setup(VADriverContextP ctx, int w = obj_surface->width; int h = obj_surface->height; - i965_check_alloc_surface_bo(ctx, obj_surface, 0, 
VA_FOURCC('I','4','2','0'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_I420, SUBSAMPLE_YUV420); if (picture_structure == MPEG_FRAME) { i965_media_mpeg2_surface_state(ctx, base_index + 0, obj_surface, diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c index 569d79be..5a75397c 100644 --- a/src/i965_output_wayland.c +++ b/src/i965_output_wayland.c @@ -237,7 +237,7 @@ va_GetSurfaceBufferWl( return VA_STATUS_ERROR_INVALID_SURFACE; switch (obj_surface->fourcc) { - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: drm_format = WL_DRM_FORMAT_NV12; offsets[0] = 0; pitches[0] = obj_surface->width; @@ -246,14 +246,14 @@ va_GetSurfaceBufferWl( offsets[2] = 0; pitches[2] = 0; break; - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): - case VA_FOURCC('I','M','C','1'): - case VA_FOURCC('I','M','C','3'): - case VA_FOURCC('4','2','2','H'): - case VA_FOURCC('4','2','2','V'): - case VA_FOURCC('4','1','1','P'): - case VA_FOURCC('4','4','4','P'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_422H: + case VA_FOURCC_422V: + case VA_FOURCC_411P: + case VA_FOURCC_444P: switch (obj_surface->subsampling) { case SUBSAMPLE_YUV411: drm_format = WL_DRM_FORMAT_YUV411; diff --git a/src/i965_render.c b/src/i965_render.c index 809013b4..6520ce3e 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -876,7 +876,7 @@ i965_render_src_surfaces_state( i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, @@ -1098,11 +1098,11 
@@ i965_render_upload_constants(VADriverContextP ctx, constant_buffer = render_state->curbe.bo->virtual; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); + assert(obj_surface->fourcc == VA_FOURCC_Y800); constant_buffer[0] = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) + if (obj_surface->fourcc == VA_FOURCC_NV12) constant_buffer[0] = 1; else constant_buffer[0] = 0; -- cgit v1.2.1 From 5bf0709f52fc3308a6345585f9acae182e60df00 Mon Sep 17 00:00:00 2001 From: Alex wu Date: Sun, 23 Mar 2014 20:45:27 -0600 Subject: V3: Add 422H support. Changes between V3 to V2: 1. Add 422H support into gen8_post_processing.c, according to yakui's comments. changes between V2 and V1: 1. Rebase on staging branch. 2. Add 422H support for pp. 3. Reword the commit title. Signed-off-by: Alex wu Reviewed-by: Zhao Yakui (cherry picked from commit 347dd731d31dd37b242bbace744125554f2c09e7) --- src/gen8_post_processing.c | 2 +- src/i965_drv_video.c | 13 +++++++++++++ src/i965_post_processing.c | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index a4fbcbbf..4fbc01e8 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -540,7 +540,7 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC_YV16) { + if (fourcc == VA_FOURCC_YV16 || fourcc == VA_FOURCC_422H) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 0dcac01c..e801a4d5 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -152,6 +152,8 @@ i965_image_formats_map[I965_MAX_IMAGE_FORMATS + 1] = { { VA_FOURCC_YUY2, VA_LSB_FIRST, 16, } }, { 
I965_SURFACETYPE_YUV, { VA_FOURCC_UYVY, VA_LSB_FIRST, 16, } }, + { I965_SURFACETYPE_YUV, + { VA_FOURCC_422H, VA_LSB_FIRST, 16, } }, { I965_SURFACETYPE_RGBA, { VA_FOURCC_RGBX, VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } }, { I965_SURFACETYPE_RGBA, @@ -2821,6 +2823,16 @@ i965_CreateImage(VADriverContextP ctx, image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; + case VA_FOURCC_422H: + image->num_planes = 3; + image->pitches[0] = awidth; + image->offsets[0] = 0; + image->pitches[1] = awidth / 2; + image->offsets[1] = size; + image->pitches[2] = awidth / 2; + image->offsets[2] = size + (awidth / 2) * aheight; + image->data_size = size + 2 * ((awidth / 2) * aheight); + break; case VA_FOURCC_NV12: image->num_planes = 2; image->pitches[0] = awidth; @@ -3228,6 +3240,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, break; case VA_FOURCC_I420: + case VA_FOURCC_422H: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 136e8e34..3dc7d8c2 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1892,7 +1892,7 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC_YV16) { + if (fourcc == VA_FOURCC_YV16 || fourcc == VA_FOURCC_422H) { width[1] = obj_image->image.width / 2; height[1] = obj_image->image.height; width[2] = obj_image->image.width / 2; -- cgit v1.2.1 From dc481879610fa323386a5567bd9f904346cc040c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 24 Mar 2014 10:46:14 +0800 Subject: BDW: Fix one error in shader binary for media encoding The commit 7ac4263ff2dae5c877b92356d04df4ccfe10d7c9 updates the shader binary more than required. So it is removed.
Signed-off-by: Zhao Yakui (cherry picked from commit b1319c7f7cb9d20179b20dac2308330bd0e51ffe) --- src/shaders/vme/inter_bframe_gen8.g8b | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b index f981ca34..77daf5a0 100644 --- a/src/shaders/vme/inter_bframe_gen8.g8b +++ b/src/shaders/vme/inter_bframe_gen8.g8b @@ -411,7 +411,7 @@ { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, { 0x00000020, 0x34000000, 0x0e001400, 0x000000a0 }, { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, - { 0x05000010, 0x20001a60, 0x1e000f68, 0x00020002 }, + { 0x05000010, 0x20001240, 0x16000f68, 0x00020002 }, { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, { 0x00010020, 0x34000000, 0x0e001400, 0x00000060 }, { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, -- cgit v1.2.1 From 8908b464caad4eef669a9eecc56f54325ce7fdc2 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 8 Apr 2014 06:56:03 -0600 Subject: vp8: fix loop filter for bitexact reconstruction. Each loop filter delta update value shall be encoded within 7 bits, including the sign bit and 6-bit magnitude in 2's complement. So, don't propagate the sign bit while packing the filter level values. 
Signed-off-by: Gwenole Beauchesne (cherry picked from commit 36ccd9c3e47766edc70ecbdf82acc89ed67e26c4) --- src/gen8_mfd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 743fd74d..72b26403 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2938,16 +2938,16 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, } OUT_BCS_BATCH(batch, - pic_param->loop_filter_deltas_ref_frame[3] << 24 | - pic_param->loop_filter_deltas_ref_frame[2] << 16 | - pic_param->loop_filter_deltas_ref_frame[1] << 8 | - pic_param->loop_filter_deltas_ref_frame[0] << 0); + (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 | + (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 | + (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 | + (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0); OUT_BCS_BATCH(batch, - pic_param->loop_filter_deltas_mode[3] << 24 | - pic_param->loop_filter_deltas_mode[2] << 16 | - pic_param->loop_filter_deltas_mode[1] << 8 | - pic_param->loop_filter_deltas_mode[0] << 0); + (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 | + (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 | + (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 | + (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0); /* segmentation id stream base address, DW35-DW37 */ OUT_BCS_BATCH(batch, 0); -- cgit v1.2.1 From 3867b81bf99924f9887cc6ccecf9287256f1d7e6 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 9 Apr 2014 11:40:16 +0800 Subject: Rendering/BDW:Follow the hardware spec to update the 3DSTATE_URB_VS command This is to fix the GPU hang when doing the color-space conversion from NV12 to RGB on BDW GT3 machine. 
Signed-off-by: Zhao Yakui (cherry picked from commit 4a3f17ae44bae58daf65dcc706332b28a6d478ac) --- src/gen8_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen8_render.c b/src/gen8_render.c index 3b3fc89d..90f278e5 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -1092,7 +1092,7 @@ gen8_emit_urb(VADriverContextP ctx) OUT_BATCH(batch, (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | - (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + (4 << GEN7_URB_STARTING_ADDRESS_SHIFT)); ADVANCE_BATCH(batch); BEGIN_BATCH(batch, 2); -- cgit v1.2.1 From 2d00edc3d07997bd322ce4d905c84cb7f52dd421 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 14 Apr 2014 20:20:31 -0600 Subject: Fix bound checking Otherwise it might result in buffer overflow. Reviewed-by: Zhao Yakui Signed-off-by: Xiang, Haihao (cherry picked from commit 782b8afdda14f000874d8acf51c3e8c490d55773) --- src/i965_drv_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index e801a4d5..60174b2a 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2281,7 +2281,7 @@ i965_encoder_render_misc_parameter_buffer(VADriverContextP ctx, param = (VAEncMiscParameterBuffer *)obj_buffer->buffer_store->buffer; - if (param->type > ARRAY_ELEMS(encode->misc_param)) + if (param->type >= ARRAY_ELEMS(encode->misc_param)) return VA_STATUS_ERROR_INVALID_PARAMETER; i965_release_buffer_store(&encode->misc_param[param->type]); -- cgit v1.2.1 From 0d3462360f624750233d77771d55f48bc00039e5 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 22 Apr 2014 11:05:18 +0800 Subject: VPP: Set the alpha channel when doing the conversion from NV12 to RGBA on Ivy/Haswell/BDW Currently zero is written to alpha channel when doing the conversion from NV12 to RGBA(BGRA), which affects the following rendering operation.
Signed-off-by: Zhao Yakui (cherry picked from commit 4082c9db1eef45bc117fc151d60a178926ab9f73) --- src/gen8_post_processing.c | 1 + src/i965_post_processing.c | 1 + src/i965_post_processing.h | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 4fbc01e8..f0768309 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -973,6 +973,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf2.avs_wa_width = src_width; pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); + pp_static_parameter->grf2.alpha = 255; pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 3dc7d8c2..95b20e47 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -2953,6 +2953,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf2.avs_wa_width = dw; pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); + pp_static_parameter->grf2.alpha = 255; pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 29b8cdc2..fd4cbcf5 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -380,7 +380,7 @@ struct gen7_pp_static_parameter unsigned int 
di_destination_packed_y_component_offset:8; unsigned int di_destination_packed_u_component_offset:8; unsigned int di_destination_packed_v_component_offset:8; - unsigned int pad0:8; + unsigned int alpha:8; } grf2; struct { -- cgit v1.2.1 From 61fbb1bba1ad8a41ffae4fd1ba90391adf819b6e Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 23 Apr 2014 17:23:21 +0200 Subject: vp8: fix support for segmentation-enabled streams. If segmentation is enabled, then the segmentation map shall be live across frames until the current frame updates the segment ids. This means that the driver needs to maintain the segmentation map buffer allocation and enable writes (resp. reads) whenever necessary. This fixes decoding of 00-comprehensive-010. Signed-off-by: Gwenole Beauchesne --- src/gen7_mfd.h | 1 + src/gen8_mfd.c | 35 ++++++++++++++++++++++++++++++----- src/i965_decoder_utils.c | 24 ++++++++++++++++++++++++ src/i965_decoder_utils.h | 4 ++++ 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h index e3111abe..02002164 100644 --- a/src/gen7_mfd.h +++ b/src/gen7_mfd.h @@ -85,6 +85,7 @@ struct gen7_mfd_context GenBuffer bsd_mpc_row_store_scratch_buffer; GenBuffer mpr_row_store_scratch_buffer; GenBuffer bitplane_read_buffer; + GenBuffer segmentation_buffer; VASurfaceID jpeg_wa_surface_id; struct object_surface *jpeg_wa_surface_object; diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 72b26403..1742beaa 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2785,6 +2785,9 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx, dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable; + intel_ensure_vp8_segmentation_buffer(ctx, + &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs); + /* The same as AVC */ dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); bo = dri_bo_alloc(i965->intel.bufmgr, @@ -2838,6 
+2841,13 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, int i, j,log2num; unsigned int quantization_value[4][6]; + /* There is no safe way to error out if the segmentation buffer + could not be allocated. So, instead of aborting, simply decode + something even if the result may look totally inacurate */ + const unsigned int enable_segmentation = + pic_param->pic_fields.bits.segmentation_enabled && + gen7_mfd_context->segmentation_buffer.valid; + log2num = (int)log2(slice_param->num_of_partitions - 1); BEGIN_BCS_BATCH(batch, 38); @@ -2854,8 +2864,10 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, pic_param->pic_fields.bits.mb_no_coeff_skip << 10 | pic_param->pic_fields.bits.update_mb_segmentation_map << 9 | pic_param->pic_fields.bits.segmentation_enabled << 8 | - 0 << 7 | /* segmentation id streamin disabled */ - 0 << 6 | /* segmentation id streamout disabled */ + (enable_segmentation && + !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 | + (enable_segmentation && + pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 | (pic_param->pic_fields.bits.key_frame == 0 ? 
1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/ pic_param->pic_fields.bits.filter_type << 4 | (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */ @@ -2950,9 +2962,18 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx, (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0); /* segmentation id stream base address, DW35-DW37 */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + if (enable_segmentation) { + OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo, + 0, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } ADVANCE_BCS_BATCH(batch); } @@ -3142,6 +3163,9 @@ gen8_mfd_context_destroy(void *hw_context) dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo); gen7_mfd_context->bitplane_read_buffer.bo = NULL; + dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo); + gen7_mfd_context->segmentation_buffer.bo = NULL; + dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo); intel_batchbuffer_free(gen7_mfd_context->base.batch); @@ -3174,6 +3198,7 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) } gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; + gen7_mfd_context->segmentation_buffer.valid = 0; switch (obj_config->profile) { case VAProfileMPEG2Simple: diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index e80749c5..2533381c 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -827,3 +827,27 @@ intel_mpeg2_find_next_slice(struct decode_state *decode_state, return NULL; } + +/* Ensure the segmentation buffer is large enough for the supplied + number of MBs, or re-allocate it */ +bool +intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf, + unsigned int mb_width, unsigned int mb_height) +{ + struct i965_driver_data * const i965 = 
i965_driver_data(ctx); + /* The segmentation map is a 64-byte aligned linear buffer, with + each cache line holding only 8 bits for 4 continuous MBs */ + const unsigned int buf_size = ((mb_width + 3) / 4) * 64 * mb_height; + + if (buf->valid) { + if (buf->bo && buf->bo->size >= buf_size) + return true; + drm_intel_bo_unreference(buf->bo); + buf->valid = false; + } + + buf->bo = drm_intel_bo_alloc(i965->intel.bufmgr, "segmentation map", + buf_size, 0x1000); + buf->valid = buf->bo != NULL; + return buf->valid; +} diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 8f64dfb7..b7b72b3e 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -106,4 +106,8 @@ intel_update_vp8_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferVP8 *pic_param, GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); +bool +intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf, + unsigned int mb_width, unsigned int mb_height); + #endif /* I965_DECODER_UTILS_H */ -- cgit v1.2.1 From f044aab513e26a349fbe47742719785b6b6e529d Mon Sep 17 00:00:00 2001 From: Sirisha Muppavarapu Date: Tue, 25 Mar 2014 15:04:29 -0700 Subject: VPP: Enable Skin Tone Detection and Enhancement feature in the driver. The VPP-STDE feature is enabled in the driver code for gen75 and gen8. In this commit, I added the filter and made appropriate changes to the hw_codec_info and the supporting methods. 
(cherry picked from commit 691b149b7afe578889a423841a29db3ac56aad83) --- src/gen75_picture_process.c | 10 ++++++---- src/gen75_vpp_vebox.c | 6 ++++++ src/i965_drv_video.c | 9 +++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index ad7d463f..3c4fc0b0 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -192,8 +192,9 @@ gen75_proc_picture(VADriverContextP ctx, VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; - if (filter->type == VAProcFilterNoiseReduction || - filter->type == VAProcFilterDeinterlacing || + if (filter->type == VAProcFilterNoiseReduction || + filter->type == VAProcFilterDeinterlacing || + filter->type == VAProcFilterSkinToneEnhancement || filter->type == VAProcFilterColorBalance){ gen75_vpp_vebox(ctx, proc_ctx); }else if(filter->type == VAProcFilterSharpening){ @@ -220,8 +221,9 @@ gen75_proc_picture(VADriverContextP ctx, VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; - if (filter->type != VAProcFilterNoiseReduction && - filter->type != VAProcFilterDeinterlacing && + if (filter->type != VAProcFilterNoiseReduction && + filter->type != VAProcFilterDeinterlacing && + filter->type != VAProcFilterSkinToneEnhancement && filter->type != VAProcFilterColorBalance) { printf("Do not support multiply filters outside vebox pipeline \n"); assert(0); diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 8acf7432..38956419 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1186,6 +1186,9 @@ VAStatus gen75_vebox_process_picture(VADriverContextP ctx, proc_ctx->filters_mask |= VPP_IECP_PRO_AMP; proc_ctx->filter_iecp_amp = filter; proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements; + } else if (filter->type == VAProcFilterSkinToneEnhancement) { + proc_ctx->filters_mask |= VPP_IECP_STD_STE; + proc_ctx->filter_iecp_std = filter; 
} } @@ -1483,6 +1486,9 @@ VAStatus gen8_vebox_process_picture(VADriverContextP ctx, proc_ctx->filters_mask |= VPP_IECP_PRO_AMP; proc_ctx->filter_iecp_amp = filter; proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements; + } else if (filter->type == VAProcFilterSkinToneEnhancement) { + proc_ctx->filters_mask |= VPP_IECP_STD_STE; + proc_ctx->filter_iecp_std = filter; } } diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 60174b2a..efe47779 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -318,12 +318,13 @@ static struct hw_codec_info gen75_hw_codec_info = { .has_di_motion_adptive = 1, .has_di_motion_compensated = 1, - .num_filters = 4, + .num_filters = 5, .filters = { { VAProcFilterNoiseReduction, I965_RING_VEBOX }, { VAProcFilterDeinterlacing, I965_RING_VEBOX }, { VAProcFilterSharpening, I965_RING_NULL }, { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, }, }; @@ -349,12 +350,13 @@ static struct hw_codec_info gen8_hw_codec_info = { .has_di_motion_compensated = 1, .has_vp8_decoding = 1, - .num_filters = 4, + .num_filters = 5, .filters = { { VAProcFilterNoiseReduction, I965_RING_VEBOX }, { VAProcFilterDeinterlacing, I965_RING_VEBOX }, { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, }, }; @@ -5120,6 +5122,9 @@ VAStatus i965_QueryVideoProcPipelineCaps( if (deint->algorithm == VAProcDeinterlacingMotionAdaptive || deint->algorithm == VAProcDeinterlacingMotionCompensated); pipeline_cap->num_forward_references++; + } else if (base->type == VAProcFilterSkinToneEnhancement) { + VAProcFilterParameterBuffer *stde = (VAProcFilterParameterBuffer *)base; + (void)stde; } } -- cgit v1.2.1 From da837dc0334e95b77be7389771732494f6f3917a Mon Sep 17 00:00:00 2001 From: Sirisha Muppavarapu Date: Tue, 25 Mar 2014 15:04:30 -0700 Subject: VPP: Enable Skin tone detection 
and enhancement - Added STDE coefficients. In this commit, I added the optimized STDE coefficients to the vebox state table. (cherry picked from commit 150f67c67bd92cd201b75a92388fe3a63b00cd8a) --- src/gen75_vpp_vebox.c | 207 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 173 insertions(+), 34 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 38956419..788a75ee 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -237,40 +237,179 @@ void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *pr if(!(proc_ctx->filters_mask & VPP_IECP_STD_STE)){ memset(p_table, 0, 29 * 4); }else{ - *p_table ++ = 0x9a6e39f0; - *p_table ++ = 0x400c0000; - *p_table ++ = 0x00001180; - *p_table ++ = 0xfe2f2e00; - *p_table ++ = 0x000000ff; - - *p_table ++ = 0x00140000; - *p_table ++ = 0xd82e0000; - *p_table ++ = 0x8285ecec; - *p_table ++ = 0x00008282; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x02117000; - *p_table ++ = 0xa38fec96; - *p_table ++ = 0x0000c8c8; - *p_table ++ = 0x00000000; - *p_table ++ = 0x01478000; - - *p_table ++ = 0x0007c306; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x1c1bd000; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x0007cf80; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x1c080000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; + //DWord 0 + *p_table ++ = ( 154 << 24 | // V_Mid + 110 << 16 | // U_Mid + 14 << 10 | // Hue_Max + 31 << 4 | // Sat_Max + 0 << 3 | // Reserved + 0 << 2 | // Output Control is set to output the 1=STD score /0=Output Pixels + 1 << 1 | // Set STE Enable + 1 ); // Set STD Enable + + //DWord 1 + *p_table ++ = ( 0 << 31 | // Reserved + 4 << 28 | // Diamond Margin + 0 << 21 | // Diamond_du + 3 << 18 | // HS_Margin + 79 << 10 | // Cos(alpha) + 0 << 8 | // Reserved + 101 ); // Sin(alpha) + + //DWord 2 + *p_table ++ 
= ( 0 << 21 | // Reserved + 100 << 13 | // Diamond_alpha + 35 << 7 | // Diamond_Th + 0 ); + + //DWord 3 + *p_table ++ = ( 254 << 24 | // Y_point_3 + 47 << 16 | // Y_point_2 + 46 << 8 | // Y_point_1 + 1 << 7 | // VY_STD_Enable + 0 ); // Reserved + + //DWord 4 + *p_table ++ = ( 0 << 18 | // Reserved + 31 << 13 | // Y_slope_2 + 31 << 8 | // Y_slope_1 + 255 ); // Y_point_4 + + //DWord 5 + *p_table ++ = ( 400 << 16 | // INV_Skin_types_margin = 20* Skin_Type_margin => 20*20 + 3300 ); // INV_Margin_VYL => 1/Margin_VYL + + //DWord 6 + *p_table ++ = ( 216 << 24 | // P1L + 46 << 16 | // P0L + 1600 ); // INV_Margin_VYU + + //DWord 7 + *p_table ++ = ( 130 << 24 | // B1L + 133 << 16 | // B0L + 236 << 8 | // P3L + 236 ); // P2L + + //DWord 8 + *p_table ++ = ( 0 << 27 | // Reserved + 0x7FB << 16 | // S0L (11 bits, Default value: -5 = FBh, pad it with 1s to make it 11bits) + 130 << 8 | // B3L + 130 ); + + //DWord 9 + *p_table ++ = ( 0 << 22 | // Reserved + 0 << 11 | // S2L + 0); // S1L + + //DWord 10 + *p_table ++ = ( 0 << 27 | // Reserved + 66 << 19 | // P1U + 46 << 11 | // P0U + 0 ); // S3 + + //DWord 11 + *p_table ++ = ( 163 << 24 | // B1U + 143 << 16 | // B0U + 236 << 8 | // P3U + 150 ); // P2U + + //DWord 12 + *p_table ++ = ( 0 << 27 | // Reserved + 256 << 16 | // S0U + 200 << 8 | // B3U + 200 ); // B2U + + //DWord 13 + *p_table ++ = ( 0 << 22 | // Reserved + 0x74D << 11 | // S2U (11 bits, Default value -179 = F4Dh) + 113 ); // S1U + + //DWoord 14 + *p_table ++ = ( 0 << 28 | // Reserved + 20 << 20 | // Skin_types_margin + 120 << 12 | // Skin_types_thresh + 1 << 11 | // Skin_Types_Enable + 0 ); // S3U + + //DWord 15 + *p_table ++ = ( 0 << 31 | // Reserved + 0x3F8 << 21 | // SATB1 (10 bits, default 8, optimized value -8) + 31 << 14 | // SATP3 + 6 << 7 | // SATP2 + 0x7A ); // SATP1 (7 bits, default 6, optimized value -6) + + //DWord 16 + *p_table ++ = ( 0 << 31 | // Reserved + 297 << 20 | // SATS0 + 124 << 10 | // SATB3 + 8 ); // SATB2 + + //DWord 17 + *p_table ++ = ( 0 << 22 | 
// Reserved + 297 << 11 | // SATS2 + 85 ); // SATS1 + + //DWord 18 + *p_table ++ = ( 14 << 25 | // HUEP3 + 6 << 18 | // HUEP2 + 0x7A << 11 | // HUEP1 (7 bits, default value -6 = 7Ah) + 256 ); // SATS3 + + //DWord 19 + *p_table ++ = ( 0 << 30 | // Reserved + 256 << 20 | // HUEB3 + 8 << 10 | // HUEB2 + 0x3F8 ); // HUEB1 (10 bits, default value 8, optimized value -8) + + //DWord 20 + *p_table ++ = ( 0 << 22 | // Reserved + 85 << 11 | // HUES1 + 384 ); // HUES + + //DWord 21 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES3 + 384 ); // HUES2 + + //DWord 22 + *p_table ++ = ( 0 << 31 | // Reserved + 0 << 21 | // SATB1_DARK + 31 << 14 | // SATP3_DARK + 31 << 7 | // SATP2_DARK + 0x7B ); // SATP1_DARK (7 bits, default value -11 = FF5h, optimized value -5) + + //DWord 23 + *p_table ++ = ( 0 << 31 | // Reserved + 305 << 20 | // SATS0_DARK + 124 << 10 | // SATB3_DARK + 124 ); // SATB2_DARK + + //DWord 24 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // SATS2_DARK + 220 ); // SATS1_DARK + + //DWord 25 + *p_table ++ = ( 14 << 25 | // HUEP3_DARK + 14 << 18 | // HUEP2_DARK + 14 << 11 | // HUEP1_DARK + 256 ); // SATS3_DARK + + //DWord 26 + *p_table ++ = ( 0 << 30 | // Reserved + 56 << 20 | // HUEB3_DARK + 56 << 10 | // HUEB2_DARK + 56 ); // HUEB1_DARK + + //DWord 27 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES1_DARK + 256 ); // HUES0_DARK + + //DWord 28 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES3_DARK + 256 ); // HUES2_DARK } } -- cgit v1.2.1 From 520752f97b8878a82f03aa40971928cf49bdcbd2 Mon Sep 17 00:00:00 2001 From: "qing.zhang" Date: Tue, 29 Apr 2014 05:44:47 +0800 Subject: Fix over assigned callback "QueryConfigEntrypoints". 
--- src/i965_drv_video.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index efe47779..43153240 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5360,7 +5360,6 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx ) vtable->vaTerminate = i965_Terminate; vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints; vtable->vaQueryConfigProfiles = i965_QueryConfigProfiles; - vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints; vtable->vaQueryConfigAttributes = i965_QueryConfigAttributes; vtable->vaCreateConfig = i965_CreateConfig; vtable->vaDestroyConfig = i965_DestroyConfig; -- cgit v1.2.1 From 84a5695541346c0864166888abb60ee257b51a96 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 5 May 2014 12:39:50 +0800 Subject: Make it buildable against libva 1.3.0 Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui --- src/va_backend_compat.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h index f5c9f75a..267f1d8f 100644 --- a/src/va_backend_compat.h +++ b/src/va_backend_compat.h @@ -45,4 +45,10 @@ # define VA_DRM_AUTH_CUSTOM VA_DUMMY #endif +#if !VA_CHECK_VERSION(0,35,1) + +#define VAProcFilterSkinToneEnhancement 5 + +#endif + #endif /* VA_BACKEND_COMPAT_H */ -- cgit v1.2.1 From a36f24198b46cba00a1d09f8c70dc86f36d98022 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 5 May 2014 12:48:27 +0800 Subject: Return error when trying to decoding an interlaced VC-1 video https://bugs.freedesktop.org/show_bug.cgi?id=77386 Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui --- src/i965_decoder_utils.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 2533381c..617bc154 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -654,7 +654,12 @@ intel_decoder_check_vc1_parameter(VADriverContextP ctx, VAPictureParameterBufferVC1 *pic_param = 
(VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; struct object_surface *obj_surface; int i = 0; - + + if (pic_param->sequence_fields.bits.interlace == 1 && + pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */ + return VA_STATUS_ERROR_DECODING_ERROR; + } + if (pic_param->picture_fields.bits.picture_type == 0 || pic_param->picture_fields.bits.picture_type == 3) { } else if (pic_param->picture_fields.bits.picture_type == 1 || -- cgit v1.2.1 From 6ea4efe43adf4e6c8e9e02929de9094c6bc41eeb Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 5 May 2014 15:38:52 +0800 Subject: Update NEWS Signed-off-by: Xiang, Haihao --- NEWS | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 9711d387..cd4c0474 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,14 @@ -libva-intel-driver NEWS -- summary of changes. 2014-03-24 +libva-intel-driver NEWS -- summary of changes. 2014-05-xx Copyright (C) 2009-2014 Intel Corporation +Version 1.3.1 - xx.May.2014 +* Add support for STE on Broadwell +* Add support for YV16 +* Add support for user specified tiling and stride +* Fix VP8 decoding on Broadwell +* Fix the wrong alpha when convert NV12 into RGBA +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77386 + Version 1.3.0 - 24.Mar.2014 * Add support for Broadwell - Decoding: H.264/MPEG-2/VC-1/JPEG/VP8 -- cgit v1.2.1 From a720bc8a3065594654ab0d866cb3c3ef6e20e5b8 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 May 2014 12:45:01 +0800 Subject: Intel driver 1.3.1 Signed-off-by: Xiang, Haihao --- NEWS | 4 ++-- configure.ac | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index cd4c0474..307127ed 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ -libva-intel-driver NEWS -- summary of changes. 2014-05-xx +libva-intel-driver NEWS -- summary of changes. 
2014-05-09 Copyright (C) 2009-2014 Intel Corporation -Version 1.3.1 - xx.May.2014 +Version 1.3.1 - 09.May.2014 * Add support for STE on Broadwell * Add support for YV16 * Add support for user specified tiling and stride diff --git a/configure.ac b/configure.ac index f7975bee..07dbb8d7 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From b48ba793b83096be87cc4a1258be80737f26fb3b Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 May 2014 12:51:08 +0800 Subject: 1.3.2.pre1 for development Signed-off-by: Xiang, Haihao --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 07dbb8d7..ad10943c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) -m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [2]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From d767872a50dfeae0806a267eace90a2139d71034 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Mon, 14 Apr 2014 02:17:42 -0600 Subject: i965_DeriveImage() support JPEG color formats Signed-off-by: Zhong Li (cherry picked from commit 9f9c505ed5212ae0704f71f45532b9716ac0bd51) --- src/i965_drv_video.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/i965_drv_video.c 
b/src/i965_drv_video.c index 43153240..c7007765 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3243,6 +3243,10 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, case VA_FOURCC_I420: case VA_FOURCC_422H: + case VA_FOURCC_IMC3: + case VA_FOURCC_444P: + case VA_FOURCC_422V: + case VA_FOURCC_411P: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3251,8 +3255,10 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->pitches[2] = obj_surface->cb_cr_pitch; /* V */ image->offsets[2] = w_pitch * obj_surface->y_cr_offset; break; + case VA_FOURCC_YUY2: case VA_FOURCC_UYVY: + case VA_FOURCC_Y800: image->num_planes = 1; image->pitches[0] = obj_surface->width; /* Y, width is aligned already */ image->offsets[0] = 0; -- cgit v1.2.1 From a1b90545a6b428a91e080ffb9dd29835212e5304 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:34 +0800 Subject: Move all of PCIIDs and codec info into separated files The redundant code will be removed soon. 
Signed-off-by: Xiang, Haihao (cherry picked from commit d20db5984989626728f62eb3e02b60093d914d01) Conflicts: src/i965_drv_video.c --- src/Makefile.am | 2 + src/i965_device_info.c | 189 +++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_drv_video.c | 164 ++---------------------------------------- src/i965_drv_video.h | 2 +- src/i965_pciids.h | 131 ++++++++++++++++++++++++++++++++++ 5 files changed, 328 insertions(+), 160 deletions(-) create mode 100644 src/i965_device_info.c create mode 100644 src/i965_pciids.h diff --git a/src/Makefile.am b/src/Makefile.am index 806ee4ee..b35d1ac9 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -67,6 +67,7 @@ source_c = \ i965_avc_hw_scoreboard.c\ i965_avc_ildb.c \ i965_decoder_utils.c \ + i965_device_info.c \ i965_drv_video.c \ i965_encoder.c \ i965_encoder_utils.c \ @@ -109,6 +110,7 @@ source_h = \ i965_media_mpeg2.h \ i965_mutext.h \ i965_gpe_utils.h \ + i965_pciids.h \ i965_post_processing.h \ i965_render.h \ i965_structs.h \ diff --git a/src/i965_device_info.c b/src/i965_device_info.c new file mode 100644 index 00000000..0164f1db --- /dev/null +++ b/src/i965_device_info.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "i965_drv_video.h" + +extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info g4x_hw_codec_info = { + .dec_hw_context_init = g4x_dec_hw_context_init, + .enc_hw_context_init = NULL, + .proc_hw_context_init = NULL, + .max_width = 2048, + .max_height = 2048, + + .has_mpeg2_decoding = 1, + + .num_filters = 0, +}; + +extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info ilk_hw_codec_info = { + .dec_hw_context_init = ironlake_dec_hw_context_init, + .enc_hw_context_init = NULL, + .proc_hw_context_init = i965_proc_context_init, + .max_width = 2048, + .max_height = 2048, + + .has_mpeg2_decoding = 1, + .has_h264_decoding = 1, + .has_vpp = 1, + .has_accelerated_putimage = 1, + + .num_filters = 0, +}; + +extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info snb_hw_codec_info = { + .dec_hw_context_init = gen6_dec_hw_context_init, + .enc_hw_context_init = gen6_enc_hw_context_init, + .proc_hw_context_init = i965_proc_context_init, + .max_width = 2048, + .max_height = 2048, + + .has_mpeg2_decoding = 1, + .has_h264_decoding = 1, + 
.has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + + .num_filters = 2, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, + }, +}; + +extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info ivb_hw_codec_info = { + .dec_hw_context_init = gen7_dec_hw_context_init, + .enc_hw_context_init = gen7_enc_hw_context_init, + .proc_hw_context_init = i965_proc_context_init, + .max_width = 4096, + .max_height = 4096, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + + .num_filters = 2, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, + }, +}; + +extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info hsw_hw_codec_info = { + .dec_hw_context_init = gen75_dec_hw_context_init, + .enc_hw_context_init = gen75_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .max_width = 4096, + .max_height = 4096, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + 
.has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, + { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + +extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *); +static const struct hw_codec_info bdw_hw_codec_info = { + .dec_hw_context_init = gen8_dec_hw_context_init, + .enc_hw_context_init = gen8_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .max_width = 4096, + .max_height = 4096, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .has_vp8_decoding = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ + { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + +const struct hw_codec_info * +i965_get_codec_info(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: return &family##_hw_codec_info; +#include "i965_pciids.h" + default: + return NULL; + } +} diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index c7007765..f246a033 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -214,152 +214,6 @@ get_subpic_format(const VAImageFormat *va_format) return NULL; } -extern struct hw_context 
*i965_proc_context_init(VADriverContextP, struct object_config *); -extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info g4x_hw_codec_info = { - .dec_hw_context_init = g4x_dec_hw_context_init, - .enc_hw_context_init = NULL, - .proc_hw_context_init = NULL, - .max_width = 2048, - .max_height = 2048, - - .has_mpeg2_decoding = 1, - - .num_filters = 0, -}; - -extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info ironlake_hw_codec_info = { - .dec_hw_context_init = ironlake_dec_hw_context_init, - .enc_hw_context_init = NULL, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 2048, - .max_height = 2048, - - .has_mpeg2_decoding = 1, - .has_h264_decoding = 1, - .has_vpp = 1, - .has_accelerated_putimage = 1, - - .num_filters = 0, -}; - -extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *); -extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen6_hw_codec_info = { - .dec_hw_context_init = gen6_dec_hw_context_init, - .enc_hw_context_init = gen6_enc_hw_context_init, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 2048, - .max_height = 2048, - - .has_mpeg2_decoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - - .num_filters = 2, - .filters = { - { VAProcFilterNoiseReduction, I965_RING_NULL }, - { VAProcFilterDeinterlacing, I965_RING_NULL }, - }, -}; - -extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *); -extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen7_hw_codec_info = { - .dec_hw_context_init = gen7_dec_hw_context_init, - 
.enc_hw_context_init = gen7_enc_hw_context_init, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 4096, - .max_height = 4096, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_jpeg_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - .has_di_motion_adptive = 1, - - .num_filters = 2, - .filters = { - { VAProcFilterNoiseReduction, I965_RING_NULL }, - { VAProcFilterDeinterlacing, I965_RING_NULL }, - }, -}; - -extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen75_hw_codec_info = { - .dec_hw_context_init = gen75_dec_hw_context_init, - .enc_hw_context_init = gen75_enc_hw_context_init, - .proc_hw_context_init = gen75_proc_context_init, - .max_width = 4096, - .max_height = 4096, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_jpeg_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - .has_di_motion_adptive = 1, - .has_di_motion_compensated = 1, - - .num_filters = 5, - .filters = { - { VAProcFilterNoiseReduction, I965_RING_VEBOX }, - { VAProcFilterDeinterlacing, I965_RING_VEBOX }, - { VAProcFilterSharpening, I965_RING_NULL }, - { VAProcFilterColorBalance, I965_RING_VEBOX}, - { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, - }, -}; - -/* TODO: Add the separate call back function for Gen8 */ -static struct hw_codec_info gen8_hw_codec_info = { - .dec_hw_context_init = gen8_dec_hw_context_init, - .enc_hw_context_init = gen8_enc_hw_context_init, - .proc_hw_context_init = gen75_proc_context_init, - .max_width = 4096, - .max_height = 4096, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, 
- .has_jpeg_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - .has_di_motion_adptive = 1, - .has_di_motion_compensated = 1, - .has_vp8_decoding = 1, - - .num_filters = 5, - .filters = { - { VAProcFilterNoiseReduction, I965_RING_VEBOX }, - { VAProcFilterDeinterlacing, I965_RING_VEBOX }, - { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ - { VAProcFilterColorBalance, I965_RING_VEBOX}, - { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, - }, -}; - #define I965_PACKED_HEADER_BASE 0 #define I965_PACKED_MISC_HEADER_BASE 3 @@ -5137,24 +4991,16 @@ VAStatus i965_QueryVideoProcPipelineCaps( return VA_STATUS_SUCCESS; } +extern const struct hw_codec_info *i965_get_codec_info(int devid); + static bool i965_driver_data_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN8(i965->intel.device_id)) - i965->codec_info = &gen8_hw_codec_info; - else if (IS_HASWELL(i965->intel.device_id)) - i965->codec_info = &gen75_hw_codec_info; - else if (IS_G4X(i965->intel.device_id)) - i965->codec_info = &g4x_hw_codec_info; - else if (IS_IRONLAKE(i965->intel.device_id)) - i965->codec_info = &ironlake_hw_codec_info; - else if (IS_GEN6(i965->intel.device_id)) - i965->codec_info = &gen6_hw_codec_info; - else if (IS_GEN7(i965->intel.device_id)) - i965->codec_info = &gen7_hw_codec_info; - else + i965->codec_info = i965_get_codec_info(i965->intel.device_id); + + if (!i965->codec_info) return false; if (object_heap_init(&i965->config_heap, diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 535402fb..eddf83d4 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -322,7 +322,7 @@ struct i965_driver_data struct object_heap buffer_heap; struct object_heap image_heap; struct object_heap subpic_heap; - struct hw_codec_info *codec_info; + const struct hw_codec_info *codec_info; _I965Mutex render_mutex; _I965Mutex pp_mutex; diff --git 
a/src/i965_pciids.h b/src/i965_pciids.h new file mode 100644 index 00000000..64973e44 --- /dev/null +++ b/src/i965_pciids.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Copied and modified from (mesa) include/pci_ids/i965_pci_ids.h + */ + +CHIPSET(0x2A42, g4x, g4x, "Intel(R) GM45 Express Chipset") +CHIPSET(0x2E02, g4x, g4x, "Intel(R) Integrated Graphics Device") +CHIPSET(0x2E12, g4x, g4x, "Intel(R) Q45/Q43") +CHIPSET(0x2E22, g4x, g4x, "Intel(R) G45/G43") +CHIPSET(0x2E32, g4x, g4x, "Intel(R) G41") +CHIPSET(0x2E42, g4x, g4x, "Intel(R) B43") +CHIPSET(0x2E92, g4x, g4x, "Intel(R) B43") +CHIPSET(0x0042, ilk, ilk, "Intel(R) Ironlake Desktop") +CHIPSET(0x0046, ilk, ilk, "Intel(R) Ironlake Mobile") +CHIPSET(0x0102, snb, snb_gt1, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0112, snb, snb_gt2, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0122, snb, snb_gt2, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0106, snb, snb_gt1, "Intel(R) Sandybridge Mobile") +CHIPSET(0x0116, snb, snb_gt2, "Intel(R) Sandybridge Mobile") +CHIPSET(0x0126, snb, snb_gt2, "Intel(R) Sandybridge Mobile") +CHIPSET(0x010A, snb, snb_gt1, "Intel(R) Sandybridge Server") +CHIPSET(0x0152, ivb, ivb_gt1, "Intel(R) Ivybridge Desktop") +CHIPSET(0x0162, ivb, ivb_gt2, "Intel(R) Ivybridge Desktop") +CHIPSET(0x0156, ivb, ivb_gt1, "Intel(R) Ivybridge Mobile") +CHIPSET(0x0166, ivb, ivb_gt2, "Intel(R) Ivybridge Mobile") +CHIPSET(0x015A, ivb, ivb_gt1, "Intel(R) Ivybridge Server") +CHIPSET(0x016A, ivb, ivb_gt2, "Intel(R) Ivybridge Server") +CHIPSET(0x0F31, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0F32, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0F33, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0157, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0155, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0402, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0412, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0422, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0406, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0416, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0426, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x040A, hsw, hsw_gt1, "Intel(R) Haswell Server") 
+CHIPSET(0x041A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x042A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x040B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x041B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x042B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x040E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x041E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x042E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0C02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0C12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0C22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0C06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0C16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0C26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0C0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0C1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0C2A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x0C0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0C1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0C2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0C0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0C1E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0C2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0A02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0A12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0A22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0A06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0A16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0A26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0A0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0A1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0A2A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x0A0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0A1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0A2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0A0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0A1E, 
hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0A2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0D12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0D22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0D06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0D16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0D26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0D0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0D1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0D2A, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0D1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0D2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0D1E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0D2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x1602, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1606, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160E, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1612, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1616, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161E, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1622, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1626, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162E, bdw, bdw, "Intel(R) Broadwell") -- cgit v1.2.1 From e7318fe6166fbacc53413bbf175d4fc97a1c9807 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:35 +0800 Subject: Add a new intel_device_info structure To store 
statically known device information Signed-off-by: Xiang, Haihao (cherry picked from commit eb014a09fde988ba3ed2d2be6e8d6f0c650d281e) --- src/i965_device_info.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ src/intel_driver.c | 7 ++++ src/intel_driver.h | 16 +++++++ 3 files changed, 135 insertions(+) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 0164f1db..55e762b3 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -187,3 +187,115 @@ i965_get_codec_info(int devid) return NULL; } } + +static const struct intel_device_info g4x_device_info = { + .gen = 4, + + .urb_size = 384, + .max_wm_threads = 50, /* 10 * 5 */ + + .is_g4x = 1, +}; + +static const struct intel_device_info ilk_device_info = { + .gen = 5, + + .urb_size = 1024, + .max_wm_threads = 72, /* 12 * 6 */ +}; + +static const struct intel_device_info snb_gt1_device_info = { + .gen = 6, + .gt = 1, + + .urb_size = 1024, + .max_wm_threads = 40, +}; + +static const struct intel_device_info snb_gt2_device_info = { + .gen = 6, + .gt = 2, + + .urb_size = 1024, + .max_wm_threads = 80, +}; + +static const struct intel_device_info ivb_gt1_device_info = { + .gen = 7, + .gt = 1, + + .urb_size = 4096, + .max_wm_threads = 48, + + .is_ivybridge = 1, +}; + +static const struct intel_device_info ivb_gt2_device_info = { + .gen = 7, + .gt = 2, + + .urb_size = 4096, + .max_wm_threads = 172, + + .is_ivybridge = 1, +}; + +static const struct intel_device_info byt_device_info = { + .gen = 7, + .gt = 1, + + .urb_size = 4096, + .max_wm_threads = 48, + + .is_ivybridge = 1, + .is_baytrail = 1, +}; + +static const struct intel_device_info hsw_gt1_device_info = { + .gen = 7, + .gt = 1, + + .urb_size = 4096, + .max_wm_threads = 102, + + .is_haswell = 1, +}; + +static const struct intel_device_info hsw_gt2_device_info = { + .gen = 7, + .gt = 2, + + .urb_size = 4096, + .max_wm_threads = 204, + + .is_haswell = 1, +}; + +static const struct intel_device_info hsw_gt3_device_info = { + .gen = 7, + .gt 
= 3, + + .urb_size = 4096, + .max_wm_threads = 408, + + .is_haswell = 1, +}; + +static const struct intel_device_info bdw_device_info = { + .gen = 8, + + .urb_size = 4096, + .max_wm_threads = 64, /* per PSD */ +}; + +const struct intel_device_info * +i965_get_device_info(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: return &dev##_device_info; +#include "i965_pciids.h" + default: + return NULL; + } +} diff --git a/src/intel_driver.c b/src/intel_driver.c index 8650dba6..e3e082d1 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -67,6 +67,8 @@ static void intel_driver_get_revid(struct intel_driver_data *intel, int *value) return; } +extern const struct intel_device_info *i965_get_device_info(int devid); + bool intel_driver_init(VADriverContextP ctx) { @@ -91,6 +93,11 @@ intel_driver_init(VADriverContextP ctx) pthread_mutex_init(&intel->ctxmutex, NULL); intel_driver_get_param(intel, I915_PARAM_CHIPSET_ID, &intel->device_id); + intel->device_info = i965_get_device_info(intel->device_id); + + if (!intel->device_info) + return false; + if (intel_driver_get_param(intel, I915_PARAM_HAS_EXECBUF2, &has_exec2)) intel->has_exec2 = has_exec2; if (intel_driver_get_param(intel, I915_PARAM_HAS_BSD, &has_bsd)) diff --git a/src/intel_driver.h b/src/intel_driver.h index 3c74ed3d..18bbfe63 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -116,6 +116,20 @@ struct intel_batchbuffer; } \ } while (0) +struct intel_device_info +{ + int gen; + int gt; + + unsigned int urb_size; + unsigned int max_wm_threads; + + unsigned int is_g4x : 1; /* gen4 */ + unsigned int is_ivybridge : 1; /* gen7 */ + unsigned int is_baytrail : 1; /* gen7 */ + unsigned int is_haswell : 1; /* gen7 */ +}; + struct intel_driver_data { int fd; @@ -134,6 +148,8 @@ struct intel_driver_data unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */ unsigned int has_blt : 1; /* Flag: has BLT unit? 
*/ unsigned int has_vebox : 1; /* Flag: has VEBOX unit */ + + const struct intel_device_info *device_info; }; bool intel_driver_init(VADriverContextP ctx); -- cgit v1.2.1 From ae70674e879aa40fe4cd3a01e66b239d642519b5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:36 +0800 Subject: Dump chipset information in the vendor string Signed-off-by: Xiang, Haihao (cherry picked from commit 2518c1e741cb21c5412a4b5252ebe861a52c2900) --- src/i965_drv_video.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index f246a033..db8b2d19 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5130,6 +5130,7 @@ i965_Init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); int i; + const char *chipset; for (i = 0; i < ARRAY_ELEMS(i965_sub_ops); i++) { if ((i965_sub_ops[i].display_type == 0 || @@ -5139,9 +5140,19 @@ i965_Init(VADriverContextP ctx) } if (i == ARRAY_ELEMS(i965_sub_ops)) { - sprintf(i965->va_vendor, "%s %s driver - %d.%d.%d", + switch (i965->intel.device_id) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: chipset = str; break; +#include "i965_pciids.h" + default: + chipset = "Unknown Intel Chipset"; + break; + } + + sprintf(i965->va_vendor, "%s %s driver for %s - %d.%d.%d", INTEL_STR_DRIVER_VENDOR, INTEL_STR_DRIVER_NAME, + chipset, INTEL_DRIVER_MAJOR_VERSION, INTEL_DRIVER_MINOR_VERSION, INTEL_DRIVER_MICRO_VERSION); -- cgit v1.2.1 From 5684002ddffc89398755bccf3962febf88e1dd2d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:37 +0800 Subject: Remove URB_SIZE() Instead directly use the value stored in intel_device_info Signed-off-by: Xiang, Haihao (cherry picked from commit a0fe5a6262f9ff1398a512c83d193556bbd0eae9) --- src/i965_avc_hw_scoreboard.c | 4 ++-- src/i965_avc_ildb.c | 4 ++-- src/i965_defines.h | 6 ------ src/i965_media.c | 2 +- src/i965_media_h264.c | 2 +- src/i965_media_mpeg2.c | 2 
+- src/i965_post_processing.c | 4 ++-- 7 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/i965_avc_hw_scoreboard.c b/src/i965_avc_hw_scoreboard.c index b17ea83f..f866599d 100644 --- a/src/i965_avc_hw_scoreboard.c +++ b/src/i965_avc_hw_scoreboard.c @@ -217,7 +217,7 @@ i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_h264_context unsigned int vfe_fence, cs_fence; vfe_fence = avc_hw_scoreboard_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); @@ -429,7 +429,7 @@ i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context) avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start + avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry; assert(avc_hw_scoreboard_context->urb.cs_start + - avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); } } diff --git a/src/i965_avc_ildb.c b/src/i965_avc_ildb.c index 8b93c51b..e0cc743e 100644 --- a/src/i965_avc_ildb.c +++ b/src/i965_avc_ildb.c @@ -410,7 +410,7 @@ i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h2 unsigned int vfe_fence, cs_fence; vfe_fence = avc_ildb_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); @@ -597,7 +597,7 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context) avc_ildb_context->urb.cs_start = avc_ildb_context->urb.vfe_start + avc_ildb_context->urb.num_vfe_entries * avc_ildb_context->urb.size_vfe_entry; assert(avc_ildb_context->urb.cs_start + - 
avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) { dri_bo_unreference(avc_ildb_context->surface[i].s_bo); diff --git a/src/i965_defines.h b/src/i965_defines.h index 5b4a076a..f010ca23 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -821,10 +821,4 @@ #define SUBSAMPLE_YUV411 5 #define SUBSAMPLE_RGBX 6 -#define URB_SIZE(intel) (IS_GEN7(intel->device_id) ? 4096 : \ - IS_GEN8(intel->device_id) ? 4096 : \ - IS_GEN6(intel->device_id) ? 1024 : \ - IS_IRONLAKE(intel->device_id) ? 1024 : \ - IS_G4X(intel->device_id) ? 384 : 256) - #endif /* _I965_DEFINES_H_ */ diff --git a/src/i965_media.c b/src/i965_media.c index e6f1c16c..56541093 100644 --- a/src/i965_media.c +++ b/src/i965_media.c @@ -60,7 +60,7 @@ i965_media_urb_layout(VADriverContextP ctx, struct i965_media_context *media_con unsigned int vfe_fence, cs_fence; vfe_fence = media_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c index f6c8c117..abfecef0 100644 --- a/src/i965_media_h264.c +++ b/src/i965_media_h264.c @@ -901,7 +901,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context media_context->urb.cs_start = media_context->urb.vfe_start + media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry; assert(media_context->urb.cs_start + - media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); /* hook functions */ media_context->media_states_setup = i965_media_h264_states_setup; diff --git 
a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c index 76a7035a..a5c757f2 100644 --- a/src/i965_media_mpeg2.c +++ b/src/i965_media_mpeg2.c @@ -1013,7 +1013,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex media_context->urb.cs_start = media_context->urb.vfe_start + media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry; assert(media_context->urb.cs_start + - media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); /* hook functions */ media_context->media_states_setup = i965_media_mpeg2_states_setup; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 95b20e47..63580339 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5212,7 +5212,7 @@ i965_post_processing_context_init(VADriverContextP ctx, }; if (IS_IRONLAKE(i965->intel.device_id)) { - pp_context->urb.size = URB_SIZE((&i965->intel)); + pp_context->urb.size = i965->intel.device_info->urb_size; pp_context->urb.num_vfe_entries = 32; pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ pp_context->urb.num_cs_entries = 1; @@ -5221,7 +5221,7 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->urb.cs_start = pp_context->urb.vfe_start + pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; assert(pp_context->urb.cs_start + - pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); pp_context->intel_post_processing = ironlake_post_processing; } else { pp_context->vfe_gpu_state.max_num_threads = 60; -- cgit v1.2.1 From e889cefebad2a2fa2230882b8070309f23054247 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:38 +0800 Subject: Remove max_wm_threads from render_state Instead 
directly use the value stored in intel_device_info Signed-off-by: Xiang, Haihao (cherry picked from commit 6ba787b29e4bcebdceda52906e33cb84f24a63b5) --- src/gen8_render.c | 10 +--------- src/i965_render.c | 31 ++++--------------------------- src/i965_render.h | 2 -- 3 files changed, 5 insertions(+), 38 deletions(-) diff --git a/src/gen8_render.c b/src/gen8_render.c index 90f278e5..8f5feaa9 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -1377,7 +1377,7 @@ gen8_emit_wm_state(VADriverContextP ctx, int kernel) unsigned int num_samples = 0; unsigned int max_threads; - max_threads = render_state->max_wm_threads - 2; + max_threads = i965->intel.device_info->max_wm_threads - 2; BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2)); @@ -1784,14 +1784,6 @@ gen8_render_init(VADriverContextP ctx) dri_bo_unmap(render_state->instruction_state.bo); - - if (IS_GEN8(i965->intel.device_id)) { - render_state->max_wm_threads = 64; - } else { - /* should never get here !!! */ - assert(0); - } - return true; } diff --git a/src/i965_render.c b/src/i965_render.c index 6520ce3e..15643f36 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -478,7 +478,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -542,7 +542,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -2117,7 +2117,7 @@ gen6_emit_wm_state(VADriverContextP ctx, 
int kernel) (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | @@ -2899,7 +2899,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); /* scratch space base offset */ OUT_BATCH(batch, - ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples | + ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples | GEN7_PS_PUSH_CONSTANT_ENABLE | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); @@ -3187,29 +3187,6 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - if (IS_HSW_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 102; - } else if (IS_HSW_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 204; - } else if (IS_HSW_GT3(i965->intel.device_id)) { - render_state->max_wm_threads = 408; - } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) { - render_state->max_wm_threads = 48; - } else if (IS_IVB_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 172; - } else if (IS_SNB_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 40; - } else if (IS_SNB_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 80; - } else if (IS_IRONLAKE(i965->intel.device_id)) { - render_state->max_wm_threads = 72; /* 12 * 6 */ - } else if (IS_G4X(i965->intel.device_id)) { - render_state->max_wm_threads = 50; /* 12 * 5 */ - } else { - /* should never get here !!! 
*/ - assert(0); - } - return true; } diff --git a/src/i965_render.h b/src/i965_render.h index a1f2f8ff..afbce492 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -79,8 +79,6 @@ struct i965_render_state struct i965_kernel render_kernels[3]; - int max_wm_threads; - struct { dri_bo *bo; int bo_size; -- cgit v1.2.1 From 7fb211f94dae32532d1326565b48d86e558ceb24 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:39 +0800 Subject: Simplify some macros Now it can directly use the information in intel_device_info instead of checking the pci id. Signed-off-by: Xiang, Haihao (cherry picked from commit f1b3f83953cd5f6e39900d98b4858a7cb825dee0) Conflicts: src/gen8_post_processing.c src/i965_post_processing.c src/intel_driver.h --- src/gen6_mfc_common.c | 2 +- src/gen75_picture_process.c | 4 +- src/gen75_vpp_gpe.c | 16 +-- src/gen75_vpp_vebox.c | 4 +- src/gen7_mfd.c | 4 +- src/gen8_post_processing.c | 18 +-- src/gen8_render.c | 2 +- src/i965_avc_bsd.c | 2 +- src/i965_avc_ildb.c | 6 +- src/i965_drv_video.c | 38 +++--- src/i965_media.c | 2 +- src/i965_media_h264.c | 4 +- src/i965_media_mpeg2.c | 2 +- src/i965_post_processing.c | 57 +++++---- src/i965_render.c | 46 ++++---- src/intel_batchbuffer.c | 12 +- src/intel_driver.h | 276 ++------------------------------------------ 17 files changed, 119 insertions(+), 376 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 7cf9cc6f..33b9d557 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -525,7 +525,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - if (IS_GEN6(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info)) { /* On the SNB it should be fixed to 128 for the DMV buffer */ width_in_mbs = 128; } diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index 3c4fc0b0..7f09b4f0 100644 --- 
a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -86,9 +86,9 @@ gen75_vpp_vebox(VADriverContextP ctx, proc_ctx->vpp_vebox_ctx->surface_input_object = proc_ctx->surface_pipeline_input_object; proc_ctx->vpp_vebox_ctx->surface_output_object = proc_ctx->surface_render_output_object; - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); - else if (IS_GEN8(i965->intel.device_id)) + else if (IS_GEN8(i965->intel.device_info)) va_status = gen8_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); return va_status; diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 637d2bfa..2e3b104c 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -617,9 +617,9 @@ vpp_gpe_process(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) return gen75_gpe_process(ctx, vpp_gpe_ctx); - else if (IS_GEN8(i965->intel.device_id)) + else if (IS_GEN8(i965->intel.device_info)) return gen8_gpe_process(ctx, vpp_gpe_ctx); return VA_STATUS_ERROR_UNIMPLEMENTED; @@ -657,9 +657,9 @@ vpp_gpe_process_sharpening(VADriverContextP ctx, if(vpp_gpe_ctx->is_first_frame){ vpp_gpe_ctx->sub_shader_sum = 3; struct i965_kernel * vpp_kernels; - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) vpp_kernels = gen75_vpp_sharpening_kernels; - else if (IS_GEN8(i965->intel.device_id)) + else if (IS_GEN8(i965->intel.device_info)) vpp_kernels = gen8_vpp_sharpening_kernels; vpp_gpe_ctx->gpe_load_kernels(ctx, @@ -882,8 +882,8 @@ vpp_gpe_context_init(VADriverContextP ctx) struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context)); struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx); - assert(IS_HASWELL(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)); + 
assert(IS_HASWELL(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)); vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; vpp_gpe_ctx->surface_tmp_object = NULL; @@ -896,7 +896,7 @@ vpp_gpe_context_init(VADriverContextP ctx) gpe_ctx->vfe_state.urb_entry_size = 59 - 1; gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; - if (IS_HASWELL(i965->intel.device_id)) { + if (IS_HASWELL(i965->intel.device_info)) { vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init; vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy; vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels; @@ -907,7 +907,7 @@ vpp_gpe_context_init(VADriverContextP ctx) gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); - } else if (IS_GEN8(i965->intel.device_id)) { + } else if (IS_GEN8(i965->intel.device_info)) { vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init; vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy; vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels; diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 788a75ee..d63729e1 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -152,7 +152,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c */ p_table = (unsigned int *)proc_ctx->dndi_state_table.ptr; - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) *p_table ++ = 0; // reserved . w0 *p_table ++ = ( 140 << 24 | // denoise STAD threshold . 
w1 @@ -224,7 +224,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c 13 << 6 | // chr temp diff th 7 ); // chr temp diff low - if (IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_info)) *p_table ++ = 0; // parameters for hot pixel, } diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index e91cfd30..f4ccb12b 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -942,7 +942,7 @@ gen7_mfd_mpeg2_pic_state(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; - if (IS_HASWELL(i965->intel.device_id)) { + if (IS_HASWELL(i965->intel.device_info)) { /* XXX: disable concealment for now */ slice_concealment_disable_bit = 1; } @@ -1086,7 +1086,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, (slice_param->macroblock_offset & 0x7)); OUT_BCS_BATCH(batch, (slice_param->quantiser_scale_code << 24) | - (IS_HASWELL(i965->intel.device_id) ? (vpos1 << 8 | hpos1) : 0)); + (IS_HASWELL(i965->intel.device_info) ? 
(vpos1 << 8 | hpos1) : 0)); ADVANCE_BCS_BATCH(batch); } diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index f0768309..82bf3b99 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -41,10 +41,10 @@ #include "i965_render.h" #include "intel_media.h" -#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \ - IS_GEN6((ctx)->intel.device_id) || \ - IS_GEN7((ctx)->intel.device_id) || \ - IS_GEN8((ctx)->intel.device_id)) +#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_info) || \ + IS_GEN6((ctx)->intel.device_info) || \ + IS_GEN7((ctx)->intel.device_info) || \ + IS_GEN8((ctx)->intel.device_info)) #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 @@ -1240,7 +1240,7 @@ gen8_pp_curbe_load(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int param_size = 64; - if (IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); BEGIN_BATCH(batch, 4); @@ -1264,7 +1264,7 @@ gen8_pp_object_walker(VADriverContextP ctx, unsigned int *command_ptr; param_size = sizeof(struct gen7_pp_inline_parameter); - if (IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_inline_parameter); x_steps = pp_context->pp_x_steps(pp_context->private_context); @@ -1306,7 +1306,7 @@ gen8_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { BEGIN_BATCH(batch, 3); OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_RELOC(batch, command_buffer, @@ -1431,7 +1431,7 @@ gen8_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); - if (IS_GEN8(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); else { /* should never get here !!! 
*/ @@ -1485,7 +1485,7 @@ gen8_post_processing_context_init(VADriverContextP ctx, dri_bo_unmap(pp_context->instruction_state.bo); /* static & inline parameters */ - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); } diff --git a/src/gen8_render.c b/src/gen8_render.c index 8f5feaa9..54f62253 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -1734,7 +1734,7 @@ gen8_render_init(VADriverContextP ctx) render_state->render_put_surface = gen8_render_put_surface; render_state->render_put_subpicture = gen8_render_put_subpicture; - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen8, sizeof(render_state->render_kernels)); } diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 72b83074..67c7c959 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -793,7 +793,7 @@ i965_avc_bsd_object(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context); else g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context); diff --git a/src/i965_avc_ildb.c b/src/i965_avc_ildb.c index e0cc743e..d414c906 100644 --- a/src/i965_avc_ildb.c +++ b/src/i965_avc_ildb.c @@ -342,7 +342,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx, assert(avc_ildb_context->curbe.bo->virtual); root_input = avc_ildb_context->curbe.bo->virtual; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */ } else { root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */ @@ -427,7 
+427,7 @@ i965_avc_ildb_state_base_address(VADriverContextP ctx, struct i965_h264_context struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965_h264_context->batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -614,7 +614,7 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context) /* kernel offset */ assert(NUM_AVC_ILDB_INTERFACES == ARRAY_ELEMS(avc_ildb_kernel_offset_gen5)); - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen5; } else { avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen4; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index db8b2d19..96ca997e 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1116,9 +1116,9 @@ i965_guess_surface_format(VADriverContextP ctx, if (!obj_config) return; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { *fourcc = VA_FOURCC_NV12; *is_tiled = 1; return; @@ -1500,7 +1500,7 @@ i965_CreateContext(VADriverContextP ctx, render_state->interleaved_uv = 1; break; default: - render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id) || IS_GEN8(i965->intel.device_id)); + render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_info) || IS_GEN7(i965->intel.device_info) || IS_GEN8(i965->intel.device_info)); break; } @@ -4213,7 +4213,7 @@ i965_GetSurfaceAttributes( attrib_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; if (attrib_list[i].value.value.i == 0) { - if (IS_G4X(i965->intel.device_id)) { + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == 
VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attrib_list[i].value.value.i = VA_FOURCC_I420; @@ -4221,7 +4221,7 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if (IS_IRONLAKE(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attrib_list[i].value.value.i = VA_FOURCC_I420; @@ -4235,17 +4235,17 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { attrib_list[i].value.value.i = VA_FOURCC_NV12; - } else if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { if (obj_config->profile == VAProfileJPEGBaseline) attrib_list[i].value.value.i = 0; /* internal format */ else attrib_list[i].value.value.i = VA_FOURCC_NV12; } } else { - if (IS_G4X(i965->intel.device_id)) { + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { if (attrib_list[i].value.value.i != VA_FOURCC_I420) { @@ -4256,7 +4256,7 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if (IS_IRONLAKE(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { if (attrib_list[i].value.value.i != VA_FOURCC_I420) { @@ -4290,7 +4290,7 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == 
VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { @@ -4314,8 +4314,8 @@ i965_GetSurfaceAttributes( attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } } - } else if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { @@ -4398,7 +4398,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, if (attribs == NULL) return VA_STATUS_ERROR_ALLOCATION_FAILED; - if (IS_G4X(i965->intel.device_id)) { + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attribs[i].type = VASurfaceAttribPixelFormat; @@ -4407,7 +4407,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].value.value.i = VA_FOURCC_I420; i++; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if (IS_IRONLAKE(i965->intel.device_info)) { switch (obj_config->profile) { case VAProfileMPEG2Simple: case VAProfileMPEG2Main: @@ -4446,7 +4446,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, default: break; } - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; @@ -4493,7 +4493,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, i++; } } - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ if (obj_config->profile == VAProfileJPEGBaseline) { attribs[i].type = VASurfaceAttribPixelFormat; @@ -4596,7 +4596,7 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, i++; } } - } else if (IS_GEN8(i965->intel.device_id)) { + } else if 
(IS_GEN8(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ if (obj_config->profile == VAProfileJPEGBaseline) { attribs[i].type = VASurfaceAttribPixelFormat; diff --git a/src/i965_media.c b/src/i965_media.c index 56541093..a13c233e 100644 --- a/src/i965_media.c +++ b/src/i965_media.c @@ -77,7 +77,7 @@ i965_media_state_base_address(VADriverContextP ctx, struct i965_media_context *m struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = media_context->base.batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c index abfecef0..9de4e091 100644 --- a/src/i965_media_h264.c +++ b/src/i965_media_h264.c @@ -852,7 +852,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context sizeof(h264_avc_kernels_gen5[0]))); assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) / sizeof(avc_mc_kernel_offset_gen5[0]))); - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels)); avc_mc_kernel_offset = avc_mc_kernel_offset_gen5; intra_kernel_header = &intra_kernel_header_gen5; @@ -886,7 +886,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context media_context->free_private_context = i965_media_h264_free_private_context; /* URB */ - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { media_context->urb.num_vfe_entries = 63; } else { media_context->urb.num_vfe_entries = 23; diff --git a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c index a5c757f2..245c8e7d 100644 --- a/src/i965_media_mpeg2.c +++ b/src/i965_media_mpeg2.c @@ -988,7 +988,7 @@ 
i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex sizeof(mpeg2_vld_kernels_gen5[0]))); assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) memcpy(i965_mpeg2_context->vld_kernels, mpeg2_vld_kernels_gen5, sizeof(i965_mpeg2_context->vld_kernels)); else memcpy(i965_mpeg2_context->vld_kernels, mpeg2_vld_kernels_gen4, sizeof(i965_mpeg2_context->vld_kernels)); diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 63580339..bdab73a2 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -40,11 +40,10 @@ #include "i965_render.h" #include "intel_media.h" -#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \ - IS_GEN6((ctx)->intel.device_id) || \ - IS_GEN7((ctx)->intel.device_id) || \ - IS_GEN8((ctx)->intel.device_id)) - +#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_info) || \ + IS_GEN6((ctx)->intel.device_info) || \ + IS_GEN7((ctx)->intel.device_info) || \ + IS_GEN8((ctx)->intel.device_info)) #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) @@ -1640,7 +1639,7 @@ gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont ss->ss2.height = height - 1; ss->ss3.pitch = pitch - 1; gen7_pp_set_surface_tiling(ss, tiling); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, is_target ? 
I915_GEM_DOMAIN_RENDER : 0, @@ -2937,7 +2936,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */ if (pp_static_parameter->grf2.avs_wa_enable) { @@ -4110,7 +4109,7 @@ gen6_pp_initialize( assert(bo); pp_context->vfe_state.bo = bo; - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { static_param_size = sizeof(struct gen7_pp_static_parameter); inline_param_size = sizeof(struct gen7_pp_inline_parameter); } else { @@ -4166,7 +4165,7 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); desc->desc4.constant_urb_entry_read_offset = 0; - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info)) desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */ else desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */ @@ -4198,8 +4197,8 @@ gen6_pp_upload_constants(VADriverContextP ctx, assert(sizeof(struct pp_static_parameter) == 128); assert(sizeof(struct gen7_pp_static_parameter) == 192); - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ -4282,8 +4281,8 @@ gen6_pp_curbe_load(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int param_size; - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ 
-4369,8 +4368,8 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo *command_buffer; unsigned int *command_ptr; - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_inline_parameter); else param_size = sizeof(struct pp_inline_parameter); @@ -4390,7 +4389,7 @@ gen6_pp_object_walker(VADriverContextP ctx, for (x = 0; x < x_steps; x++) { if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { // some common block parameter update goes here, apply to all pp functions - if (IS_GEN6(i965->intel.device_id)) + if (IS_GEN6(i965->intel.device_info)) update_block_mask_parameter (pp_context, x, y, x_steps, y_steps); *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); @@ -4412,7 +4411,7 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { BEGIN_BATCH(batch, 3); OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); OUT_RELOC(batch, command_buffer, @@ -4570,9 +4569,9 @@ i965_vpp_clear_surface(VADriverContextP ctx, br13 |= BR13_8; br13 |= pitch; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 48); BEGIN_BLT_BATCH(batch, 12); } else { @@ -5185,7 +5184,7 @@ i965_post_processing_terminate(VADriverContextP ctx) struct i965_post_processing_context *pp_context = i965->pp_context; if (pp_context) { - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { gen8_post_processing_context_finalize(pp_context); } else { i965_post_processing_context_finalize(pp_context); @@ -5206,12 +5205,12 @@ i965_post_processing_context_init(VADriverContextP ctx, struct i965_driver_data *i965 = 
i965_driver_data(ctx); int i; - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { gen8_post_processing_context_init(ctx, pp_context, batch); return; }; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { pp_context->urb.size = i965->intel.device_info->urb_size; pp_context->urb.num_vfe_entries = 32; pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ @@ -5238,13 +5237,13 @@ i965_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules)); - else if (IS_GEN7(i965->intel.device_id)) + else if (IS_GEN7(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); - else if (IS_GEN6(i965->intel.device_id)) + else if (IS_GEN6(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules)); - else if (IS_IRONLAKE(i965->intel.device_id)) + else if (IS_IRONLAKE(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules)); for (i = 0; i < NUM_PP_MODULES; i++) { @@ -5263,8 +5262,8 @@ i965_post_processing_context_init(VADriverContextP ctx, } /* static & inline parameters */ - if (IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); } else { diff --git a/src/i965_render.c b/src/i965_render.c index 15643f36..6b0cb19e 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -341,7 +341,7 @@ i965_render_vs_unit(VADriverContextP ctx) vs_state = 
render_state->vs.state->virtual; memset(vs_state, 0, sizeof(*vs_state)); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; else vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; @@ -455,7 +455,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -472,7 +472,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; @@ -519,7 +519,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -536,7 +536,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; @@ -828,12 +828,12 @@ i965_render_src_surface_state( assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { 
gen7_render_set_surface_state(ss, region, offset, w, h, pitch, format, flags); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, @@ -943,12 +943,12 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, dest_region->pitch, format, 0); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -1230,7 +1230,7 @@ i965_render_state_base_address(VADriverContextP ctx) struct intel_batchbuffer *batch = i965->batch; struct i965_render_state *render_state = &i965->render_state; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -1394,7 +1394,7 @@ i965_render_vertex_elements(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 5); OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ @@ -1486,7 +1486,7 @@ i965_render_startup(VADriverContextP ctx) ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); else OUT_BATCH(batch, 3); @@ -1536,9 +1536,9 @@ 
i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id) || - IS_GEN8(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -2553,7 +2553,7 @@ gen7_emit_urb(VADriverContextP ctx) struct intel_batchbuffer *batch = i965->batch; unsigned int num_urb_entries = 32; - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) num_urb_entries = 64; BEGIN_BATCH(batch, 2); @@ -2862,7 +2862,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; unsigned int num_samples = 0; - if (IS_HASWELL(i965->intel.device_id)) { + if (IS_HASWELL(i965->intel.device_info)) { max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; } @@ -3146,19 +3146,19 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { return gen8_render_init(ctx); - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info)) { memcpy(render_state->render_kernels, - (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7), + (IS_HASWELL(i965->intel.device_info) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); render_state->render_put_surface = gen7_render_put_surface; render_state->render_put_subpicture = gen7_render_put_subpicture; - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); render_state->render_put_surface = gen6_render_put_surface; render_state->render_put_subpicture = gen6_render_put_subpicture; - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if (IS_IRONLAKE(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); render_state->render_put_surface = i965_render_put_surface; render_state->render_put_subpicture = i965_render_put_subpicture; @@ -3197,7 +3197,7 @@ i965_render_terminate(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - if (IS_GEN8(i965->intel.device_id)) { + if (IS_GEN8(i965->intel.device_info)) { gen8_render_terminate(ctx); return; } diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 9dc496d0..c6d3769e 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -87,7 +87,7 @@ intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size batch->flag = flag; batch->run = drm_intel_bo_mrb_exec; - if (IS_GEN6(intel->device_id) && + if (IS_GEN6(intel->device_info) && flag == I915_EXEC_RENDER) batch->wa_render_bo = dri_bo_alloc(intel->bufmgr, "wa scratch", @@ -183,11 +183,11 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) { struct intel_driver_data *intel = batch->intel; - if (IS_GEN6(intel->device_id) || - IS_GEN7(intel->device_id) || - IS_GEN8(intel->device_id)) { + if (IS_GEN6(intel->device_info) || + IS_GEN7(intel->device_info) || + IS_GEN8(intel->device_info)) { if (batch->flag == 
I915_EXEC_RENDER) { - if (IS_GEN8(intel->device_id)) { + if (IS_GEN8(intel->device_info)) { BEGIN_BATCH(batch, 6); OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2)); @@ -202,7 +202,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) OUT_BATCH(batch, 0); /* write data */ OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); - } else if (IS_GEN6(intel->device_id)) { + } else if (IS_GEN6(intel->device_info)) { assert(batch->wa_render_bo); BEGIN_BATCH(batch, 4 * 3); diff --git a/src/intel_driver.h b/src/intel_driver.h index 18bbfe63..8636b216 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -174,271 +174,15 @@ struct intel_region dri_bo *bo; }; -#define PCI_CHIP_GM45_GM 0x2A42 -#define PCI_CHIP_IGD_E_G 0x2E02 -#define PCI_CHIP_Q45_G 0x2E12 -#define PCI_CHIP_G45_G 0x2E22 -#define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_B43_G 0x2E42 -#define PCI_CHIP_B43_G1 0x2E92 - -#define PCI_CHIP_IRONLAKE_D_G 0x0042 -#define PCI_CHIP_IRONLAKE_M_G 0x0046 - -#ifndef PCI_CHIP_SANDYBRIDGE_GT1 -#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ -#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 -#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 -#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ -#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 -#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 -#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A /* Server */ -#endif - -#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ -#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 -#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ -#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 -#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ -#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a - -#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ -#define PCI_CHIP_HASWELL_GT2 0x0412 -#define PCI_CHIP_HASWELL_GT3 0x0422 -#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ -#define PCI_CHIP_HASWELL_M_GT2 0x0416 -#define PCI_CHIP_HASWELL_M_GT3 0x0426 -#define PCI_CHIP_HASWELL_S_GT1 0x040a /* Server */ -#define PCI_CHIP_HASWELL_S_GT2 0x041a -#define 
PCI_CHIP_HASWELL_S_GT3 0x042a -#define PCI_CHIP_HASWELL_B_GT1 0x040b /* Reserved */ -#define PCI_CHIP_HASWELL_B_GT2 0x041b -#define PCI_CHIP_HASWELL_B_GT3 0x042b -#define PCI_CHIP_HASWELL_E_GT1 0x040e /* Reserved */ -#define PCI_CHIP_HASWELL_E_GT2 0x041e -#define PCI_CHIP_HASWELL_E_GT3 0x042e - -#define PCI_CHIP_HASWELL_SDV_GT1 0x0c02 /* Desktop */ -#define PCI_CHIP_HASWELL_SDV_GT2 0x0c12 -#define PCI_CHIP_HASWELL_SDV_GT3 0x0c22 -#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0c06 /* Mobile */ -#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0c16 -#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0c26 -#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0c0a /* Server */ -#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0c1a -#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0c2a -#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0c0b /* Reserved */ -#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0c1b -#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0c2b -#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0c0e /* Reserved */ -#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0c1e -#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0c2e - -#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ -#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 -#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 -#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ -#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 -#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 -#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ -#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A -#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A -#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */ -#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B -#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B -#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */ -#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E -#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E - -#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ -#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 -#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22 -#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ -#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 -#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 
-#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ -#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A -#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A -#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */ -#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B -#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B -#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */ -#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E -#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E - -#define PCI_CHIP_BAYTRAIL_M_1 0x0F31 -#define PCI_CHIP_BAYTRAIL_M_2 0x0F32 -#define PCI_CHIP_BAYTRAIL_M_3 0x0F33 -#define PCI_CHIP_BAYTRAIL_M_4 0x0157 -#define PCI_CHIP_BAYTRAIL_D 0x0155 - -#define PCI_CHIP_BROADWELL_MS_GT1 0x1602 -#define PCI_CHIP_BROADWELL_MS_GT2 0x1612 -#define PCI_CHIP_BROADWELL_MS_GT2PLUS 0x1622 - -#define PCI_CHIP_BROADWELL_M_GT1_1 0x1606 -#define PCI_CHIP_BROADWELL_M_GT2_1 0x1616 -#define PCI_CHIP_BROADWELL_M_GT2PLUS_1 0x1626 - -#define PCI_CHIP_BROADWELL_M_GT1_2 0x160B -#define PCI_CHIP_BROADWELL_M_GT2_2 0x161B -#define PCI_CHIP_BROADWELL_M_GT2PLUS_2 0x162B - -#define PCI_CHIP_BROADWELL_M_GT1_3 0x160E -#define PCI_CHIP_BROADWELL_M_GT2_3 0x161E -#define PCI_CHIP_BROADWELL_M_GT2PLUS_3 0x162E - -#define PCI_CHIP_BROADWELL_D_GT1_1 0x160A -#define PCI_CHIP_BROADWELL_D_GT2_1 0x161A -#define PCI_CHIP_BROADWELL_D_GT2PLUS_1 0x162A - -#define PCI_CHIP_BROADWELL_D_GT1_2 0x160D -#define PCI_CHIP_BROADWELL_D_GT2_2 0x161D -#define PCI_CHIP_BROADWELL_D_GT2PLUS_2 0x162D - -#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ - devid == PCI_CHIP_Q45_G || \ - devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G || \ - devid == PCI_CHIP_B43_G1) - -#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) -#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) - -#define IS_IRONLAKE_D(devid) (devid == PCI_CHIP_IRONLAKE_D_G) -#define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G) -#define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid)) - -#define IS_SNB_GT1(devid) (devid == 
PCI_CHIP_SANDYBRIDGE_GT1 || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ - devid == PCI_CHIP_SANDYBRIDGE_S_GT) - -#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ - devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS) - -#define IS_GEN6(devid) (IS_SNB_GT1(devid) || \ - IS_SNB_GT2(devid)) - -#define IS_BAYTRAIL_M1(devid) (devid == PCI_CHIP_BAYTRAIL_M_1) -#define IS_BAYTRAIL_M2(devid) (devid == PCI_CHIP_BAYTRAIL_M_2) -#define IS_BAYTRAIL_M3(devid) (devid == PCI_CHIP_BAYTRAIL_M_3) -#define IS_BAYTRAIL_D(devid) (devid == PCI_CHIP_BAYTRAIL_D) -#define IS_BAYTRAIL(devid) (IS_BAYTRAIL_M1(devid) || \ - IS_BAYTRAIL_M2(devid) || \ - IS_BAYTRAIL_M3(devid) || \ - IS_BAYTRAIL_D(devid) ) - -#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ - devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ - devid == PCI_CHIP_IVYBRIDGE_S_GT1) - -#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \ - devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \ - devid == PCI_CHIP_IVYBRIDGE_S_GT2) - -#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || \ - IS_IVB_GT2(devid) || \ - IS_BAYTRAIL(devid) ) - -#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ - devid == PCI_CHIP_HASWELL_M_GT1 || \ - devid == PCI_CHIP_HASWELL_S_GT1 || \ - devid == PCI_CHIP_HASWELL_B_GT1 || \ - devid == PCI_CHIP_HASWELL_E_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_E_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \ - devid == 
PCI_CHIP_HASWELL_ULT_E_GT1) - - -#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2|| \ - devid == PCI_CHIP_HASWELL_M_GT2|| \ - devid == PCI_CHIP_HASWELL_S_GT2|| \ - devid == PCI_CHIP_HASWELL_B_GT2 || \ - devid == PCI_CHIP_HASWELL_E_GT2 || \ - devid == PCI_CHIP_HASWELL_SDV_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_M_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_S_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \ - devid == PCI_CHIP_HASWELL_CRW_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_M_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_S_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_B_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_E_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_M_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_S_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \ - devid == PCI_CHIP_HASWELL_ULT_E_GT2) - - -#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \ - devid == PCI_CHIP_HASWELL_M_GT3 || \ - devid == PCI_CHIP_HASWELL_S_GT3 || \ - devid == PCI_CHIP_HASWELL_B_GT3 || \ - devid == PCI_CHIP_HASWELL_E_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_E_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_E_GT3) - -#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ - IS_HSW_GT2(devid) || \ - IS_HSW_GT3(devid)) - -#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ - IS_HASWELL(devid)) - - -#define IS_BDW_GT1(devid) (devid == PCI_CHIP_BROADWELL_M_GT1_1 || \ - devid == 
PCI_CHIP_BROADWELL_M_GT1_2 || \ - devid == PCI_CHIP_BROADWELL_M_GT1_3 || \ - devid == PCI_CHIP_BROADWELL_D_GT1_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT1_2 || \ - devid == PCI_CHIP_BROADWELL_MS_GT1) - -#define IS_BDW_GT2(devid) (devid == PCI_CHIP_BROADWELL_M_GT2_1 || \ - devid == PCI_CHIP_BROADWELL_M_GT2_2 || \ - devid == PCI_CHIP_BROADWELL_M_GT2_3 || \ - devid == PCI_CHIP_BROADWELL_D_GT2_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT2_2 || \ - devid == PCI_CHIP_BROADWELL_MS_GT2) - -#define IS_BDW_GT2PLUS(devid) (devid == PCI_CHIP_BROADWELL_M_GT2PLUS_1 || \ - devid == PCI_CHIP_BROADWELL_M_GT2PLUS_2 || \ - devid == PCI_CHIP_BROADWELL_M_GT2PLUS_3 || \ - devid == PCI_CHIP_BROADWELL_D_GT2PLUS_1 || \ - devid == PCI_CHIP_BROADWELL_D_GT2PLUS_2 || \ - devid == PCI_CHIP_BROADWELL_MS_GT2PLUS) - -#define IS_GEN8(devid) (IS_BDW_GT1(devid) || \ - IS_BDW_GT2(devid) || \ - IS_BDW_GT2PLUS(devid)) +#define IS_G4X(device_info) (device_info->is_g4x) + +#define IS_IRONLAKE(device_info) (device_info->gen == 5) + +#define IS_GEN6(device_info) (device_info->gen == 6) + +#define IS_HASWELL(device_info) (device_info->is_haswell) +#define IS_GEN7(device_info) (device_info->gen == 7) + +#define IS_GEN8(device_info) (device_info->gen == 8) #endif /* _INTEL_DRIVER_H_ */ -- cgit v1.2.1 From 69e26f5ca4cad4ac69d47fa0db50f037c197c079 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:40 +0800 Subject: render_init()/render_terminate() callback functions for each platform It is to reduce the usage of IS_GENxxx() Signed-off-by: Xiang, Haihao (cherry picked from commit f150fbf444ca63b5e9c3e8f7e17aa3386f7061fa) --- src/gen8_render.c | 71 +++++++++++++++++---------------- src/i965_device_info.c | 14 +++++++ src/i965_drv_video.h | 2 + src/i965_render.c | 104 +++++++++++++++++++++++++++---------------------- src/i965_render.h | 4 +- 5 files changed, 109 insertions(+), 86 deletions(-) diff --git a/src/gen8_render.c b/src/gen8_render.c index 54f62253..ca731d20 100644 --- 
a/src/gen8_render.c +++ b/src/gen8_render.c @@ -1721,6 +1721,40 @@ gen8_render_put_subpicture( intel_batchbuffer_flush(batch); } +static void +gen8_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + render_state->wm.surface_state_binding_table_bo = NULL; + + if (render_state->instruction_state.bo) { + dri_bo_unreference(render_state->instruction_state.bo); + render_state->instruction_state.bo = NULL; + } + + if (render_state->dynamic_state.bo) { + dri_bo_unreference(render_state->dynamic_state.bo); + render_state->dynamic_state.bo = NULL; + } + + if (render_state->indirect_state.bo) { + dri_bo_unreference(render_state->indirect_state.bo); + render_state->indirect_state.bo = NULL; + } + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } +} + bool gen8_render_init(VADriverContextP ctx) { @@ -1733,6 +1767,7 @@ gen8_render_init(VADriverContextP ctx) render_state->render_put_surface = gen8_render_put_surface; render_state->render_put_subpicture = gen8_render_put_subpicture; + render_state->render_terminate = gen8_render_terminate; if (IS_GEN8(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen8, @@ -1786,39 +1821,3 @@ gen8_render_init(VADriverContextP ctx) return true; } - - -void -gen8_render_terminate(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - - dri_bo_unreference(render_state->vb.vertex_buffer); - render_state->vb.vertex_buffer = NULL; - - dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); - 
render_state->wm.surface_state_binding_table_bo = NULL; - - if (render_state->instruction_state.bo) { - dri_bo_unreference(render_state->instruction_state.bo); - render_state->instruction_state.bo = NULL; - } - - if (render_state->dynamic_state.bo) { - dri_bo_unreference(render_state->dynamic_state.bo); - render_state->dynamic_state.bo = NULL; - } - - if (render_state->indirect_state.bo) { - dri_bo_unreference(render_state->indirect_state.bo); - render_state->indirect_state.bo = NULL; - } - - if (render_state->draw_region) { - dri_bo_unreference(render_state->draw_region->bo); - free(render_state->draw_region); - render_state->draw_region = NULL; - } -} - diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 55e762b3..a15a31b0 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -29,10 +29,14 @@ extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); +extern bool genx_render_init(VADriverContextP); + static const struct hw_codec_info g4x_hw_codec_info = { .dec_hw_context_init = g4x_dec_hw_context_init, .enc_hw_context_init = NULL, .proc_hw_context_init = NULL, + .render_init = genx_render_init, + .max_width = 2048, .max_height = 2048, @@ -46,6 +50,8 @@ static const struct hw_codec_info ilk_hw_codec_info = { .dec_hw_context_init = ironlake_dec_hw_context_init, .enc_hw_context_init = NULL, .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .max_width = 2048, .max_height = 2048, @@ -63,6 +69,8 @@ static const struct hw_codec_info snb_hw_codec_info = { .dec_hw_context_init = gen6_dec_hw_context_init, .enc_hw_context_init = gen6_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .max_width = 2048, .max_height = 2048, @@ -88,6 +96,8 @@ static const struct hw_codec_info ivb_hw_codec_info = { .dec_hw_context_init = 
gen7_dec_hw_context_init, .enc_hw_context_init = gen7_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .max_width = 4096, .max_height = 4096, @@ -117,6 +127,8 @@ static const struct hw_codec_info hsw_hw_codec_info = { .dec_hw_context_init = gen75_dec_hw_context_init, .enc_hw_context_init = gen75_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, + .render_init = genx_render_init, + .max_width = 4096, .max_height = 4096, @@ -149,6 +161,8 @@ static const struct hw_codec_info bdw_hw_codec_info = { .dec_hw_context_init = gen8_dec_hw_context_init, .enc_hw_context_init = gen8_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, + .render_init = gen8_render_init, + .max_width = 4096, .max_height = 4096, diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index eddf83d4..2de99282 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -286,6 +286,8 @@ struct hw_codec_info struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *); struct hw_context *(*enc_hw_context_init)(VADriverContextP, struct object_config *); struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *); + bool (*render_init)(VADriverContextP); + int max_width; int max_height; diff --git a/src/i965_render.c b/src/i965_render.c index 6b0cb19e..aed78c07 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -3132,9 +3132,52 @@ intel_render_put_subpicture( render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } +static void +genx_render_terminate(VADriverContextP ctx) +{ + int i; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->curbe.bo); + render_state->curbe.bo = NULL; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + struct i965_kernel *kernel = &render_state->render_kernels[i]; + + dri_bo_unreference(kernel->bo); + 
kernel->bo = NULL; + } + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + dri_bo_unreference(render_state->vs.state); + render_state->vs.state = NULL; + dri_bo_unreference(render_state->sf.state); + render_state->sf.state = NULL; + dri_bo_unreference(render_state->wm.sampler); + render_state->wm.sampler = NULL; + dri_bo_unreference(render_state->wm.state); + render_state->wm.state = NULL; + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + dri_bo_unreference(render_state->cc.viewport); + render_state->cc.viewport = NULL; + dri_bo_unreference(render_state->cc.state); + render_state->cc.state = NULL; + dri_bo_unreference(render_state->cc.blend); + render_state->cc.blend = NULL; + dri_bo_unreference(render_state->cc.depth_stencil); + render_state->cc.depth_stencil = NULL; + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } +} bool -i965_render_init(VADriverContextP ctx) +genx_render_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -3146,9 +3189,7 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN8(i965->intel.device_info)) { - return gen8_render_init(ctx); - } else if (IS_GEN7(i965->intel.device_info)) { + if (IS_GEN7(i965->intel.device_info)) { memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_info) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); @@ -3168,6 +3209,8 @@ i965_render_init(VADriverContextP ctx) render_state->render_put_subpicture = i965_render_put_subpicture; } + render_state->render_terminate = genx_render_terminate; + for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; @@ -3190,52 +3233,19 @@ i965_render_init(VADriverContextP ctx) return true; } -void -i965_render_terminate(VADriverContextP ctx) +bool +i965_render_init(VADriverContextP ctx) { - int i; struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - - if (IS_GEN8(i965->intel.device_info)) { - gen8_render_terminate(ctx); - return; - } - - dri_bo_unreference(render_state->curbe.bo); - render_state->curbe.bo = NULL; - for (i = 0; i < NUM_RENDER_KERNEL; i++) { - struct i965_kernel *kernel = &render_state->render_kernels[i]; - - dri_bo_unreference(kernel->bo); - kernel->bo = NULL; - } + return i965->codec_info->render_init(ctx); +} - dri_bo_unreference(render_state->vb.vertex_buffer); - render_state->vb.vertex_buffer = NULL; - dri_bo_unreference(render_state->vs.state); - render_state->vs.state = NULL; - dri_bo_unreference(render_state->sf.state); - render_state->sf.state = NULL; - dri_bo_unreference(render_state->wm.sampler); - render_state->wm.sampler = NULL; - dri_bo_unreference(render_state->wm.state); - render_state->wm.state = NULL; - dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); - dri_bo_unreference(render_state->cc.viewport); - render_state->cc.viewport = NULL; - dri_bo_unreference(render_state->cc.state); - render_state->cc.state = NULL; - dri_bo_unreference(render_state->cc.blend); - render_state->cc.blend = NULL; - dri_bo_unreference(render_state->cc.depth_stencil); - render_state->cc.depth_stencil = NULL; +void +i965_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = 
i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (render_state->draw_region) { - dri_bo_unreference(render_state->draw_region->bo); - free(render_state->draw_region); - render_state->draw_region = NULL; - } + render_state->render_terminate(ctx); } - diff --git a/src/i965_render.h b/src/i965_render.h index afbce492..fde398b2 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -123,7 +123,7 @@ struct i965_render_state void (*render_put_subpicture)(VADriverContextP ctx, struct object_surface *, const VARectangle *src_rec, const VARectangle *dst_rect); - + void (*render_terminate)(VADriverContextP ctx); }; bool i965_render_init(VADriverContextP ctx); @@ -157,6 +157,4 @@ gen8_render_set_surface_scs(struct gen8_surface_state *ss); extern bool gen8_render_init(VADriverContextP ctx); -extern void gen8_render_terminate(VADriverContextP ctx); - #endif /* _I965_RENDER_H_ */ -- cgit v1.2.1 From cd9d671b88b6f999142a8a90a86c5b42021e6ee5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:41 +0800 Subject: posst_processing_context_init()/finalize() callback functions for each platform It is to reduce the usage of IS_GENxxx() as well. 
Signed-off-by: Xiang, Haihao (cherry picked from commit 77b6a72504d917af9335ab94f6ecbefb8b087206) --- src/gen8_post_processing.c | 6 ++++-- src/i965_device_info.c | 9 +++++++++ src/i965_drv_video.h | 1 + src/i965_post_processing.c | 24 +++++++++--------------- src/i965_post_processing.h | 10 +--------- 5 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 82bf3b99..4ff7f41b 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -1373,7 +1373,7 @@ gen8_post_processing( return va_status; } -void +static void gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context) { dri_bo_unreference(pp_context->surface_state_binding_table.bo); @@ -1410,7 +1410,7 @@ gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_co void gen8_post_processing_context_init(VADriverContextP ctx, - struct i965_post_processing_context *pp_context, + void *data, struct intel_batchbuffer *batch) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -1418,6 +1418,7 @@ gen8_post_processing_context_init(VADriverContextP ctx, unsigned int kernel_offset, end_offset; unsigned char *kernel_ptr; struct pp_module *pp_module; + struct i965_post_processing_context *pp_context = data; { pp_context->vfe_gpu_state.max_num_threads = 60; @@ -1428,6 +1429,7 @@ gen8_post_processing_context_init(VADriverContextP ctx, } pp_context->intel_post_processing = gen8_post_processing; + pp_context->finalize = gen8_post_processing_context_finalize; assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); diff --git a/src/i965_device_info.c b/src/i965_device_info.c index a15a31b0..f040592c 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -36,6 +36,7 @@ static const struct hw_codec_info g4x_hw_codec_info = { .enc_hw_context_init = NULL, .proc_hw_context_init = NULL, .render_init = genx_render_init, + .post_processing_context_init = NULL, .max_width = 2048, 
.max_height = 2048, @@ -46,11 +47,14 @@ static const struct hw_codec_info g4x_hw_codec_info = { }; extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); +extern void i965_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); + static const struct hw_codec_info ilk_hw_codec_info = { .dec_hw_context_init = ironlake_dec_hw_context_init, .enc_hw_context_init = NULL, .proc_hw_context_init = i965_proc_context_init, .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, .max_width = 2048, .max_height = 2048, @@ -70,6 +74,7 @@ static const struct hw_codec_info snb_hw_codec_info = { .enc_hw_context_init = gen6_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, .max_width = 2048, .max_height = 2048, @@ -97,6 +102,7 @@ static const struct hw_codec_info ivb_hw_codec_info = { .enc_hw_context_init = gen7_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, .max_width = 4096, .max_height = 4096, @@ -128,6 +134,7 @@ static const struct hw_codec_info hsw_hw_codec_info = { .enc_hw_context_init = gen75_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, .max_width = 4096, .max_height = 4096, @@ -157,11 +164,13 @@ static const struct hw_codec_info hsw_hw_codec_info = { extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *); +extern void gen8_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); static const struct hw_codec_info bdw_hw_codec_info = { 
.dec_hw_context_init = gen8_dec_hw_context_init, .enc_hw_context_init = gen8_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, .render_init = gen8_render_init, + .post_processing_context_init = gen8_post_processing_context_init, .max_width = 4096, .max_height = 4096, diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 2de99282..856b4789 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -287,6 +287,7 @@ struct hw_codec_info struct hw_context *(*enc_hw_context_init)(VADriverContextP, struct object_config *); struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *); bool (*render_init)(VADriverContextP); + void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *); int max_width; int max_height; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index bdab73a2..bcd22cd1 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -5184,11 +5184,7 @@ i965_post_processing_terminate(VADriverContextP ctx) struct i965_post_processing_context *pp_context = i965->pp_context; if (pp_context) { - if (IS_GEN8(i965->intel.device_info)) { - gen8_post_processing_context_finalize(pp_context); - } else { - i965_post_processing_context_finalize(pp_context); - } + pp_context->finalize(pp_context); free(pp_context); } @@ -5197,18 +5193,14 @@ i965_post_processing_terminate(VADriverContextP ctx) #define VPP_CURBE_ALLOCATION_SIZE 32 -static void +void i965_post_processing_context_init(VADriverContextP ctx, - struct i965_post_processing_context *pp_context, + void *data, struct intel_batchbuffer *batch) { struct i965_driver_data *i965 = i965_driver_data(ctx); int i; - - if (IS_GEN8(i965->intel.device_info)) { - gen8_post_processing_context_init(ctx, pp_context, batch); - return; - }; + struct i965_post_processing_context *pp_context = data; if (IS_IRONLAKE(i965->intel.device_info)) { pp_context->urb.size = i965->intel.device_info->urb_size; @@ -5230,7 
+5222,8 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; pp_context->intel_post_processing = gen6_post_processing; } - + + pp_context->finalize = i965_post_processing_context_finalize; assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); @@ -5286,7 +5279,7 @@ i965_post_processing_init(VADriverContextP ctx) if (HAS_PP(i965)) { if (pp_context == NULL) { pp_context = calloc(1, sizeof(*pp_context)); - i965_post_processing_context_init(ctx, pp_context, i965->pp_batch); + i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch); i965->pp_context = pp_context; } } @@ -5574,13 +5567,14 @@ i965_proc_context_destroy(void *hw_context) struct hw_context * i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_driver_data *intel = intel_driver_data(ctx); struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context)); proc_context->base.destroy = i965_proc_context_destroy; proc_context->base.run = i965_proc_picture; proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0); - i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch); + i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch); return (struct hw_context *)proc_context; } diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index fd4cbcf5..76f3595e 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -535,6 +535,7 @@ struct i965_post_processing_context const VARectangle *dst_rect, int pp_index, void * filter_param); + void (*finalize)(struct i965_post_processing_context *pp_context); }; struct i965_proc_context @@ -575,13 +576,4 @@ i965_post_processing_terminate(VADriverContextP ctx); bool 
i965_post_processing_init(VADriverContextP ctx); - -extern void -gen8_post_processing_context_init(VADriverContextP ctx, - struct i965_post_processing_context *pp_context, - struct intel_batchbuffer *batch); - -extern void -gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context); - #endif /* __I965_POST_PROCESSING_H__ */ -- cgit v1.2.1 From a7343f9148198f25b2c10bf610c95e6ebd4fa189 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 24 Apr 2014 13:39:21 +0800 Subject: Rename HAS_PP() to HAS_VPP() Directly check the flag of has_vpp in codec_info Signed-off-by: Xiang, Haihao (cherry picked from commit 1c4d3468229797e787f4b99b0729baf90a115a1d) Conflicts: src/gen8_post_processing.c src/i965_post_processing.c --- src/gen8_post_processing.c | 6 ------ src/i965_post_processing.c | 13 +++++-------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 4ff7f41b..cf613cfe 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -41,12 +41,6 @@ #include "i965_render.h" #include "intel_media.h" -#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_info) || \ - IS_GEN6((ctx)->intel.device_info) || \ - IS_GEN7((ctx)->intel.device_info) || \ - IS_GEN8((ctx)->intel.device_info)) - - #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index bcd22cd1..72279da1 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -40,10 +40,7 @@ #include "i965_render.h" #include "intel_media.h" -#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_info) || \ - IS_GEN6((ctx)->intel.device_info) || \ - IS_GEN7((ctx)->intel.device_info) || \ - IS_GEN8((ctx)->intel.device_info)) +#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp) #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ 
MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) @@ -4638,7 +4635,7 @@ i965_scaling_processing( assert(src_surface_obj->fourcc == VA_FOURCC_NV12); assert(dst_surface_obj->fourcc == VA_FOURCC_NV12); - if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) { + if (HAS_VPP(i965) && (flags & I965_PP_FLAG_AVS)) { struct i965_surface src_surface; struct i965_surface dst_surface; @@ -4681,7 +4678,7 @@ i965_post_processing( *has_done_scaling = 0; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { VAStatus status; struct i965_surface src_surface; struct i965_surface dst_surface; @@ -5073,7 +5070,7 @@ i965_image_processing(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { int fourcc = pp_get_surface_fourcc(ctx, src_surface); _i965LockMutex(&i965->pp_mutex); @@ -5276,7 +5273,7 @@ i965_post_processing_init(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { if (pp_context == NULL) { pp_context = calloc(1, sizeof(*pp_context)); i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch); -- cgit v1.2.1 From e437ae5c33ea6547f88cf2bd1b34159fce95b988 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 May 2014 16:16:05 +0800 Subject: Limit the minimum pitch for linear surface pitch must be 64 at least for linear surface for most functions on IVB/HSW/BDW such VEBOX, Data port media read/write https://bugs.freedesktop.org/show_bug.cgi?id=72522 Signed-off-by: Xiang, Haihao (cherry picked from commit 57db5c2524f4e3cb6ae2301bddfdf1c40cdbb626) --- src/i965_device_info.c | 12 ++++++++++++ src/i965_drv_video.c | 14 ++++++++------ src/i965_drv_video.h | 2 ++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index f040592c..f7ce2261 100644 
--- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -40,6 +40,8 @@ static const struct hw_codec_info g4x_hw_codec_info = { .max_width = 2048, .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, @@ -58,6 +60,8 @@ static const struct hw_codec_info ilk_hw_codec_info = { .max_width = 2048, .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, .has_h264_decoding = 1, @@ -78,6 +82,8 @@ static const struct hw_codec_info snb_hw_codec_info = { .max_width = 2048, .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, .has_h264_decoding = 1, @@ -106,6 +112,8 @@ static const struct hw_codec_info ivb_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, @@ -138,6 +146,8 @@ static const struct hw_codec_info hsw_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, @@ -174,6 +184,8 @@ static const struct hw_codec_info bdw_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 96ca997e..bf152087 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -955,8 +955,10 @@ i965_CreateSurfaces2( obj_surface->obj_subpic[j] = NULL; } - obj_surface->width = ALIGN(width, 16); - obj_surface->height = ALIGN(height, 16); + assert(i965->codec_info->min_linear_wpitch); + assert(i965->codec_info->min_linear_hpitch); + obj_surface->width = ALIGN(width, i965->codec_info->min_linear_wpitch); + obj_surface->height = ALIGN(height, i965->codec_info->min_linear_hpitch); obj_surface->flags = SURFACE_REFERENCED; obj_surface->fourcc = 0; obj_surface->bo = NULL; @@ -2606,7 +2608,7 
@@ i965_CreateImage(VADriverContextP ctx, image->image_id = image_id; image->buf = VA_INVALID_ID; - awidth = ALIGN(width, 64); + awidth = ALIGN(width, i965->codec_info->min_linear_wpitch); if ((format->fourcc == VA_FOURCC_YV12) || (format->fourcc == VA_FOURCC_I420)) { @@ -2615,7 +2617,7 @@ i965_CreateImage(VADriverContextP ctx, } } - aheight = ALIGN(height, 16); + aheight = ALIGN(height, i965->codec_info->min_linear_hpitch); size = awidth * aheight; size2 = (awidth / 2) * (aheight / 2); @@ -2952,7 +2954,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC_YUY2: case VA_FOURCC_UYVY: - obj_surface->width = ALIGN(obj_surface->orig_width * 2, 16); + obj_surface->width = ALIGN(obj_surface->orig_width * 2, i965->codec_info->min_linear_wpitch); obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2965,7 +2967,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC_RGBX: case VA_FOURCC_BGRA: case VA_FOURCC_BGRX: - obj_surface->width = ALIGN(obj_surface->orig_width * 4, 16); + obj_surface->width = ALIGN(obj_surface->orig_width * 4, i965->codec_info->min_linear_wpitch); region_width = obj_surface->width; region_height = obj_surface->height; break; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 856b4789..900aed99 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -291,6 +291,8 @@ struct hw_codec_info int max_width; int max_height; + int min_linear_wpitch; + int min_linear_hpitch; unsigned int has_mpeg2_decoding:1; unsigned int has_mpeg2_encoding:1; -- cgit v1.2.1 From 3afe0514162332e923092000940ee8e99979a817 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 9 May 2014 16:31:54 +0800 Subject: mpeg2: check frame_pred_frame_dct instead of progressive_frame Some MPEG-2 videos set progressive_frame to 1 and set frame_pred_frame_dct to 0, which is not conformed to MPEG-2 spec. bottom field may be used to form prediction if frame_pred_frame_dct is 0. 
Previously the bottom field is excluded from the frame store list https://bugs.freedesktop.org/show_bug.cgi?id=73424 Signed-off-by: Xiang, Haihao (cherry picked from commit b3031d16b1ea9ef2ab95bc09e59f0db5214a1125) --- src/i965_decoder_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 617bc154..f579f911 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -139,7 +139,7 @@ mpeg2_set_reference_surfaces( ref_frames[n++].surface_id = ref_frames[0].surface_id; } - if (pic_param->picture_coding_extension.bits.progressive_frame) + if (pic_param->picture_coding_extension.bits.frame_pred_frame_dct) return; ref_frames[2].surface_id = VA_INVALID_ID; -- cgit v1.2.1 From 18d0aee33fd4a32f846475cf31a2be6499b17b4b Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 14 May 2014 13:33:07 +0200 Subject: surface: fix geometry (size, layout) of grayscale surfaces. Fix size of the allocated buffer used to represent grayscale (Y800) surfaces. Only the luminance component is needed, thus implying a single plane. Likewise, update render routines to only submit the first plane. The existing render kernels readily only care about that single plane. 
Signed-off-by: Gwenole Beauchesne --- src/gen8_render.c | 3 +++ src/i965_drv_video.c | 9 +++++---- src/i965_render.c | 3 +++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/gen8_render.c b/src/gen8_render.c index ca731d20..074fec42 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -276,6 +276,9 @@ gen8_render_src_surfaces_state( gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); + if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */ + return; + if (obj_surface->fourcc == VA_FOURCC_NV12) { gen8_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index bf152087..7a0320bc 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -861,6 +861,7 @@ bpp_1stplane_by_fourcc(unsigned int fourcc) case VA_FOURCC_YUY2: return 2; + case VA_FOURCC_Y800: case VA_FOURCC_YV12: case VA_FOURCC_IMC3: case VA_FOURCC_IYUV: @@ -2868,13 +2869,13 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC_Y800: assert(subsampling == SUBSAMPLE_YUV400); - obj_surface->cb_cr_pitch = obj_surface->width; + obj_surface->cb_cr_pitch = 0; obj_surface->cb_cr_width = 0; obj_surface->cb_cr_height = 0; - obj_surface->y_cb_offset = obj_surface->height; - obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32); + obj_surface->y_cb_offset = 0; + obj_surface->y_cr_offset = 0; region_width = obj_surface->width; - region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2; + region_height = obj_surface->height; break; diff --git a/src/i965_render.c b/src/i965_render.c index aed78c07..9d0e8465 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -876,6 +876,9 @@ i965_render_src_surfaces_state( i965_render_src_surface_state(ctx, 1, 
region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); + if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */ + return; + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, -- cgit v1.2.1 From cffa75273cd367d41019995c2335241b8e349ef1 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 14 May 2014 13:42:51 +0200 Subject: surface: factor out release of surface buffer storage. Introduce a new i965_destroy_surface_storage() helper function to unreference the underlying GEM buffer object, and any associated private data, if any. Signed-off-by: Gwenole Beauchesne --- src/i965_decoder_utils.c | 6 +----- src/i965_drv_video.c | 14 +++++++++++--- src/i965_drv_video.h | 3 +++ src/i965_output_dri.c | 6 +----- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index f579f911..7ebc3fac 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -387,14 +387,10 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, obj_surface->flags &= ~SURFACE_REFERENCED; if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - dri_bo_unreference(obj_surface->bo); - obj_surface->bo = NULL; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; + i965_destroy_surface_storage(obj_surface); } - if (obj_surface->free_private_data) - obj_surface->free_private_data(&obj_surface->private_data); - frame_store[i].surface_id = VA_INVALID_ID; frame_store[i].frame_store_id = -1; frame_store[i].obj_surface = NULL; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 7a0320bc..eb67f535 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -603,10 +603,11 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx, return vaStatus; } -static void 
-i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +void +i965_destroy_surface_storage(struct object_surface *obj_surface) { - struct object_surface *obj_surface = (struct object_surface *)obj; + if (!obj_surface) + return; dri_bo_unreference(obj_surface->bo); obj_surface->bo = NULL; @@ -615,7 +616,14 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj) obj_surface->free_private_data(&obj_surface->private_data); obj_surface->private_data = NULL; } +} + +static void +i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +{ + struct object_surface *obj_surface = (struct object_surface *)obj; + i965_destroy_surface_storage(obj_surface); object_heap_free(heap, obj); } diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 900aed99..44f61bf7 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -428,4 +428,7 @@ extern VAStatus i965_DestroySurfaces(VADriverContextP ctx, #define I965_SURFACE_MEM_GEM_FLINK 1 #define I965_SURFACE_MEM_DRM_PRIME 2 +void +i965_destroy_surface_storage(struct object_surface *obj_surface); + #endif /* _I965_DRV_VIDEO_H_ */ diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index 717ee9a4..fdd69cea 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -209,12 +209,8 @@ i965_put_surface_dri( obj_surface->flags |= SURFACE_DISPLAYED; if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - dri_bo_unreference(obj_surface->bo); - obj_surface->bo = NULL; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - - if (obj_surface->free_private_data) - obj_surface->free_private_data(&obj_surface->private_data); + i965_destroy_surface_storage(obj_surface); } _i965UnlockMutex(&i965->render_mutex); -- cgit v1.2.1 From 0f2e2a97a33881ab3a7f0c079391651c8a0fca78 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 18:30:33 +0200 Subject: config: fix vaGetConfigAttributes() to validate profile/entrypoint. 
Factor out code to validate profile/entrypoint per the underlying hardware capabilities. Also fix vaGetConfigAttributes() to really validate the profile/entrypoint pair. Signed-off-by: Gwenole Beauchesne --- src/i965_drv_video.c | 145 +++++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index eb67f535..15d65e57 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -366,6 +366,78 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, return n > 0 ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_UNSUPPORTED_PROFILE; } +static VAStatus +i965_validate_config(VADriverContextP ctx, VAProfile profile, + VAEntrypoint entrypoint) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + VAStatus va_status; + + /* Validate profile & entrypoint */ + switch (profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + if ((HAS_MPEG2_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_MPEG2_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if ((HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_H264_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + if (HAS_VC1_DECODING(i965) && entrypoint == VAEntrypointVLD) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileNone: + if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case 
VAProfileJPEGBaseline: + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileVP8Version0_3: + if ((HAS_VP8_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_VP8_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + default: + va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + break; + } + return va_status; +} + VAStatus i965_GetConfigAttributes(VADriverContextP ctx, VAProfile profile, @@ -373,8 +445,13 @@ i965_GetConfigAttributes(VADriverContextP ctx, VAConfigAttrib *attrib_list, /* in/out */ int num_attribs) { + VAStatus va_status; int i; + va_status = i965_validate_config(ctx, profile, entrypoint); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + /* Other attributes don't seem to be defined */ /* What to do if we don't know the attribute? 
*/ for (i = 0; i < num_attribs; i++) { @@ -460,73 +537,7 @@ i965_CreateConfig(VADriverContextP ctx, int i; VAStatus vaStatus; - /* Validate profile & entrypoint */ - switch (profile) { - case VAProfileMPEG2Simple: - case VAProfileMPEG2Main: - if ((HAS_MPEG2_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_MPEG2_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - break; - - case VAProfileH264ConstrainedBaseline: - case VAProfileH264Main: - case VAProfileH264High: - if ((HAS_H264_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_H264_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileVC1Simple: - case VAProfileVC1Main: - case VAProfileVC1Advanced: - if (HAS_VC1_DECODING(i965) && VAEntrypointVLD == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileNone: - if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileJPEGBaseline: - if (HAS_JPEG_DECODING(i965) && VAEntrypointVLD == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileVP8Version0_3: - if ((HAS_VP8_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_VP8_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) - vaStatus = VA_STATUS_SUCCESS; - else - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - - break; - - default: - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; - break; - } - + vaStatus = i965_validate_config(ctx, profile, entrypoint); if (VA_STATUS_SUCCESS != vaStatus) { return vaStatus; } -- cgit v1.2.1 From 
9200fe231e1ffe249e17c8146303eeae6338aa06 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 17:55:05 +0200 Subject: config: fix vaCreateConfig() to not override user chroma format. Only validate the user-defined chroma format (VAConfigAttribRTFormat) attribute, if any. Don't override it. i.e. append a pre-defined value only if it was not defined by the user beforehand. Properly return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT if the supplied chroma format is not supported. Signed-off-by: Gwenole Beauchesne --- src/i965_drv_video.c | 90 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 15d65e57..1d4a65cc 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -438,6 +438,20 @@ i965_validate_config(VADriverContextP ctx, VAProfile profile, return va_status; } +static uint32_t +i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, + VAEntrypoint entrypoint) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + uint32_t chroma_formats = VA_RT_FORMAT_YUV420; + + switch (profile) { + default: + break; + } + return chroma_formats; +} + VAStatus i965_GetConfigAttributes(VADriverContextP ctx, VAProfile profile, @@ -457,7 +471,8 @@ i965_GetConfigAttributes(VADriverContextP ctx, for (i = 0; i < num_attribs; i++) { switch (attrib_list[i].type) { case VAConfigAttribRTFormat: - attrib_list[i].value = VA_RT_FORMAT_YUV420; + attrib_list[i].value = i965_get_default_chroma_formats(ctx, + profile, entrypoint); break; case VAConfigAttribRateControl: @@ -498,29 +513,49 @@ i965_destroy_config(struct object_heap *heap, struct object_base *obj) object_heap_free(heap, obj); } -static VAStatus -i965_update_attribute(struct object_config *obj_config, VAConfigAttrib *attrib) +static VAConfigAttrib * +i965_lookup_config_attribute(struct object_config *obj_config, + VAConfigAttribType type) { int i; - /* Check existing
attrbiutes */ for (i = 0; i < obj_config->num_attribs; i++) { - if (obj_config->attrib_list[i].type == attrib->type) { - /* Update existing attribute */ - obj_config->attrib_list[i].value = attrib->value; - return VA_STATUS_SUCCESS; - } + VAConfigAttrib * const attrib = &obj_config->attrib_list[i]; + if (attrib->type == type) + return attrib; } + return NULL; +} - if (obj_config->num_attribs < I965_MAX_CONFIG_ATTRIBUTES) { - i = obj_config->num_attribs; - obj_config->attrib_list[i].type = attrib->type; - obj_config->attrib_list[i].value = attrib->value; - obj_config->num_attribs++; +static VAStatus +i965_append_config_attribute(struct object_config *obj_config, + const VAConfigAttrib *new_attrib) +{ + VAConfigAttrib *attrib; + + if (obj_config->num_attribs >= I965_MAX_CONFIG_ATTRIBUTES) + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + + attrib = &obj_config->attrib_list[obj_config->num_attribs++]; + attrib->type = new_attrib->type; + attrib->value = new_attrib->value; + return VA_STATUS_SUCCESS; +} + +static VAStatus +i965_ensure_config_attribute(struct object_config *obj_config, + const VAConfigAttrib *new_attrib) +{ + VAConfigAttrib *attrib; + + /* Check for existing attributes */ + attrib = i965_lookup_config_attribute(obj_config, new_attrib->type); + if (attrib) { + /* Update existing attribute */ + attrib->value = new_attrib->value; return VA_STATUS_SUCCESS; } - - return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + return i965_append_config_attribute(obj_config, new_attrib); } VAStatus @@ -552,16 +587,23 @@ i965_CreateConfig(VADriverContextP ctx, obj_config->profile = profile; obj_config->entrypoint = entrypoint; - obj_config->attrib_list[0].type = VAConfigAttribRTFormat; - obj_config->attrib_list[0].value = VA_RT_FORMAT_YUV420; - obj_config->num_attribs = 1; + obj_config->num_attribs = 0; - for(i = 0; i < num_attribs; i++) { - vaStatus = i965_update_attribute(obj_config, &(attrib_list[i])); - - if (VA_STATUS_SUCCESS != vaStatus) { + for (i = 0; i < num_attribs; i++) { + 
vaStatus = i965_ensure_config_attribute(obj_config, &attrib_list[i]); + if (vaStatus != VA_STATUS_SUCCESS) break; - } + } + + if (vaStatus == VA_STATUS_SUCCESS) { + VAConfigAttrib attrib, *attrib_found; + attrib.type = VAConfigAttribRTFormat; + attrib.value = i965_get_default_chroma_formats(ctx, profile, entrypoint); + attrib_found = i965_lookup_config_attribute(obj_config, attrib.type); + if (!attrib_found || !attrib_found->value) + vaStatus = i965_append_config_attribute(obj_config, &attrib); + else if (!(attrib_found->value & attrib.value)) + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; } /* Error recovery */ -- cgit v1.2.1 From e29345cbdc26d5e4c6729100344eb8dbf9e35b65 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 18:15:23 +0200 Subject: config: fix supported set of chroma formats for JPEG decode. If the hardware supports JPEG decoding, then we have to expose the right set of chroma formats for the output (decoded) VA surface. In particular, we could support YUV 4:0:0, 4:1:1, 4:2:2 and 4:4:4.
v2: export support for YUV 4:0:0 (grayscale) too [Haihao] Signed-off-by: Gwenole Beauchesne --- src/i965_device_info.c | 11 +++++++++++ src/i965_drv_video.c | 5 +++++ src/i965_drv_video.h | 2 ++ 3 files changed, 18 insertions(+) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index f7ce2261..1d5d6aa7 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -27,6 +27,11 @@ #include #include "i965_drv_video.h" +/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */ +#define EXTRA_JPEG_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \ + VA_RT_FORMAT_YUV444) + extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); extern bool genx_render_init(VADriverContextP); @@ -115,6 +120,8 @@ static const struct hw_codec_info ivb_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, .has_h264_decoding = 1, @@ -149,6 +156,8 @@ static const struct hw_codec_info hsw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, .has_h264_decoding = 1, @@ -187,6 +196,8 @@ static const struct hw_codec_info bdw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_mpeg2_encoding = 1, .has_h264_decoding = 1, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 1d4a65cc..4690b622 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -446,6 +446,11 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, uint32_t chroma_formats = VA_RT_FORMAT_YUV420; switch (profile) { + case 
VAProfileJPEGBaseline: + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; + break; + default: break; } diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 44f61bf7..a09e071d 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -294,6 +294,8 @@ struct hw_codec_info int min_linear_wpitch; int min_linear_hpitch; + unsigned int jpeg_dec_chroma_formats; + unsigned int has_mpeg2_decoding:1; unsigned int has_mpeg2_encoding:1; unsigned int has_h264_decoding:1; -- cgit v1.2.1 From 6d76944605a75872714744262ce0370581de9225 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 14 May 2014 13:59:25 +0200 Subject: decoder: h264: factor out allocation of reconstructed surfaces. Add new avc_ensure_surface_bo() helper function to factor out the allocation and initialization processes of the reconstructed VA surface buffer stores. Keep preferred native format (NV12) and initialize chroma values to 0.0 (0x80) when needed for "fake" grayscale (Y800) surfaces implemented on top of existing NV12. Signed-off-by: Gwenole Beauchesne --- src/gen6_mfd.c | 12 +----------- src/gen75_mfd.c | 12 +----------- src/gen7_mfd.c | 12 +----------- src/gen8_mfd.c | 12 +----------- src/i965_decoder_utils.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++- src/i965_decoder_utils.h | 8 +++++++ 6 files changed, 65 insertions(+), 45 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 4a220522..2092f69e 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -840,18 +840,8 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ?
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 2d4e236c..5b023cff 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -1084,18 +1084,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index f4ccb12b..06eb7430 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -758,18 +758,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 1742beaa..e3e71fb6 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -845,18 +845,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx, obj_surface = decode_state->render_object; obj_surface->flags &= ~SURFACE_REF_DIS_MASK; obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 7ebc3fac..18704fe5 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -174,6 +174,58 @@ mpeg2_set_reference_surfaces( } } +/* Ensure the supplied VA surface has valid storage for decoding the + current picture */ +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +) +{ + VAStatus va_status; + uint32_t hw_fourcc, fourcc, subsample; + + /* Validate chroma format */ + switch (pic_param->seq_fields.bits.chroma_format_idc) { + case 0: // Grayscale + fourcc = VA_FOURCC_Y800; + subsample = SUBSAMPLE_YUV400; + break; + case 1: // YUV 4:2:0 + fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + break; + default: + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + } + + /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */ + hw_fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + + /* (Re-)allocate the underlying surface buffer store, if necessary */ + if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { + i965_destroy_surface_storage(obj_surface); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, + hw_fourcc, subsample); + if 
(va_status != VA_STATUS_SUCCESS) + return va_status; + } + + /* Fake chroma components if grayscale is implemented on top of NV12 */ + if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) { + const uint32_t uv_offset = obj_surface->width * obj_surface->height; + const uint32_t uv_size = obj_surface->width * obj_surface->height / 2; + + drm_intel_gem_bo_map_gtt(obj_surface->bo); + memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); + drm_intel_gem_bo_unmap_gtt(obj_surface->bo); + } + return VA_STATUS_SUCCESS; +} + /* Generate flat scaling matrices for H.264 decoding */ void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) @@ -423,7 +475,7 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, * Sometimes a dummy frame comes from the upper layer library, call i965_check_alloc_surface_bo() * to ake sure the store buffer is allocated for this reference frame */ - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); slot_found = 0; frame_idx = -1; diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index b7b72b3e..14a45fba 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -43,6 +43,14 @@ mpeg2_set_reference_surfaces( VAPictureParameterBufferMPEG2 *pic_param ); +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +); + void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); -- cgit v1.2.1 From e9c2a677a04e911529966cccf71eb3a9ae59e6c3 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 18:52:00 +0200 Subject: decoder: h264: optimize support for grayscale surfaces. Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luminance component ; (ii) speed by avoiding initialization of the (now inexistent) chrominance planes. 
Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. v2: fix check for extra H.264 chroma formats [Haihao] Signed-off-by: Gwenole Beauchesne --- src/gen6_mfd.c | 8 ++++++-- src/gen75_mfd.c | 6 +++++- src/gen7_mfd.c | 6 +++++- src/gen8_mfd.c | 6 +++++- src/i965_decoder_utils.c | 23 +++++++++++++++++++---- src/i965_device_info.c | 9 +++++++++ src/i965_drv_video.c | 13 +++++++++++++ src/i965_drv_video.h | 9 +++++++++ 8 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 2092f69e..f925d986 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; struct object_surface *obj_surface = decode_state->render_object; - + unsigned int surface_format; + + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 19) | ((obj_surface->orig_width - 1) << 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 << 22) | /* surface object control state, FIXME??? 
*/ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 5b023cff..895b1940 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 06eb7430..f9114e72 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index e3e71fb6..10495d83 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 18704fe5..9a5092e2 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -185,25 +185,40 @@ avc_ensure_surface_bo( ) { VAStatus va_status; - uint32_t hw_fourcc, fourcc, subsample; + uint32_t hw_fourcc, fourcc, subsample, chroma_format; /* Validate chroma format */ switch (pic_param->seq_fields.bits.chroma_format_idc) { case 0: // Grayscale fourcc = VA_FOURCC_Y800; subsample = SUBSAMPLE_YUV400; + chroma_format = VA_RT_FORMAT_YUV400; break; case 1: // YUV 4:2:0 fourcc = VA_FOURCC_NV12; subsample = SUBSAMPLE_YUV420; + chroma_format = VA_RT_FORMAT_YUV420; break; default: return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; } - /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */ - hw_fourcc = VA_FOURCC_NV12; - subsample = SUBSAMPLE_YUV420; + /* Determine the HW surface format, bound to VA config needs */ + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format) + hw_fourcc = fourcc; + else { + hw_fourcc = 0; + switch (fourcc) { + case VA_FOURCC_Y800: // Implement with an NV12 surface + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { + hw_fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + } + break; + } + } + if (!hw_fourcc) + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; /* (Re-)allocate the underlying surface 
buffer store, if necessary */ if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 1d5d6aa7..4fad7a47 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -27,6 +27,10 @@ #include #include "i965_drv_video.h" +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ +#define EXTRA_H264_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400) + /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */ #define EXTRA_JPEG_DEC_CHROMA_FORMATS \ (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \ @@ -90,6 +94,8 @@ static const struct hw_codec_info snb_hw_codec_info = { .min_linear_wpitch = 16, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_h264_decoding = 1, .has_h264_encoding = 1, @@ -120,6 +126,7 @@ static const struct hw_codec_info ivb_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -156,6 +163,7 @@ static const struct hw_codec_info hsw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -196,6 +204,7 @@ static const struct hw_codec_info bdw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 4690b622..c7da398a 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -446,6 +446,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, uint32_t chroma_formats = VA_RT_FORMAT_YUV420; switch 
(profile) { + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if (HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + case VAProfileJPEGBaseline: if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; @@ -1532,6 +1539,7 @@ i965_CreateContext(VADriverContextP ctx, struct i965_render_state *render_state = &i965->render_state; struct object_config *obj_config = CONFIG(config_id); struct object_context *obj_context = NULL; + VAConfigAttrib *attrib; VAStatus vaStatus = VA_STATUS_SUCCESS; int contextID; int i; @@ -1625,6 +1633,11 @@ i965_CreateContext(VADriverContextP ctx, } } + attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat); + if (!attrib) + return VA_STATUS_ERROR_INVALID_CONFIG; + obj_context->codec_state.base.chroma_formats = attrib->value; + /* Error recovery */ if (VA_STATUS_SUCCESS != vaStatus) { i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index a09e071d..e8bbf87e 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -101,8 +101,13 @@ struct object_config #define NUM_SLICES 10 +struct codec_state_base { + uint32_t chroma_formats; +}; + struct decode_state { + struct codec_state_base base; struct buffer_store *pic_param; struct buffer_store **slice_params; struct buffer_store *iq_matrix; @@ -122,6 +127,7 @@ struct decode_state struct encode_state { + struct codec_state_base base; struct buffer_store *seq_param; struct buffer_store *pic_param; struct buffer_store *pic_control; @@ -152,6 +158,7 @@ struct encode_state struct proc_state { + struct codec_state_base base; struct buffer_store *pipeline_param; VASurfaceID current_render_target; @@ -163,6 +170,7 @@ struct proc_state union codec_state { + struct codec_state_base base; struct decode_state 
decode; struct encode_state encode; struct proc_state proc; @@ -294,6 +302,7 @@ struct hw_codec_info int min_linear_wpitch; int min_linear_hpitch; + unsigned int h264_dec_chroma_formats; unsigned int jpeg_dec_chroma_formats; unsigned int has_mpeg2_decoding:1; -- cgit v1.2.1 From 628c958f4881900548ed80be1286060db68e0115 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 4 Jun 2014 11:17:52 +0200 Subject: decoder: h264: only allocate tiled surfaces for Sandybridge and newer. Don't allocate tiled surfaces on Ironlake platforms and earlier, stick to linear surfaces. This is a regression from 6d76944. Reported-by: Haihao Xiang Signed-off-by: Gwenole Beauchesne --- src/i965_decoder_utils.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 9a5092e2..ae5fd76b 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -222,9 +222,11 @@ avc_ensure_surface_bo( /* (Re-)allocate the underlying surface buffer store, if necessary */ if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { + struct i965_driver_data * const i965 = i965_driver_data(ctx); + i965_destroy_surface_storage(obj_surface); - va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, - hw_fourcc, subsample); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, + i965->codec_info->has_tiled_surface, hw_fourcc, subsample); if (va_status != VA_STATUS_SUCCESS) return va_status; } -- cgit v1.2.1 From 60ea472b116a2e245fa8579355c47eb501bfa20a Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 4 Jun 2014 10:36:28 +0200 Subject: decoder: h264: don't allocate bottom DMV buffer on Broadwell. Broadwell now uses a unique DMV buffer, irrespective of any field coding mode. The dmv_buffer is not used, so it doesn't need to be allocated at all.
Signed-off-by: Gwenole Beauchesne --- src/gen8_mfd.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 10495d83..a080504a 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -78,9 +78,8 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx, obj_surface->private_data = gen7_avc_surface; } - gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag && - !pic_param->seq_fields.bits.direct_8x8_inference_flag); - + /* DMV buffers now relate to the whole frame, irrespective of + field coding modes */ if (gen7_avc_surface->dmv_top == NULL) { gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr, "direct mv w/r buffer", @@ -88,15 +87,6 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx, 0x1000); assert(gen7_avc_surface->dmv_top); } - - if (gen7_avc_surface->dmv_bottom_flag && - gen7_avc_surface->dmv_bottom == NULL) { - gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr, - "direct mv w/r buffer", - width_in_mbs * height_in_mbs * 128, - 0x1000); - assert(gen7_avc_surface->dmv_bottom); - } } static void -- cgit v1.2.1 From 7465b1699cbde5fc6227167d6c28995d947f14f2 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 19 Apr 2014 00:12:42 +0800 Subject: Remove unnecessary check with IS_GEN8() It is always true or false Signed-off-by: Xiang, Haihao (cherry picked from commit 42258e128f19b93aa102672d5f61eb73d9f9808f) --- src/gen8_post_processing.c | 32 ++++++++++---------------------- src/gen8_render.c | 6 ++---- src/i965_post_processing.c | 37 +++++++++++-------------------------- src/i965_render.c | 3 +-- 4 files changed, 24 insertions(+), 54 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index cf613cfe..22279383 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -1234,8 +1234,7 @@ gen8_pp_curbe_load(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int param_size = 64; - if 
(IS_GEN8(i965->intel.device_info)) - param_size = sizeof(struct gen7_pp_static_parameter); + param_size = sizeof(struct gen7_pp_static_parameter); BEGIN_BATCH(batch, 4); OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); @@ -1258,8 +1257,6 @@ gen8_pp_object_walker(VADriverContextP ctx, unsigned int *command_ptr; param_size = sizeof(struct gen7_pp_inline_parameter); - if (IS_GEN8(i965->intel.device_info)) - param_size = sizeof(struct gen7_pp_inline_parameter); x_steps = pp_context->pp_x_steps(pp_context->private_context); y_steps = pp_context->pp_y_steps(pp_context->private_context); @@ -1300,14 +1297,12 @@ gen8_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); - if (IS_GEN8(i965->intel.device_info)) { - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); - OUT_RELOC(batch, command_buffer, - I915_GEM_DOMAIN_COMMAND, 0, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - } + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); dri_bo_unreference(command_buffer); @@ -1427,12 +1422,7 @@ gen8_post_processing_context_init(VADriverContextP ctx, assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8)); - if (IS_GEN8(i965->intel.device_info)) - memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); - else { - /* should never get here !!! 
*/ - assert(0); - } + memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); kernel_size = 4096 ; @@ -1481,10 +1471,8 @@ gen8_post_processing_context_init(VADriverContextP ctx, dri_bo_unmap(pp_context->instruction_state.bo); /* static & inline parameters */ - if (IS_GEN8(i965->intel.device_info)) { - pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); - pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); - } + pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); + pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; pp_context->pp_dndi_context.current_out_obj_surface = NULL; diff --git a/src/gen8_render.c b/src/gen8_render.c index 074fec42..9c49cbc6 100644 --- a/src/gen8_render.c +++ b/src/gen8_render.c @@ -1772,10 +1772,8 @@ gen8_render_init(VADriverContextP ctx) render_state->render_put_subpicture = gen8_render_put_subpicture; render_state->render_terminate = gen8_render_terminate; - if (IS_GEN8(i965->intel.device_info)) { - memcpy(render_state->render_kernels, render_kernels_gen8, - sizeof(render_state->render_kernels)); - } + memcpy(render_state->render_kernels, render_kernels_gen8, + sizeof(render_state->render_kernels)); kernel_size = 4096; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 72279da1..8bdea26f 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4194,8 +4194,7 @@ gen6_pp_upload_constants(VADriverContextP ctx, assert(sizeof(struct pp_static_parameter) == 128); assert(sizeof(struct gen7_pp_static_parameter) == 192); - if (IS_GEN7(i965->intel.device_info) || - IS_GEN8(i965->intel.device_info)) + if (IS_GEN7(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ -4278,8 +4277,7 @@ 
gen6_pp_curbe_load(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); int param_size; - if (IS_GEN7(i965->intel.device_info) || - IS_GEN8(i965->intel.device_info)) + if (IS_GEN7(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ -4365,8 +4363,7 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo *command_buffer; unsigned int *command_ptr; - if (IS_GEN7(i965->intel.device_info) || - IS_GEN8(i965->intel.device_info)) + if (IS_GEN7(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_inline_parameter); else param_size = sizeof(struct pp_inline_parameter); @@ -4408,23 +4405,13 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); - if (IS_GEN8(i965->intel.device_info)) { - BEGIN_BATCH(batch, 3); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); - OUT_RELOC(batch, command_buffer, - I915_GEM_DOMAIN_COMMAND, 0, - 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); - } else { - BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); - OUT_RELOC(batch, command_buffer, - I915_GEM_DOMAIN_COMMAND, 0, + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); - ADVANCE_BATCH(batch); - } - + ADVANCE_BATCH(batch); + dri_bo_unreference(command_buffer); /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END @@ -4567,8 +4554,7 @@ i965_vpp_clear_surface(VADriverContextP ctx, br13 |= pitch; if (IS_GEN6(i965->intel.device_info) || - IS_GEN7(i965->intel.device_info) || - IS_GEN8(i965->intel.device_info)) { + IS_GEN7(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 48); BEGIN_BLT_BATCH(batch, 12); } else { @@ -5252,8 +5238,7 @@ i965_post_processing_context_init(VADriverContextP ctx, } /* static & inline parameters */ - if (IS_GEN7(i965->intel.device_info) || - 
IS_GEN8(i965->intel.device_info)) { + if (IS_GEN7(i965->intel.device_info)) { pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); } else { diff --git a/src/i965_render.c b/src/i965_render.c index 9d0e8465..38f70ebb 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -1540,8 +1540,7 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; if (IS_GEN6(i965->intel.device_info) || - IS_GEN7(i965->intel.device_info) || - IS_GEN8(i965->intel.device_info)) { + IS_GEN7(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { -- cgit v1.2.1 From f07cd585c7cf2949826050f2419dd74959155201 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 26 May 2014 10:36:49 +0800 Subject: VPP: i965_vpp_clear_surface() is still used for CSC on BDW https://bugs.freedesktop.org/show_bug.cgi?id=79065 The regression is caused by commit 42258e1 Signed-off-by: Xiang, Haihao (cherry picked from commit 0523c58148e9496927f2c3fa9a641885a0350d0f) --- src/i965_post_processing.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 8bdea26f..c2db4806 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -4553,13 +4553,13 @@ i965_vpp_clear_surface(VADriverContextP ctx, br13 |= BR13_8; br13 |= pitch; - if (IS_GEN6(i965->intel.device_info) || - IS_GEN7(i965->intel.device_info)) { - intel_batchbuffer_start_atomic_blt(batch, 48); - BEGIN_BLT_BATCH(batch, 12); - } else { + if (IS_IRONLAKE(i965->intel.device_info)) { intel_batchbuffer_start_atomic(batch, 48); BEGIN_BATCH(batch, 12); + } else { + /* Will double-check the command if the new chipset is added */ + intel_batchbuffer_start_atomic_blt(batch, 48); + BEGIN_BLT_BATCH(batch, 12); } region_width = obj_surface->width; -- cgit v1.2.1 From 
338180cf36cc5cf641827a33e800e7faa78efa0f Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 19 May 2014 10:43:33 +0800 Subject: VPP: MADI on SNB Set the right surface states for reference, STMM and output surface, fix the shader as well Signed-off-by: Xiang, Haihao Tested-By: Simon Farnsworth (cherry picked from commit 1d1b8da1284f7f918733db79428f09af38d7e14a) Conflicts: src/i965_post_processing.c --- src/i965_device_info.c | 1 + src/i965_post_processing.c | 171 ++++++++++++++++----- .../Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm | 43 +++--- .../post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 | 37 +++-- .../post_processing/gen5_6/nv12_dndi_nv12.g6b | 37 +++-- 5 files changed, 203 insertions(+), 86 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 4fad7a47..6fad1061 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -104,6 +104,7 @@ static const struct hw_codec_info snb_hw_codec_info = { .has_accelerated_getimage = 1, .has_accelerated_putimage = 1, .has_tiled_surface = 1, + .has_di_motion_adptive = 1, .num_filters = 2, .filters = { diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c2db4806..acc9732c 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -40,6 +40,11 @@ #include "i965_render.h" #include "intel_media.h" +extern VAStatus +vpp_surface_convert(VADriverContextP ctx, + struct object_surface *src_obj_surf, + struct object_surface *dst_obj_surf); + #define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp) #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ @@ -3002,13 +3007,14 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct object_surface 
*obj_surface; + struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface; struct i965_sampler_dndi *sampler_dndi; int index; int w, h; int orig_w, orig_h; int dndi_top_first = 1; VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; + int is_first_frame = (pp_dndi_context->frame_order == -1); if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD) dndi_top_first = 0; @@ -3016,58 +3022,159 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex dndi_top_first = 1; /* surface */ - obj_surface = (struct object_surface *)src_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; + current_in_obj_surface = (struct object_surface *)src_surface->base; - if (pp_dndi_context->stmm_bo == NULL) { - pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_dndi_context->stmm_bo); + if (di_filter_param->algorithm == VAProcDeinterlacingBob) { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + if (pp_dndi_context->frame_order == 0) { + VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param; + if (!pipeline_param || + !pipeline_param->num_forward_references || + pipeline_param->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); + + return VA_STATUS_ERROR_INVALID_PARAMETER; + } else { + previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]); + assert(previous_in_obj_surface && previous_in_obj_surface->bo); + + is_first_frame = 0; + } + } else if (pp_dndi_context->frame_order == 1) { + vpp_surface_convert(ctx, + pp_dndi_context->current_out_obj_surface, + (struct 
object_surface *)dst_surface->base); + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + is_first_frame = 0; + + return VA_STATUS_SUCCESS_1; + } else { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } + } else { + return VA_STATUS_ERROR_UNIMPLEMENTED; } + /* source (temporal reference) YUV surface index 5 */ + orig_w = previous_in_obj_surface->orig_width; + orig_h = previous_in_obj_surface->orig_height; + w = previous_in_obj_surface->width; + h = previous_in_obj_surface->height; + i965_pp_set_surface2_state(ctx, pp_context, + previous_in_obj_surface->bo, 0, + orig_w, orig_h, w, + 0, h, + SURFACE_FORMAT_PLANAR_420_8, 1, + 5); + + /* source surface */ + orig_w = current_in_obj_surface->orig_width; + orig_h = current_in_obj_surface->orig_height; + w = current_in_obj_surface->width; + h = current_in_obj_surface->height; + /* source UV surface index 2 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_in_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 2, 0); /* source YUV surface index 4 */ i965_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, + current_in_obj_surface->bo, 0, orig_w, orig_h, w, 0, h, SURFACE_FORMAT_PLANAR_420_8, 1, 4); - /* source STMM surface index 20 */ + /* source STMM surface index 6 */ + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); + } + i965_pp_set_surface_state(ctx, pp_context, pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, - 20, 1); + 6, 0); - /* destination surface */ - obj_surface = (struct object_surface *)dst_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; + /* destination (Previous frame) */ + previous_out_obj_surface = (struct object_surface *)dst_surface->base; + 
orig_w = previous_out_obj_surface->orig_width; + orig_h = previous_out_obj_surface->orig_height; + w = previous_out_obj_surface->width; + h = previous_out_obj_surface->height; - /* destination Y surface index 7 */ + if (is_first_frame) { + current_out_obj_surface = previous_out_obj_surface; + } else { + VAStatus va_status; + + if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) { + unsigned int tiling = 0, swizzle = 0; + dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle); + + va_status = i965_CreateSurfaces(ctx, + orig_w, + orig_h, + VA_RT_FORMAT_YUV420, + 1, + &pp_dndi_context->current_out_surface); + assert(va_status == VA_STATUS_SUCCESS); + pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface); + assert(pp_dndi_context->current_out_obj_surface); + i965_check_alloc_surface_bo(ctx, + pp_dndi_context->current_out_obj_surface, + tiling != I915_TILING_NONE, + VA_FOURCC_NV12, + SUBSAMPLE_YUV420); + } + + current_out_obj_surface = pp_dndi_context->current_out_obj_surface; + } + + /* destination (Previous frame) Y surface index 7 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + previous_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1); - /* destination UV surface index 8 */ + /* destination (Previous frame) UV surface index 8 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + previous_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1); + + /* destination(Current frame) */ + orig_w = current_out_obj_surface->orig_width; + orig_h = current_out_obj_surface->orig_height; + w = current_out_obj_surface->width; + h = current_out_obj_surface->height; + + /* destination (Current frame) Y surface index xxx */ + i965_pp_set_surface_state(ctx, pp_context, + current_out_obj_surface->bo, 0, + orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, + 10, 1); + + /* destination (Current frame) UV surface index 
xxx */ + i965_pp_set_surface_state(ctx, pp_context, + current_out_obj_surface->bo, w * h, + orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, + 11, 1); + + /* STMM output surface, index 20 */ + i965_pp_set_surface_state(ctx, pp_context, + pp_dndi_context->stmm_bo, 0, + orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, + 20, 1); + /* sampler dndi */ dri_bo_map(pp_context->sampler_state_table.bo, True); assert(pp_context->sampler_state_table.bo->virtual); @@ -3115,7 +3222,7 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex sampler_dndi[index].dw6.di_partial = 0; sampler_dndi[index].dw6.dndi_top_first = dndi_top_first; sampler_dndi[index].dw6.dndi_stream_id = 0; - sampler_dndi[index].dw6.dndi_first_frame = 1; + sampler_dndi[index].dw6.dndi_first_frame = is_first_frame; sampler_dndi[index].dw6.progressive_dn = 0; sampler_dndi[index].dw6.fmd_tear_threshold = 2; sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100; @@ -3149,6 +3256,8 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex dst_surface->flags = I965_SURFACE_FLAG_FRAME; + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + return VA_STATUS_SUCCESS; } @@ -3386,12 +3495,6 @@ gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context return 0; } - -extern VAStatus -vpp_surface_convert(VADriverContextP ctx, - struct object_surface *src_obj_surf, - struct object_surface *dst_obj_surf); - static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *src_surface, @@ -5283,8 +5386,6 @@ static const int proc_frame_to_pp_frame[3] = { I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST }; -#define VA_STATUS_SUCCESS_1 0xFFFFFFFE - VAStatus i965_proc_picture(VADriverContextP ctx, VAProfile profile, diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm 
b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm index 280d37a5..23bd3067 100644 --- a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm +++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm @@ -62,24 +62,29 @@ #include "DI_Hist_Save.asm" ////////////////////////////////////// Save the DN Curr Frame for Next Run //////////////////////// - add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w - // check top/bottom field first - cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w - (f0.0) jmpi (1) TOP_FIELD_FIRST - -BOTTOM_FIELD_FIRST: - $for (0,0; udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3) - } - jmpi (1) SAVE_DN_CURR - -TOP_FIELD_FIRST: - $for (0,0; udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 1st field luma from current frame (line 1,3) + // previous frame + $for (0; ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16) } -SAVE_DN_CURR: + + mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin + mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4) + mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud + + //Write UV through DATAPORT + mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin + asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block origin should be half of Y's + mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2) + mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud + + mov (8) mubMSGHDR_DN(1, 0)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 1)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 
1)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 16)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2> + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud + + // current frame $for (0; ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) } @@ -87,7 +92,7 @@ SAVE_DN_CURR: mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4) mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_1_Y:ud //Write UV through DATAPORT mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin @@ -99,4 +104,4 @@ SAVE_DN_CURR: mov (8) mubMSGHDR_DN(1, 1)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET, 16)<16 ;8,2> mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2> mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2> - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud \ No newline at end of file + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_1_UV:ud diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 index 446fb4b4..4563d207 100644 --- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 +++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 @@ -44,18 +44,23 @@ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 }, { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 }, { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 }, - { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, - { 
0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 }, - { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a }, - { 0x00400001, 0x20400022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 }, - { 0x00400001, 0x20600022, 0x00690590, 0x00000000 }, - { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 }, - { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 }, - { 0x00400001, 0x20500022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 }, - { 0x00400001, 0x20700022, 0x00690590, 0x00000000 }, + { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 }, + { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 }, + { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 }, + { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 }, + { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 }, + { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 }, + { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 }, { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 }, { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 }, { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 }, @@ -63,7 +68,7 @@ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, - { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x0608200a }, { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 
0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, @@ -72,7 +77,7 @@ { 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 }, { 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 }, { 0x00600001, 0x40510232, 0x00ae0530, 0x00000000 }, - { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x0408200b }, { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, @@ -81,10 +86,10 @@ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, - { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a }, { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, - { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b index 111d4832..8d6ebe39 100644 --- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b +++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b @@ -117,18 +117,23 @@ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 }, { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 }, { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 }, - { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, - { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 }, - { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a }, - { 0x00400001, 0x20400022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 }, - { 0x00400001, 0x20600022, 0x00690590, 0x00000000 }, - { 0x00400001, 0x20700022, 
0x006904f0, 0x00000000 }, - { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 }, - { 0x00400001, 0x20500022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 }, - { 0x00400001, 0x20700022, 0x00690590, 0x00000000 }, + { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 }, + { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 }, + { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 }, + { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 }, + { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 }, + { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 }, + { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 }, { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 }, { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 }, { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 }, @@ -136,7 +141,7 @@ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, - { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x0609400a }, { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, @@ -145,7 +150,7 @@ { 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 }, { 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 }, { 0x00600001, 0x40510232, 0x00ae0530, 0x00000000 }, - { 
0x05600031, 0x20000cc4, 0x00000020, 0x04094008 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x0409400b }, { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, @@ -154,10 +159,10 @@ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, - { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a }, { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, - { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, -- cgit v1.2.1 From a664a099f7e41e16f3fbc13eb73f71e2cd0e72ae Mon Sep 17 00:00:00 2001 From: Sebastian Ramacher Date: Fri, 23 May 2014 08:57:33 +0800 Subject: Define i965_proc_picture in header Signed-off-by: Sebastian Ramacher Reviewed-by: Zhao, Yakui (cherry picked from commit e9e9b55c769a6c0b90d6af5d89a6baf4c6f742be) --- src/gen75_picture_process.c | 6 ------ src/i965_post_processing.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index 7f09b4f0..87b6c685 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -38,12 +38,6 @@ #include "i965_post_processing.h" #include "gen75_picture_process.h" -extern void -i965_proc_picture(VADriverContextP ctx, - VAProfile profile, - union codec_state *codec_state, - struct hw_context *hw_context); - extern struct hw_context * i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config); diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 76f3595e..f0a277e6 100755 --- 
a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -576,4 +576,11 @@ i965_post_processing_terminate(VADriverContextP ctx); bool i965_post_processing_init(VADriverContextP ctx); + +extern VAStatus +i965_proc_picture(VADriverContextP ctx, + VAProfile profile, + union codec_state *codec_state, + struct hw_context *hw_context); + #endif /* __I965_POST_PROCESSING_H__ */ -- cgit v1.2.1 From dfffd00a85aec108862bfcdbf537cc360dcca483 Mon Sep 17 00:00:00 2001 From: Sebastian Ramacher Date: Fri, 23 May 2014 08:57:39 +0800 Subject: Propagate error code Signed-off-by: Sebastian Ramacher Reviewed-by: Zhao, Yakui (cherry picked from commit ca1acd54eb59eadabfb40a4b61df2e8968b5e00d) --- src/gen75_picture_process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index 87b6c685..6978d4b0 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -57,8 +57,8 @@ gen75_vpp_fmt_cvt(VADriverContextP ctx, proc_ctx->vpp_fmt_cvt_ctx = i965_proc_context_init(ctx, NULL); } - i965_proc_picture(ctx, profile, codec_state, - proc_ctx->vpp_fmt_cvt_ctx); + va_status = i965_proc_picture(ctx, profile, codec_state, + proc_ctx->vpp_fmt_cvt_ctx); return va_status; } -- cgit v1.2.1 From b44240a8b01199e75f39ad2ba42a757ef5f51fcc Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 26 May 2014 10:14:53 +0800 Subject: mpeg2: calculate the slice data length on IVB Sometimes pending data is added to the slice data buffer; however, the HW requires that the slice data length exclude the pending data, otherwise the behavior is undefined https://bugs.freedesktop.org/show_bug.cgi?id=77041 Signed-off-by: Xiang, Haihao (cherry picked from commit a9004e6c5c7f33cd1e33e4dab92a5a0017714bbd) --- src/gen7_mfd.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index f9114e72..91036bec 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -1034,10 
+1034,35 @@ gen7_mfd_mpeg2_qm_state(VADriverContextP ctx, } } +uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param) +{ + uint8_t *buf; + uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3); + uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3); + uint32_t i; + + dri_bo_map(slice_data_bo, 0); + buf = (uint8_t *)slice_data_bo->virtual + buf_offset; + + for (i = 3; i < buf_size; i++) { + if (buf[i - 3] && + !buf[i - 2] && + !buf[i - 1] && + !buf[i]) { + dri_bo_unmap(slice_data_bo); + return i - 3 + 1; + } + } + + dri_bo_unmap(slice_data_bo); + return buf_size; +} + static void gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, VAPictureParameterBufferMPEG2 *pic_param, VASliceParameterBufferMPEG2 *slice_param, + dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *next_slice_param, struct gen7_mfd_context *gen7_mfd_context) { @@ -1068,7 +1093,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 5); OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2)); OUT_BCS_BATCH(batch, - slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); + mpeg2_get_slice_data_length(slice_data_bo, slice_param)); OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3)); OUT_BCS_BATCH(batch, @@ -1131,7 +1156,7 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx, else next_slice_param = next_slice_group_param; - gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context); + gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context); slice_param++; } } -- cgit v1.2.1 From c39d20832155811acfd26d85e3a0b368a567916d Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sat, 24 May 2014 14:36:12 +0800 Subject: New structure i965_fourcc_info and hold all supported fourcc in an array v2: bpp[] in bit and fix the vertical factor for 
411P (Yakui) Signed-off-by: Xiang, Haihao (cherry picked from commit 1de3a2cdc8c3f8b2f6191c0f114fa1167f40f2ec) Conflicts: src/i965_drv_video.c --- src/i965_drv_video.c | 194 ++++++++++++++++++++++++++++++++------------------- src/i965_fourcc.h | 32 +++++++++ 2 files changed, 153 insertions(+), 73 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index c7da398a..00479cf7 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -95,6 +95,117 @@ static int get_sampling_from_fourcc(unsigned int fourcc); #define IS_VA_WAYLAND(ctx) \ (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_WAYLAND) +#define I965_BIT 1 +#define I965_2BITS (I965_BIT << 1) +#define I965_4BITS (I965_BIT << 2) +#define I965_8BITS (I965_BIT << 3) +#define I965_16BITS (I965_BIT << 4) +#define I965_32BITS (I965_BIT << 5) + +#define PLANE_0 0 +#define PLANE_1 1 +#define PLANE_2 2 + +#define OFFSET_0 0 +#define OFFSET_4 4 +#define OFFSET_8 8 +#define OFFSET_16 16 +#define OFFSET_24 24 + +/* hfactor, vfactor, num_planes, bpp[], num_components, components[] */ +#define I_NV12 2, 2, 2, {I965_8BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_8} } +#define I_I420 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_IYUV I_I420 +#define I_IMC3 I_I420 +#define I_YV12 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} } +#define I_IMC1 I_YV12 + +#define I_422H 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_422V 1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_YV16 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} } +#define I_YUY2 2, 1, 1, {I965_16BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0,
OFFSET_8}, {PLANE_0, OFFSET_24} } +#define I_UYVY 2, 1, 1, {I965_16BITS}, 3, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_16} } + +#define I_444P 1, 1, 3, {I965_8BITS, I965_8BITS, I965_8BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } + +#define I_411P 4, 1, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } + +#define I_Y800 1, 1, 1, {I965_8BITS}, 1, { {PLANE_0, OFFSET_0} } + +#define I_RGBA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24} } +#define I_RGBX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16} } +#define I_BGRA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_24} } +#define I_BGRX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} } + +#define I_ARGB 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_0} } +#define I_ABGR 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} } + +#define I_IA88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8} } +#define I_AI88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} } + +#define I_IA44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_4} } +#define I_AI44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_4}, {PLANE_0, OFFSET_0} } + +/* flag */ +#define I_S 1 +#define I_I 2 +#define I_SI (I_S | I_I) + +#define DEF_FOUCC_INFO(FOURCC, FORMAT, SUB, FLAG) { VA_FOURCC_##FOURCC, I965_COLOR_##FORMAT, SUBSAMPLE_##SUB, FLAG, I_##FOURCC } +#define DEF_YUV(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, YUV, SUB, FLAG) +#define DEF_RGB(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, RGB, SUB, FLAG) +#define DEF_INDEX(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC,
INDEX, SUB, FLAG) + +static const i965_fourcc_info i965_fourcc_infos[] = { + DEF_YUV(NV12, YUV420, I_SI), + DEF_YUV(I420, YUV420, I_SI), + DEF_YUV(IYUV, YUV420, I_S), + DEF_YUV(IMC3, YUV420, I_S), + DEF_YUV(YV12, YUV420, I_SI), + DEF_YUV(IMC1, YUV420, I_S), + + DEF_YUV(422H, YUV422H, I_SI), + DEF_YUV(422V, YUV422V, I_S), + DEF_YUV(YV16, YUV422H, I_S), + DEF_YUV(YUY2, YUV422H, I_SI), + DEF_YUV(UYVY, YUV422H, I_SI), + + DEF_YUV(444P, YUV444, I_S), + + DEF_YUV(411P, YUV411, I_S), + + DEF_YUV(Y800, YUV400, I_S), + + DEF_RGB(RGBA, RGBX, I_SI), + DEF_RGB(RGBX, RGBX, I_SI), + DEF_RGB(BGRA, RGBX, I_SI), + DEF_RGB(BGRX, RGBX, I_SI), + + DEF_RGB(ARGB, RGBX, I_I), + DEF_RGB(ABGR, RGBX, I_I), + + DEF_INDEX(IA88, RGBX, I_I), + DEF_INDEX(AI88, RGBX, I_I), + + DEF_INDEX(IA44, RGBX, I_I), + DEF_INDEX(AI44, RGBX, I_I) +}; + +const i965_fourcc_info * +get_fourcc_info(unsigned int fourcc) +{ + unsigned int i; + + for (i = 0; i < ARRAY_ELEMS(i965_fourcc_infos); i++) { + const i965_fourcc_info * const info = &i965_fourcc_infos[i]; + + if (info->fourcc == fourcc) + return info; + } + + return NULL; +} + enum { I965_SURFACETYPE_RGBA = 1, I965_SURFACETYPE_YUV, @@ -918,35 +1029,12 @@ i965_suface_external_memory(VADriverContextP ctx, static int bpp_1stplane_by_fourcc(unsigned int fourcc) { - switch (fourcc) { - case VA_FOURCC_RGBA: - case VA_FOURCC_RGBX: - case VA_FOURCC_BGRA: - case VA_FOURCC_BGRX: - case VA_FOURCC_ARGB: - case VA_FOURCC_XRGB: - case VA_FOURCC_ABGR: - case VA_FOURCC_XBGR: - case VA_FOURCC_AYUV: - return 4; + const i965_fourcc_info *info = get_fourcc_info(fourcc); - case VA_FOURCC_UYVY: - case VA_FOURCC_YUY2: - return 2; - - case VA_FOURCC_Y800: - case VA_FOURCC_YV12: - case VA_FOURCC_IMC3: - case VA_FOURCC_IYUV: - case VA_FOURCC_NV12: - case VA_FOURCC_NV11: - case VA_FOURCC_YV16: - return 1; - - default: - ASSERT_RET(0, 0); - return 0; - } + if (info && (info->flag & I_S)) + return info->bpp[0] / 8; + else + return 0; } static VAStatus @@ -3315,52 +3403,12 @@
i965_SetImagePalette(VADriverContextP ctx, static int get_sampling_from_fourcc(unsigned int fourcc) { - int surface_sampling = -1; - - switch (fourcc) { - case VA_FOURCC_NV12: - case VA_FOURCC_YV12: - case VA_FOURCC_I420: - case VA_FOURCC_IYUV: - case VA_FOURCC_IMC1: - case VA_FOURCC_IMC3: - surface_sampling = SUBSAMPLE_YUV420; - break; - case VA_FOURCC_YUY2: - case VA_FOURCC_UYVY: - case VA_FOURCC_422H: - case VA_FOURCC_YV16: - surface_sampling = SUBSAMPLE_YUV422H; - break; - case VA_FOURCC_422V: - surface_sampling = SUBSAMPLE_YUV422V; - break; - - case VA_FOURCC_444P: - surface_sampling = SUBSAMPLE_YUV444; - break; + const i965_fourcc_info *info = get_fourcc_info(fourcc); - case VA_FOURCC_411P: - surface_sampling = SUBSAMPLE_YUV411; - break; - - case VA_FOURCC_Y800: - surface_sampling = SUBSAMPLE_YUV400; - break; - case VA_FOURCC_RGBA: - case VA_FOURCC_RGBX: - case VA_FOURCC_BGRA: - case VA_FOURCC_BGRX: - surface_sampling = SUBSAMPLE_RGBX; - break; - default: - /* Never get here */ - ASSERT_RET(0, 0); - break; - - } - - return surface_sampling; + if (info && (info->flag & I_S)) + return info->subsampling; + else + return -1; } static inline void diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h index 510c0441..3a9f1206 100644 --- a/src/i965_fourcc.h +++ b/src/i965_fourcc.h @@ -33,4 +33,36 @@ #define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2') #endif +#define I965_MAX_PLANES 4 +#define I965_MAX_COMONENTS 4 + +#define I965_COLOR_YUV 0 +#define I965_COLOR_RGB 1 +#define I965_COLOR_INDEX 2 + +typedef struct { + uint8_t plane; /* the plane which the pixel belongs to */ + uint8_t offset; /* bits offset within a pixel in the plane */ +} i965_component_info; + +typedef struct { + uint32_t fourcc; /* fourcc */ + uint32_t format; /* 0: YUV, 1: RGB, 2: Indexed format */ + uint32_t subsampling; /* Sub sampling */ + uint8_t flag; /* 1: only supported by vaCreateSurfaces(), 2: only supported by vaCreateImage(), 3: both */ + uint8_t hfactor; /* horizontal sampling factor */ + 
uint8_t vfactor; /* vertical sampling factor */ + uint8_t num_planes; /* number of planes */ + uint8_t bpp[I965_MAX_PLANES]; /* bits per pixel within a plane */ + uint8_t num_components; /* number of components */ + /* + * Components in the array are ordered in Y, U, V, A (up to 4 components) + * for YUV formats, R, G, B, A (up to 4 components) for RGB formats and + * I, A (2 components) for indexed formats + */ + i965_component_info components[I965_MAX_COMONENTS]; +} i965_fourcc_info; + +extern const i965_fourcc_info *get_fourcc_info(unsigned int); + #endif /* _I965_FOURCC_H_ */ -- cgit v1.2.1 From 05dbef137a9b79a491bccc7441fe3b79af1f9e10 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 26 May 2014 10:14:51 +0800 Subject: VPP: Simplify surface state setting for csc and scaling on IVB/HSW/BDW v2: bpp[] is in unit of bits Signed-off-by: Xiang, Haihao (cherry picked from commit d415357f25fc01b96592ba29ba95da9d6dc82ff3) --- src/gen8_post_processing.c | 180 ++++++++++++++++++++------------------------- src/i965_post_processing.c | 179 ++++++++++++++++++++------------------------ 2 files changed, 162 insertions(+), 197 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 22279383..b55f5be0 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -463,18 +463,10 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct object_image *obj_image; dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); - const int U = (fourcc == VA_FOURCC_YV12 || - fourcc == VA_FOURCC_YV16 || - fourcc == VA_FOURCC_IMC1) ? 2 : 1; - const int V = (fourcc == VA_FOURCC_YV12 || - fourcc == VA_FOURCC_YV16 || - fourcc == VA_FOURCC_IMC1) ? 
1 : 2; - int interleaved_uv = fourcc == VA_FOURCC_NV12; - int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); - int rgbx_format = (fourcc == VA_FOURCC_RGBA || - fourcc == VA_FOURCC_RGBX || - fourcc == VA_FOURCC_BGRA || - fourcc == VA_FOURCC_BGRX); + const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc); + + if (fourcc_info == NULL) + return; if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; @@ -484,16 +476,8 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[0] = obj_surface->width; offset[0] = 0; - if (packed_yuv) { - if (is_target) - width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ - - } else if (rgbx_format) { - if (is_target) - width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - } + if (fourcc_info->num_planes == 1 && is_target) + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ width[1] = obj_surface->cb_cr_width; height[1] = obj_surface->cb_cr_height; @@ -505,6 +489,9 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[2] = obj_surface->cb_cr_pitch; offset[2] = obj_surface->y_cr_offset * obj_surface->width; } else { + int U = 0, V = 0; + + /* FIXME: add support for ARGB/ABGR image */ obj_image = (struct object_image *)surface->base; bo = obj_image->bo; width[0] = obj_image->image.width; @@ -512,35 +499,30 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[0] = obj_image->image.pitches[0]; offset[0] = obj_image->image.offsets[0]; - if (rgbx_format) { - if (is_target) - width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ - } else if (packed_yuv) { + if (fourcc_info->num_planes == 1) { if (is_target) - width[0] = 
obj_image->image.width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels */ - } else if (interleaved_uv) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[1]; - offset[1] = obj_image->image.offsets[1]; + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ + } else if (fourcc_info->num_planes == 2) { + U = 1, V = 1; } else { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[U]; - offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height / 2; - pitch[2] = obj_image->image.pitches[V]; - offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC_YV16 || fourcc == VA_FOURCC_422H) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height; - } + assert(fourcc_info->num_components == 3); + + U = fourcc_info->components[1].plane; + V = fourcc_info->components[2].plane; + assert((U == 1 && V == 2) || + (U == 2 && V == 1)); } + + /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ + width[1] = obj_image->image.width / fourcc_info->hfactor; + height[1] = obj_image->image.height / fourcc_info->vfactor; + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + + width[2] = obj_image->image.width / fourcc_info->hfactor; + height[2] = obj_image->image.height / fourcc_info->vfactor; + pitch[2] = obj_image->image.pitches[V]; + offset[2] = obj_image->image.offsets[V]; } if (is_target) { @@ -549,34 +531,34 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc width[0] / 4, height[0], pitch[0], I965_SURFACEFORMAT_R8_UINT, base_index, 1); - if 
(rgbx_format) { - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - /* the format is MSB: X-B-G-R */ - pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC_BGRA) || - (fourcc == VA_FOURCC_BGRX)) { - /* It is stored as MSB: X-R-G-B */ - pp_static_parameter->grf2.save_avs_rgb_swap = 1; - } - } - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 2, height[1], pitch[1], - I965_SURFACEFORMAT_R8G8_SINT, - base_index + 1, 1); - } else { - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 4, height[1], pitch[1], - I965_SURFACEFORMAT_R8_SINT, - base_index + 1, 1); - gen8_pp_set_surface_state(ctx, pp_context, - bo, offset[2], - width[2] / 4, height[2], pitch[2], - I965_SURFACEFORMAT_R8_SINT, - base_index + 2, 1); + + if (fourcc_info->num_planes == 2) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else if (fourcc_info->num_planes == 3) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + + if (fourcc_info->format == I965_COLOR_RGB) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { + /* It is stored as MSB: X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; } } } else { @@ -594,7 +576,8 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc default: break; } - if (rgbx_format) { + + if (fourcc_info->format == 
I965_COLOR_RGB) { struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; @@ -604,6 +587,7 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pp_static_parameter->grf2.src_avs_rgb_swap = 1; } } + gen8_pp_set_surface2_state(ctx, pp_context, bo, offset[0], width[0], height[0], pitch[0], @@ -611,28 +595,26 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc format0, 0, base_index); - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8B8_UNORM, 0, - base_index + 1); - } else { - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 1); - gen8_pp_set_surface2_state(ctx, pp_context, - bo, offset[2], - width[2], height[2], pitch[2], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 2); - } + if (fourcc_info->num_planes == 2) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else if (fourcc_info->num_planes == 3) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); } } } diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index acc9732c..ba4fdc31 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1823,18 +1823,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc struct object_image *obj_image; dri_bo *bo; int fourcc = 
pp_get_surface_fourcc(ctx, surface); - const int U = (fourcc == VA_FOURCC_YV12 || - fourcc == VA_FOURCC_YV16 || - fourcc == VA_FOURCC_IMC1) ? 2 : 1; - const int V = (fourcc == VA_FOURCC_YV12 || - fourcc == VA_FOURCC_YV16 || - fourcc == VA_FOURCC_IMC1) ? 1 : 2; - int interleaved_uv = fourcc == VA_FOURCC_NV12; - int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); - int rgbx_format = (fourcc == VA_FOURCC_RGBA || - fourcc == VA_FOURCC_RGBX || - fourcc == VA_FOURCC_BGRA || - fourcc == VA_FOURCC_BGRX); + const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc); + + if (fourcc_info == NULL) + return; if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; @@ -1844,15 +1836,8 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[0] = obj_surface->width; offset[0] = 0; - if (packed_yuv) { - if (is_target) - width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ - } else if (rgbx_format) { - if (is_target) - width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - } + if (fourcc_info->num_planes == 1 && is_target) + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ width[1] = obj_surface->cb_cr_width; height[1] = obj_surface->cb_cr_height; @@ -1864,6 +1849,9 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[2] = obj_surface->cb_cr_pitch; offset[2] = obj_surface->y_cr_offset * obj_surface->width; } else { + int U = 0, V = 0; + + /* FIXME: add support for ARGB/ABGR image */ obj_image = (struct object_image *)surface->base; bo = obj_image->bo; width[0] = obj_image->image.width; @@ -1871,35 +1859,30 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pitch[0] = obj_image->image.pitches[0]; offset[0] 
= obj_image->image.offsets[0]; - if (rgbx_format) { - if (is_target) - width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ - } else if (packed_yuv) { + if (fourcc_info->num_planes == 1) { if (is_target) - width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels */ - } else if (interleaved_uv) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[1]; - offset[1] = obj_image->image.offsets[1]; + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ + } else if (fourcc_info->num_planes == 2) { + U = 1, V = 1; } else { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[U]; - offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height / 2; - pitch[2] = obj_image->image.pitches[V]; - offset[2] = obj_image->image.offsets[V]; - if (fourcc == VA_FOURCC_YV16 || fourcc == VA_FOURCC_422H) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height; - } + assert(fourcc_info->num_components == 3); + + U = fourcc_info->components[1].plane; + V = fourcc_info->components[2].plane; + assert((U == 1 && V == 2) || + (U == 2 && V == 1)); } + + /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ + width[1] = obj_image->image.width / fourcc_info->hfactor; + height[1] = obj_image->image.height / fourcc_info->vfactor; + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + + width[2] = obj_image->image.width / fourcc_info->hfactor; + height[2] = obj_image->image.height / fourcc_info->vfactor; + pitch[2] = obj_image->image.pitches[V]; + 
offset[2] = obj_image->image.offsets[V]; } if (is_target) { @@ -1908,34 +1891,34 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc width[0] / 4, height[0], pitch[0], I965_SURFACEFORMAT_R8_UINT, base_index, 1); - if (rgbx_format) { - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - /* the format is MSB: X-B-G-R */ - pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC_BGRA) || - (fourcc == VA_FOURCC_BGRX)) { - /* It is stored as MSB: X-R-G-B */ - pp_static_parameter->grf2.save_avs_rgb_swap = 1; - } - } - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 2, height[1], pitch[1], - I965_SURFACEFORMAT_R8G8_SINT, - base_index + 1, 1); - } else { - gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 4, height[1], pitch[1], - I965_SURFACEFORMAT_R8_SINT, - base_index + 1, 1); - gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[2], - width[2] / 4, height[2], pitch[2], - I965_SURFACEFORMAT_R8_SINT, - base_index + 2, 1); + + if (fourcc_info->num_planes == 2) { + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else if (fourcc_info->num_planes == 3) { + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + + if (fourcc_info->format == I965_COLOR_RGB) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { + /* It is stored as MSB: 
X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; } } } else { @@ -1953,7 +1936,8 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc default: break; } - if (rgbx_format) { + + if (fourcc_info->format == I965_COLOR_RGB) { struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; @@ -1963,6 +1947,7 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc pp_static_parameter->grf2.src_avs_rgb_swap = 1; } } + gen7_pp_set_surface2_state(ctx, pp_context, bo, offset[0], width[0], height[0], pitch[0], @@ -1970,28 +1955,26 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc format0, 0, base_index); - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8B8_UNORM, 0, - base_index + 1); - } else { - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 1); - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[2], - width[2], height[2], pitch[2], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 2); - } + if (fourcc_info->num_planes == 2) { + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else if (fourcc_info->num_planes == 3) { + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); } } } -- cgit v1.2.1 From aa1b177ace6b32db76e541c35c951433c909529e Mon Sep 17 00:00:00 
2001 From: "Xiang, Haihao" Date: Mon, 26 May 2014 10:14:52 +0800 Subject: Fix the scaling issue on IVB/HSW/BDW Scaling is done on each 16x16 block. The shader for scaling might write pixels out-of-rectangle if the rectangle width/height isn't aligned to 16. Signed-off-by: Xiang, Haihao (cherry picked from commit d560387cc819a31791c2a30026473c9bd8786f07) --- src/gen8_post_processing.c | 30 +++++++++++++++--------------- src/i965_post_processing.c | 33 ++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index b55f5be0..1e6068d7 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -457,6 +457,7 @@ static void gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *surface, int base_index, int is_target, + const VARectangle *rect, int *width, int *height, int *pitch, int *offset) { struct object_surface *obj_surface; @@ -471,21 +472,21 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; bo = obj_surface->bo; - width[0] = obj_surface->orig_width; - height[0] = obj_surface->orig_height; + width[0] = MIN(rect->x + rect->width, obj_surface->orig_width); + height[0] = MIN(rect->y + rect->height, obj_surface->orig_height); pitch[0] = obj_surface->width; offset[0] = 0; if (fourcc_info->num_planes == 1 && is_target) width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ - width[1] = obj_surface->cb_cr_width; - height[1] = obj_surface->cb_cr_height; + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[1] = obj_surface->cb_cr_pitch; offset[1] = 
obj_surface->y_cb_offset * obj_surface->width; - width[2] = obj_surface->cb_cr_width; - height[2] = obj_surface->cb_cr_height; + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[2] = obj_surface->cb_cr_pitch; offset[2] = obj_surface->y_cr_offset * obj_surface->width; } else { @@ -494,8 +495,8 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc /* FIXME: add support for ARGB/ABGR image */ obj_image = (struct object_image *)surface->base; bo = obj_image->bo; - width[0] = obj_image->image.width; - height[0] = obj_image->image.height; + width[0] = MIN(rect->x + rect->width, obj_image->image.width); + height[0] = MIN(rect->y + rect->height, obj_image->image.height); pitch[0] = obj_image->image.pitches[0]; offset[0] = obj_image->image.offsets[0]; @@ -514,13 +515,13 @@ gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc } /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ - width[1] = obj_image->image.width / fourcc_info->hfactor; - height[1] = obj_image->image.height / fourcc_info->vfactor; + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); pitch[1] = obj_image->image.pitches[U]; offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / fourcc_info->hfactor; - height[2] = obj_image->image.height / fourcc_info->vfactor; + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, 
obj_image->image.height / fourcc_info->vfactor); pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; } @@ -760,12 +761,14 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* source surface */ gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + src_rect, width, height, pitch, offset); src_height = height[0]; src_width = width[0]; /* destination surface */ gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + dst_rect, width, height, pitch, offset); /* sampler 8x8 state */ @@ -946,9 +949,6 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ - pp_static_parameter->grf2.avs_wa_width = src_width; - pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); pp_static_parameter->grf2.alpha = 255; pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index ba4fdc31..3ee3f073 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1817,6 +1817,7 @@ static void gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *surface, int base_index, int is_target, + const VARectangle *rect, int *width, int *height, int *pitch, int *offset) { struct object_surface *obj_surface; @@ -1831,21 +1832,21 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; bo = obj_surface->bo; - width[0] = obj_surface->orig_width; - height[0] = 
obj_surface->orig_height; + width[0] = MIN(rect->x + rect->width, obj_surface->orig_width); + height[0] = MIN(rect->y + rect->height, obj_surface->orig_height); pitch[0] = obj_surface->width; offset[0] = 0; if (fourcc_info->num_planes == 1 && is_target) width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ - width[1] = obj_surface->cb_cr_width; - height[1] = obj_surface->cb_cr_height; + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[1] = obj_surface->cb_cr_pitch; offset[1] = obj_surface->y_cb_offset * obj_surface->width; - width[2] = obj_surface->cb_cr_width; - height[2] = obj_surface->cb_cr_height; + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[2] = obj_surface->cb_cr_pitch; offset[2] = obj_surface->y_cr_offset * obj_surface->width; } else { @@ -1854,8 +1855,8 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc /* FIXME: add support for ARGB/ABGR image */ obj_image = (struct object_image *)surface->base; bo = obj_image->bo; - width[0] = obj_image->image.width; - height[0] = obj_image->image.height; + width[0] = MIN(rect->x + rect->width, obj_image->image.width); + height[0] = MIN(rect->y + rect->height, obj_image->image.height); pitch[0] = obj_image->image.pitches[0]; offset[0] = obj_image->image.offsets[0]; @@ -1874,13 +1875,13 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc } /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ - width[1] = obj_image->image.width / fourcc_info->hfactor; - height[1] = obj_image->image.height / fourcc_info->vfactor; + width[1] = MIN(rect->x 
/ fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); pitch[1] = obj_image->image.pitches[U]; offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / fourcc_info->hfactor; - height[2] = obj_image->image.height / fourcc_info->vfactor; + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); pitch[2] = obj_image->image.pitches[V]; offset[2] = obj_image->image.offsets[V]; } @@ -2750,12 +2751,14 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* source surface */ gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + src_rect, width, height, pitch, offset); src_width = width[0]; src_height = height[0]; /* destination surface */ gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + dst_rect, width, height, pitch, offset); /* sampler 8x8 state */ @@ -2934,9 +2937,9 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con } } - pp_static_parameter->grf2.avs_wa_width = dw; - pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); + pp_static_parameter->grf2.avs_wa_width = src_width; + pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); + pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); pp_static_parameter->grf2.alpha = 255; pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; -- cgit v1.2.1 From 
9047420a7312a94403bf863c0f11c44bef84b065 Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Wed, 28 May 2014 16:38:01 +0800 Subject: debug: add g_intel_debug_option_flags for simple driver debug VA_INTEL_DEBUG_ASSERT decides assert() is enabled or not VA_INTEL_DEBUG_BENCH decides skipping swapbuffer in dri output (cherry picked from commit 60413182f66c44781456e827b439e98f21cfae4c) --- src/i965_output_dri.c | 5 ++--- src/intel_driver.c | 9 +++++++++ src/intel_driver.h | 9 +++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index fdd69cea..2a812d3a 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -137,8 +137,7 @@ i965_put_surface_dri( * will get here */ obj_surface = SURFACE(surface); - if (!obj_surface || !obj_surface->bo) - return VA_STATUS_SUCCESS; + ASSERT_RET(obj_surface && obj_surface->bo, VA_STATUS_SUCCESS); _i965LockMutex(&i965->render_mutex); @@ -204,7 +203,7 @@ i965_put_surface_dri( } } - if (!getenv("INTEL_DEBUG_BENCH")) + if (!(g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_BENCH)) dri_vtable->swap_buffer(ctx, dri_drawable); obj_surface->flags |= SURFACE_DISPLAYED; diff --git a/src/intel_driver.c b/src/intel_driver.c index e3e082d1..994e64c3 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -34,6 +34,7 @@ #include "intel_batchbuffer.h" #include "intel_memman.h" #include "intel_driver.h" +uint32_t g_intel_debug_option_flags = 0; static Bool intel_driver_get_param(struct intel_driver_data *intel, int param, int *value) @@ -75,6 +76,14 @@ intel_driver_init(VADriverContextP ctx) struct intel_driver_data *intel = intel_driver_data(ctx); struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state; int has_exec2 = 0, has_bsd = 0, has_blt = 0, has_vebox = 0; + char *env_str = NULL; + + g_intel_debug_option_flags = 0; + if ((env_str = getenv("VA_INTEL_DEBUG"))) + g_intel_debug_option_flags = atoi(env_str); + + if (g_intel_debug_option_flags) + 
fprintf(stderr, "g_intel_debug_option_flags:%x\n", g_intel_debug_option_flags); assert(drm_state); assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) || diff --git a/src/intel_driver.h b/src/intel_driver.h index 8636b216..7a726e32 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -76,9 +76,14 @@ struct intel_batchbuffer; #define True 1 #define False 0 +extern uint32_t g_intel_debug_option_flags; +#define VA_INTEL_DEBUG_OPTION_ASSERT (1 << 0) +#define VA_INTEL_DEBUG_OPTION_BENCH (1 << 1) + #define ASSERT_RET(value, fail_ret) do { \ - if (!(value)) { \ - assert(0); \ + if (!(value)) { \ + if (g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_ASSERT) \ + assert(value); \ return fail_ret; \ } \ } while (0) -- cgit v1.2.1 From 364d9b1968f905573bd6ae0e27d43ccdaedea4d5 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 9 Jun 2014 11:26:54 +0800 Subject: Update NEWS for 1.3.2 pre1 Signed-off-by: Xiang, Haihao --- NEWS | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 307127ed..112b562d 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,17 @@ -libva-intel-driver NEWS -- summary of changes. 2014-05-09 +libva-intel-driver NEWS -- summary of changes. 
2014-06-xx Copyright (C) 2009-2014 Intel Corporation +Version 1.3.2 - xx.Jun.2014 +* Export JPEG format by vaDeriveImage() +* Add support for MADI on SNB +* H.264: fix the support for grayscale format (Y800) +* Fix vaGetConfigAttributes() to validate the profile/entrypoint pair +* Fix vaCreateConfig() to not override user chroma format +* Fix the scaling issue on IVB/HSW/BDW +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=73424 +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=72522 +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77041 + Version 1.3.1 - 09.May.2014 * Add support for STE on Broadwell * Add support for YV16 -- cgit v1.2.1 From 5e89ef46609d590f2807ef26f39eb532cb948134 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 9 Jun 2014 12:08:13 +0800 Subject: Encoding: Fix one type error in intra-prediction shader on BDW Otherwise it will cause the incorrect intra-prediction for encoding on Broadwell. Signed-off-by: Zhao Yakui (cherry picked from commit 20bee4c3cb478702155df1779f24ec483aeab059) --- src/shaders/vme/intra_frame_gen8.asm | 2 +- src/shaders/vme/intra_frame_gen8.g8b | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm index 41cdb3a5..682d1468 100644 --- a/src/shaders/vme/intra_frame_gen8.asm +++ b/src/shaders/vme/intra_frame_gen8.asm @@ -127,7 +127,7 @@ mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* m0 */ /* 16x16 Source, Intra_harr */ add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1}; -mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* after verification it will be passed by using payload */ send (8) diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b index 43c904a5..56c7283b 100644 --- a/src/shaders/vme/intra_frame_gen8.g8b +++ b/src/shaders/vme/intra_frame_gen8.g8b @@ -56,7 +56,7 @@ { 0x00000001, 0x247e2288, 
0x00000400, 0x00000000 }, { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, - { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, -- cgit v1.2.1 From cc403687155f8b79b3752e32731d44d39c040642 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 16 Jun 2014 10:21:07 +0800 Subject: Intel driver 1.3.2 Signed-off-by: Xiang, Haihao --- NEWS | 5 +++-- configure.ac | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 112b562d..1c6c3246 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ -libva-intel-driver NEWS -- summary of changes. 2014-06-xx +libva-intel-driver NEWS -- summary of changes. 2014-06-16 Copyright (C) 2009-2014 Intel Corporation -Version 1.3.2 - xx.Jun.2014 +Version 1.3.2 - 16.Jun.2014 * Export JPEG format by vaDeriveImage() * Add support for MADI on SNB * H.264: fix the support for grayscale format (Y800) @@ -11,6 +11,7 @@ Version 1.3.2 - xx.Jun.2014 * Fix https://bugs.freedesktop.org/show_bug.cgi?id=73424 * Fix https://bugs.freedesktop.org/show_bug.cgi?id=72522 * Fix https://bugs.freedesktop.org/show_bug.cgi?id=77041 +* Quality improvement for H.264 encoding on BDW Version 1.3.1 - 09.May.2014 * Add support for STE on Broadwell diff --git a/configure.ac b/configure.ac index ad10943c..d0e0df0f 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 
ce2cc4e48e5478538fe332119bb0523db4761b44 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 16 Jun 2014 10:27:12 +0800 Subject: 1.3.3.pre1 for development Signed-off-by: Xiang, Haihao --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index d0e0df0f..86c2b40d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [3]) -m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [3]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 89dcd2e82093654d5c1ab68a9d132406dd641b34 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 27 Feb 2014 14:10:24 +0800 Subject: Check the pointer against NULL The issue is reported by Klockwork Signed-off-by: Xiang, Haihao (cherry picked from commit 80d1f89388c9cb70218cd759592d2167c8845322) --- src/gen6_mfc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 987fa52f..21db0a77 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -1480,6 +1480,9 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); + if (!mfc_context) + return False; + mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; -- cgit v1.2.1 From 8715ce33aae4e25ef98eed9c1bdcc6b95c5e3253 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 4 Mar 2014 16:23:07 +0800 Subject: Define i965_CreateSurfaces in header file explicitly to avoid 
multiple declaration Signed-off-by: Zhao Yakui (cherry picked from commit 8b3945aa5df443e93a3f5e6e97dffb1574e2a936) --- src/gen75_mfd.c | 8 -------- src/gen75_vpp_gpe.c | 8 -------- src/gen75_vpp_vebox.c | 8 -------- src/gen7_mfd.c | 8 -------- src/gen8_mfd.c | 8 -------- src/i965_drv_video.h | 7 +++++++ src/i965_encoder.c | 8 -------- src/i965_post_processing.c | 8 -------- 8 files changed, 7 insertions(+), 56 deletions(-) diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 895b1940..b29f0777 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -2460,14 +2460,6 @@ gen75_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static struct { int width; int height; diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 2e3b104c..042e4e61 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -51,14 +51,6 @@ #define CURBE_TOTAL_DATA_LENGTH (4 * 32) #define CURBE_URB_ENTRY_LENGTH 4 -extern VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - /* Shaders information for sharpening */ static const unsigned int gen75_gpe_sharpening_h_blur[][4] = { #include "shaders/post_processing/gen75/sharpening_h_blur.g75b" diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index d63729e1..1113c90a 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -54,14 +54,6 @@ extern VAStatus i965_DestroyImage(VADriverContextP ctx, VAImageID image); -extern VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - VAStatus vpp_surface_convert(VADriverContextP ctx, struct object_surface *src_obj_surf, struct object_surface *dst_obj_surf) diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 91036bec..58306184 100755 --- 
a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -2110,14 +2110,6 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static struct { int width; int height; diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index a080504a..1897d71b 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -2164,14 +2164,6 @@ gen8_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ #ifdef JPEG_WA -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static struct { int width; int height; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index e8bbf87e..66c3466d 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -435,6 +435,13 @@ extern VAStatus i965_DestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces); +extern VAStatus i965_CreateSurfaces(VADriverContextP ctx, + int width, + int height, + int format, + int num_surfaces, + VASurfaceID *surfaces); + #define I965_SURFACE_MEM_NATIVE 0 #define I965_SURFACE_MEM_GEM_FLINK 1 #define I965_SURFACE_MEM_DRM_PRIME 2 diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 174f882e..0846a390 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -44,14 +44,6 @@ extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_con extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static VAStatus intel_encoder_check_yuv_surface(VADriverContextP ctx, VAProfile profile, diff --git 
a/src/i965_post_processing.c b/src/i965_post_processing.c index 3ee3f073..6d435a86 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -59,14 +59,6 @@ vpp_surface_convert(VADriverContextP ctx, #define VA_STATUS_SUCCESS_1 0xFFFFFFFE -extern VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static const uint32_t pp_null_gen5[][4] = { #include "shaders/post_processing/gen5_6/null.g4b.gen5" }; -- cgit v1.2.1 From ab9de41941ea86e09e0ea2a170ebd2c84693a508 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 3 Jun 2014 17:30:11 +0200 Subject: h264: Add the macros for MVC profiles to keep backward compatibility with libva 1.3.1 It is a part of 1f244834dedb7b46863b315a898d8649d01c5f58 on staging Signed-off-by: Gwenole Beauchesne Signed-off-by: Xiang, Haihao --- src/va_backend_compat.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h index 267f1d8f..5fcb1983 100644 --- a/src/va_backend_compat.h +++ b/src/va_backend_compat.h @@ -51,4 +51,9 @@ #endif +#if !VA_CHECK_VERSION(0,35,2) +# define VAProfileH264MultiviewHigh 15 +# define VAProfileH264StereoHigh 16 +#endif + #endif /* VA_BACKEND_COMPAT_H */ -- cgit v1.2.1 From 40af27ded9830fa61dcf61d01c21551e1a148647 Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Tue, 10 Dec 2013 10:35:38 +0800 Subject: MVC: CODEC_H264_MVC definition and related properties check Signed-off-by: Li Xiaowei (cherry picked from commit 7d1ddfd3646f35f306f38bfabef6af9b2ebb19f4) Conflicts: src/i965_drv_video.c --- src/i965_device_info.c | 4 ++++ src/i965_drv_video.c | 40 +++++++++++++++++++++++++++++++++++++--- src/i965_drv_video.h | 3 +++ src/i965_encoder.c | 7 +++++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 6fad1061..c0b26e86 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -179,6
+179,8 @@ static const struct hw_codec_info hsw_hw_codec_info = { .has_tiled_surface = 1, .has_di_motion_adptive = 1, .has_di_motion_compensated = 1, + .has_h264_mvc_decoding = 1, + .has_h264_mvc_encoding = 1, .num_filters = 5, .filters = { @@ -221,6 +223,8 @@ static const struct hw_codec_info bdw_hw_codec_info = { .has_di_motion_adptive = 1, .has_di_motion_compensated = 1, .has_vp8_decoding = 1, + .has_h264_mvc_decoding = 1, + .has_h264_mvc_encoding = 1, .num_filters = 5, .filters = { diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 00479cf7..f23b4491 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -84,6 +84,11 @@ #define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \ (ctx)->intel.has_bsd) +#define HAS_H264_MVC_DECODING(ctx) ((ctx)->codec_info->has_h264_mvc_decoding && \ + (ctx)->intel.has_bsd) + +#define HAS_H264_MVC_ENCODING(ctx) ((ctx)->codec_info->has_h264_mvc_encoding && \ + (ctx)->intel.has_bsd) static int get_sampling_from_fourcc(unsigned int fourcc); @@ -405,6 +410,12 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileVP8Version0_3; } + if (HAS_H264_MVC_DECODING(i965) || + HAS_H264_MVC_ENCODING(i965)) { + profile_list[i++] = VAProfileH264MultiviewHigh; + profile_list[i++] = VAProfileH264StereoHigh; + } + /* If the assert fails then I965_MAX_PROFILES needs to be bigger */ ASSERT_RET(i <= I965_MAX_PROFILES, VA_STATUS_ERROR_OPERATION_FAILED); *num_profiles = i; @@ -437,11 +448,19 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, case VAProfileH264High: if (HAS_H264_DECODING(i965)) entrypoint_list[n++] = VAEntrypointVLD; - + if (HAS_H264_ENCODING(i965)) entrypoint_list[n++] = VAEntrypointEncSlice; break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (HAS_H264_MVC_DECODING(i965)) + entrypoint_list[n++] = VAEntrypointVLD; + + if (HAS_H264_MVC_ENCODING(i965)) + entrypoint_list[n++] = VAEntrypointEncSlice; + break; case VAProfileVC1Simple: case VAProfileVC1Main: 
@@ -542,6 +561,17 @@ i965_validate_config(VADriverContextP ctx, VAProfile profile, } break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if ((HAS_H264_MVC_DECODING(i965) && VAEntrypointVLD == entrypoint) || + (HAS_H264_MVC_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + + break; + default: va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; break; @@ -696,6 +726,7 @@ i965_CreateConfig(VADriverContextP ctx, VAStatus vaStatus; vaStatus = i965_validate_config(ctx, profile, entrypoint); + if (VA_STATUS_SUCCESS != vaStatus) { return vaStatus; } @@ -1956,7 +1987,8 @@ i965_MapBuffer(VADriverContextP ctx, coded_buffer_segment->base.buf = buffer = (unsigned char *)(obj_buffer->buffer_store->bo->virtual) + I965_CODEDBUFFER_HEADER_SIZE; - if (coded_buffer_segment->codec == CODEC_H264) { + if (coded_buffer_segment->codec == CODEC_H264 || + coded_buffer_segment->codec == CODEC_H264_MVC) { delimiter0 = H264_DELIMITER0; delimiter1 = H264_DELIMITER1; delimiter2 = H264_DELIMITER2; @@ -2073,7 +2105,9 @@ i965_BeginPicture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: - vaStatus = VA_STATUS_SUCCESS; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + vaStatus = VA_STATUS_SUCCESS; break; case VAProfileVC1Simple: diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 66c3466d..75c66e9f 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -321,6 +321,8 @@ struct hw_codec_info unsigned int has_di_motion_compensated:1; unsigned int has_vp8_decoding:1; unsigned int has_vp8_encoding:1; + unsigned int has_h264_mvc_decoding:1; + unsigned int has_h264_mvc_encoding:1; unsigned int num_filters; struct i965_filter filters[VAProcFilterCount]; @@ -403,6 +405,7 @@ va_enc_packed_type_to_idx(int packed_type); /* reserve 2 byte for internal using */ #define CODEC_H264 0 
#define CODEC_MPEG2 1 +#define CODEC_H264_MVC 2 #define H264_DELIMITER0 0x00 #define H264_DELIMITER1 0x00 diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 0846a390..14c37bb7 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -258,6 +258,8 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context); break; @@ -346,6 +348,11 @@ intel_enc_hw_context_init(VADriverContextP ctx, encoder_context->codec = CODEC_H264; break; + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: + encoder_context->codec = CODEC_H264_MVC; + break; + default: /* Never get here */ assert(0); -- cgit v1.2.1 From 1ae22a0549d30dd24991c4c2c9902e7b7a8d935e Mon Sep 17 00:00:00 2001 From: Li Xiaowei Date: Thu, 19 Dec 2013 17:51:45 +0800 Subject: encoder: MVC: Add support for MVC profiles This is a part of bd630edd844b88ea543a027654db296ff7da16cd on staging Signed-off-by: Li Xiaowei Signed-off-by: Xiang, Haihao --- src/gen75_mfc.c | 13 +++++++++---- src/gen75_vme.c | 5 ++++- src/gen8_mfc.c | 13 +++++++++---- src/gen8_vme.c | 4 +++- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 2bfb6c2a..48d84da1 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -427,7 +427,8 @@ static void gen75_mfc_init(VADriverContextP ctx, int height_in_mbs = 0; int slice_batchbuffer_size; - if (encoder_context->codec == CODEC_H264) { + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; width_in_mbs = pSequenceParameter->picture_width_in_mbs; height_in_mbs = pSequenceParameter->picture_height_in_mbs; @@ -1182,13 +1183,13 @@ 
gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - gen75_mfc_avc_slice_state(ctx, + gen75_mfc_avc_slice_state(ctx, pPicParameter, pSliceParameter, encode_state, encoder_context, (rate_control_mode == VA_RC_CBR), qp, slice_batch); - if ( slice_index == 0) + if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); @@ -1199,6 +1200,8 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 5, /* first 5 bytes are start code + nal unit type */ 1, 0, 1, slice_batch); + free(slice_header); + dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1244,7 +1247,6 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 1, 1, 1, 0, slice_batch); } - free(slice_header); } @@ -1556,6 +1558,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, 0, 1, slice_batch); + free(slice_header); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ @@ -2534,6 +2537,8 @@ static VAStatus gen75_mfc_pipeline(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context); break; diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 0467f383..e8527c37 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -336,7 +336,8 @@ static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, vme_state_message = (unsigned int *)vme_context->vme_state_message; - if (encoder_context->codec == CODEC_H264) { + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { mv_num = 
16; @@ -462,6 +463,7 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx, switch (encoder_context->codec) { case CODEC_H264: + case CODEC_H264_MVC: gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); break; @@ -1010,6 +1012,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * switch (encoder_context->codec) { case CODEC_H264: + case CODEC_H264_MVC: vme_kernel_list = gen75_vme_kernels; encoder_context->vme_pipeline = gen75_vme_pipeline; i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index ac421bdb..df996036 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -401,7 +401,8 @@ static void gen8_mfc_init(VADriverContextP ctx, int height_in_mbs = 0; int slice_batchbuffer_size; - if (encoder_context->codec == CODEC_H264) { + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; width_in_mbs = pSequenceParameter->picture_width_in_mbs; height_in_mbs = pSequenceParameter->picture_height_in_mbs; @@ -1046,13 +1047,13 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - gen8_mfc_avc_slice_state(ctx, + gen8_mfc_avc_slice_state(ctx, pPicParameter, pSliceParameter, encode_state, encoder_context, (rate_control_mode == VA_RC_CBR), qp, slice_batch); - if ( slice_index == 0) + if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); @@ -1063,6 +1064,8 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 5, /* first 5 bytes are start code + nal unit type */ 1, 0, 1, 
slice_batch); + free(slice_header); + dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1108,7 +1111,6 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 1, 1, 1, 0, slice_batch); } - free(slice_header); } @@ -1452,6 +1454,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, 0, 1, slice_batch); + free(slice_header); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ @@ -2435,6 +2438,8 @@ static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context); break; diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 67571be7..34a3b685 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -321,7 +321,8 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, vme_state_message = (unsigned int *)vme_context->vme_state_message; - if (encoder_context->codec == CODEC_H264) { + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { mv_num = 16; @@ -1155,6 +1156,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e switch (encoder_context->codec) { case CODEC_H264: + case CODEC_H264_MVC: vme_kernel_list = gen8_vme_kernels; encoder_context->vme_pipeline = gen8_vme_pipeline; i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel); -- cgit v1.2.1 From 99075c5cc755a767e8904777d3a98bd8c222a03b Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 6 May 2014 13:08:28 +0200 Subject: decoder: h264: don't deallocate surface storage of older frames. Drop the optimization whereby surfaces that are no longer marked as reference and that were already displayed are to be destroyed. This is wrong mainly for two reasons: 1. The surface was displayed... 
once but it may still be needed for subsequent operations like displaying it again, using it for a transcode pipeline (encode) for instance, etc. 2. The new set of ReferenceFrames[] correspond to the active set of reference frames used for decoding the current slice. In presence of Multiview Coding (MVC), that could correspond to the current view, in view order index, but the surface may still be needed for decoding the next view with the same view_id, while also decoding other views with another set of reference frames for them. Signed-off-by: Gwenole Beauchesne (cherry picked from commit 77af916b44da04e3424490506a7e5bef39c80c7c) --- src/i965_decoder_utils.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index ae5fd76b..2b4651e6 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -451,15 +451,6 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, /* remove it from the internal DPB */ if (!found) { - struct object_surface *obj_surface = frame_store[i].obj_surface; - - obj_surface->flags &= ~SURFACE_REFERENCED; - - if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - i965_destroy_surface_storage(obj_surface); - } - frame_store[i].surface_id = VA_INVALID_ID; frame_store[i].frame_store_id = -1; frame_store[i].obj_surface = NULL; -- cgit v1.2.1 From a35d3c2d8f006d4b019881a23ad57819eeb7d62e Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 6 May 2014 14:07:30 +0200 Subject: decoder: h264: allocate reference frames earlier, if needed. Sometimes, a dummy frame comes from the codec layer and it is used as a reference, per the comment in the existing code. Even though this looks suspicious, keep this criterion but make sure to try allocating the VA surface, if needed, earlier in the function that sanity checks the parameters for decoding the current frame. 
This makes it possible to fail at a much earlier time, and actually make it possible to return a sensible error code to the upper layer. Also fix the reference_objects[] array elements to be an exact 1:1 match for ReferenceFrames[] array elements, including possible but unlikely holes in it. The former array holds object_surface structs corresponding to the VA surfaces present in the ReferenceFrames[] array and identified by VAPictureH264.picture_id. Signed-off-by: Gwenole Beauchesne (cherry picked from commit 5a12ccda3f77d03b6ffa8249d607c03e4dc8161f) --- src/i965_decoder_utils.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 2b4651e6..8aada54c 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -479,12 +479,6 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, int slot_found; struct object_surface *obj_surface = decode_state->reference_objects[i]; - /* - * Sometimes a dummy frame comes from the upper layer library, call i965_check_alloc_surface_bo() - * to ake sure the store buffer is allocated for this reference frame - */ - avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); - slot_found = 0; frame_idx = -1; /* Find a free frame store index */ @@ -608,6 +602,7 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); VAPictureParameterBufferH264 *pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + VAStatus va_status; struct object_surface *obj_surface; int i; @@ -631,28 +626,33 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, } } - for (i = 0; i < 16; i++) { - if (pic_param->ReferenceFrames[i].flags & VA_PICTURE_H264_INVALID || - pic_param->ReferenceFrames[i].picture_id == VA_INVALID_SURFACE) - break; - else { - obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id); - assert(obj_surface); + /* 
Fill in the reference objects array with the actual VA surface + objects with 1:1 correspondance with any entry in ReferenceFrames[], + i.e. including "holes" for invalid entries, that are expanded + to NULL in the reference_objects[] array */ + for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) { + const VAPictureH264 * const va_pic = &pic_param->ReferenceFrames[i]; + obj_surface = NULL; + if (!(va_pic->flags & VA_PICTURE_H264_INVALID) && + va_pic->picture_id != VA_INVALID_ID) { + obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id); if (!obj_surface) - goto error; + return VA_STATUS_ERROR_INVALID_SURFACE; - if (!obj_surface->bo) { /* a reference frame without store buffer */ - WARN_ONCE("Invalid reference frame!!!\n"); - } - - decode_state->reference_objects[i] = obj_surface; + /* + * Sometimes a dummy frame comes from the upper layer + * library, call i965_check_alloc_surface_bo() to make + * sure the store buffer is allocated for this reference + * frame + */ + va_status = avc_ensure_surface_bo(ctx, decode_state, obj_surface, + pic_param); + if (va_status != VA_STATUS_SUCCESS) + return va_status; } + decode_state->reference_objects[i] = obj_surface; } - - for ( ; i < 16; i++) - decode_state->reference_objects[i] = NULL; - return VA_STATUS_SUCCESS; error: -- cgit v1.2.1 From d2e843eed7fa31d86e196fb0e07b9cd3c377c969 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 6 May 2014 15:28:29 +0200 Subject: decoder: h264: simplify and optimize reference frame store updates. Simplify and optimize the update process of the reference frame store. Use less iterations to look up existing objects. Use a cache to store the free'd slots. Prerequisite: the reference_objects[] array was previously arranged in a way that the element at index i is exactly the object_surface that corresponds to the VA surface identified by the VAPictureH264.picture_id located at index i in the ReferenceFrames[] array. Theory of operations: 1. 
Obsolete entries are removed first, i.e. entries in the internal DPB that no longer have a match in the supplied ReferenceFrames[] array. That obsolete entry index is stored in a local cache: free_slots[]. 2. This cache is completed with entries considered as "invalid" or "not present", sequentially while traversing the frame store for obsolete entries. At the end of this removal process, the free_slots[] array represents all possible indices in there that could be re-used for new reference frames to track. 3. The list of ReferenceFrames[] objects is traversed for new entries that are not already in the frame store. If an entry needs to be added, it is placed at the index obtained from the next free_slots[] element. There is no need to traverse the frame store array again, the next available slot can be known from that free_slots[] cache. v2: dropped the superfluous "found" variable [Yakui] v3: renamed "free_slots" array to "free_refs", which now holds GenFrameStore entries Signed-off-by: Gwenole Beauchesne (cherry picked from commit 70ecad1264255123df99b472891e8ee90399013c) --- src/i965_decoder_utils.c | 108 +++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 64 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 8aada54c..c7791520 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -420,88 +420,68 @@ gen6_send_avc_ref_idx_state( } void -intel_update_avc_frame_store_index(VADriverContextP ctx, - struct decode_state *decode_state, - VAPictureParameterBufferH264 *pic_param, - GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]) +intel_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) { - int i, j; - - assert(MAX_GEN_REFERENCE_FRAMES == ARRAY_ELEMS(pic_param->ReferenceFrames)); - - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { - int found = 0; - 
- if (frame_store[i].surface_id == VA_INVALID_ID || - frame_store[i].obj_surface == NULL) + GenFrameStore *free_refs[MAX_GEN_REFERENCE_FRAMES]; + int i, j, n, num_free_refs; + + /* Remove obsolete entries from the internal DPB */ + for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + GenFrameStore * const fs = &frame_store[i]; + if (fs->surface_id == VA_INVALID_ID || !fs->obj_surface) { + free_refs[n++] = fs; continue; + } - assert(frame_store[i].frame_store_id != -1); - - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j]; - if (ref_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (frame_store[i].surface_id == ref_pic->picture_id) { - found = 1; + // Find whether the current entry is still a valid reference frame + for (j = 0; j < ARRAY_ELEMS(decode_state->reference_objects); j++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[j]; + if (obj_surface && obj_surface == fs->obj_surface) break; - } } - /* remove it from the internal DPB */ - if (!found) { - frame_store[i].surface_id = VA_INVALID_ID; - frame_store[i].frame_store_id = -1; - frame_store[i].obj_surface = NULL; + // ... 
or remove it + if (j == ARRAY_ELEMS(decode_state->reference_objects)) { + fs->surface_id = VA_INVALID_ID; + fs->obj_surface = NULL; + fs->frame_store_id = -1; + free_refs[n++] = fs; } } + num_free_refs = n; - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { - VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i]; - int found = 0; - - if (ref_pic->flags & VA_PICTURE_H264_INVALID || - ref_pic->picture_id == VA_INVALID_SURFACE || - decode_state->reference_objects[i] == NULL) + /* Append the new reference frames */ + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) continue; + // Find whether the current frame is not already in our frame store for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == ref_pic->picture_id) { - found = 1; + GenFrameStore * const fs = &frame_store[j]; + if (fs->obj_surface == obj_surface) break; - } } - /* add the new reference frame into the internal DPB */ - if (!found) { - int frame_idx; - int slot_found; - struct object_surface *obj_surface = decode_state->reference_objects[i]; - - slot_found = 0; - frame_idx = -1; - /* Find a free frame store index */ - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == VA_INVALID_ID || - frame_store[j].obj_surface == NULL) { - frame_idx = j; - slot_found = 1; - break; - } + // ... 
or add it + if (j == MAX_GEN_REFERENCE_FRAMES) { + if (n < num_free_refs) { + GenFrameStore * const fs = free_refs[n++]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = fs - frame_store; + continue; } - - - if (slot_found) { - frame_store[j].surface_id = ref_pic->picture_id; - frame_store[j].frame_store_id = frame_idx; - frame_store[j].obj_surface = obj_surface; - } else { - WARN_ONCE("Not free slot for DPB reference list!!!\n"); - } + WARN_ONCE("No free slot found for DPB reference list!!!\n"); } } - } void -- cgit v1.2.1 From dcd97d032e4a236954f142d2adeed2f57ac65079 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Mon, 5 May 2014 00:05:06 +0200 Subject: decoder: h264: fix submission of AVC_REF_IDX_STATE command. If the RefPicListX[] entry has no valid picture_id associated to it, then set the resulting state to 0xff. If that entry has no surface buffer storage either, then compose a valid state that maps to the first item in the reference frames list, as mandated by the PRM. 
v2: dropped the superfluous "found" variable [Yakui] Signed-off-by: Gwenole Beauchesne (cherry picked from commit 151b8851c3a9309e87712651a3697e20a7bdb6c9) --- src/i965_decoder_utils.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index c7791520..ae17bd5b 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -336,34 +336,35 @@ gen5_fill_avc_ref_idx_state( const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] ) { - unsigned int i, n, frame_idx; - int found; + int i, j; - for (i = 0, n = 0; i < ref_list_count; i++) { + for (i = 0; i < ref_list_count; i++) { const VAPictureH264 * const va_pic = &ref_list[i]; - if (va_pic->flags & VA_PICTURE_H264_INVALID) + if ((va_pic->flags & VA_PICTURE_H264_INVALID) || + va_pic->picture_id == VA_INVALID_ID) { + state[i] = 0xff; continue; + } - found = 0; - for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) { - const GenFrameStore * const fs = &frame_store[frame_idx]; - if (fs->surface_id != VA_INVALID_ID && - fs->surface_id == va_pic->picture_id) { - found = 1; + for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { + if (frame_store[j].surface_id == va_pic->picture_id) break; - } } - if (found) { - state[n++] = get_ref_idx_state_1(va_pic, frame_idx); - } else { - WARN_ONCE("Invalid Slice reference frame list !!!. It is not included in DPB \n"); + if (j != MAX_GEN_REFERENCE_FRAMES) { // Found picture in the Frame Store + const GenFrameStore * const fs = &frame_store[j]; + assert(fs->frame_store_id == j); // Current architecture/assumption + state[i] = get_ref_idx_state_1(va_pic, fs->frame_store_id); + } + else { + WARN_ONCE("Invalid RefPicListX[] entry!!! 
It is not included in DPB\n"); + state[i] = get_ref_idx_state_1(va_pic, 0) | 0x80; } } - for (; n < 32; n++) - state[n] = 0xff; + for (; i < 32; i++) + state[i] = 0xff; } /* Emit Reference List Entries (Gen6+: SNB, IVB) */ -- cgit v1.2.1 From c4344dc2d58eedc2f3c07cb8ac512b3ee2a6477d Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 16 May 2014 17:04:55 +0200 Subject: decoder: h264: factor out look ups for VA/H264 picture info. Add new avc_find_picture() helper function to search for a VAPictureH264 struct based on the supplied VA surface id. Signed-off-by: Gwenole Beauchesne (cherry picked from commit 3f4f9fc2893af24b7e88f44b6350a5a74d49f0c2) --- src/gen6_mfd.c | 26 +++++++----------------- src/gen75_mfd.c | 52 +++++++++++++----------------------------------- src/gen7_mfd.c | 26 +++++++----------------- src/gen8_mfd.c | 26 +++++++----------------- src/i965_avc_bsd.c | 48 ++++++++++---------------------------------- src/i965_decoder_utils.c | 17 ++++++++++++++++ src/i965_decoder_utils.h | 3 +++ src/i965_media_h264.c | 30 ++++++++++------------------ 8 files changed, 77 insertions(+), 151 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index f925d986..113561c8 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -420,7 +420,7 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen6_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 69); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); @@ -472,26 +472,14 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { - if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen6_mfd_context->reference_surface[i].obj_surface; - assert(gen6_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = 
&pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index b29f0777..1ed874fb 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -664,7 +664,7 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 71); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); @@ -704,26 +704,14 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, 
va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { @@ -751,7 +739,7 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; if (IS_STEPPING_BPLUS(i965)) { gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param, @@ -811,26 +799,14 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 58306184..e7f188cc 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -429,7 +429,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 69); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); @@ -481,26 +481,14 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) 
{ - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 1897d71b..cecd05c5 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -521,7 +521,7 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 71); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); @@ -562,26 +562,14 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + 
const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 67c7c959..43bace6e 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -378,7 +378,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = i965_h264_context->batch; struct i965_avc_bsd_context *i965_avc_bsd_context; - int i, j; + int i; VAPictureH264 *va_pic; struct object_surface *obj_surface; GenAvcSurface *avc_bsd_surface; @@ -408,24 +408,8 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID && - i965_h264_context->fsid_list[i].obj_surface && - i965_h264_context->fsid_list[i].obj_surface->private_data) { - int found = 0; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - obj_surface = i965_h264_context->fsid_list[i].obj_surface; + obj_surface = i965_h264_context->fsid_list[i].obj_surface; + if (obj_surface && obj_surface->private_data) { avc_bsd_surface = obj_surface->private_data; OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, @@ -480,26 +464,16 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) { - int found = 0; - for (j = 0; j < 
ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } + obj_surface = i965_h264_context->fsid_list[i].obj_surface; - assert(found == 1); + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) { - OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); - OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); - } + assert(va_pic != NULL); + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index ae17bd5b..32392127 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -254,6 +254,23 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Finds the VA/H264 picture associated with the specified VA surface id */ +VAPictureH264 * +avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count) +{ + int i; + + if (id != VA_INVALID_ID) { + for (i = 0; i < pic_list_count; i++) { + VAPictureH264 * const va_pic = &pic_list[i]; + if (va_pic->picture_id == id && + !(va_pic->flags & VA_PICTURE_H264_INVALID)) + return va_pic; + } + } + return NULL; +} + /* Get first macroblock bit offset for BSD, minus EPB count (AVC) */ /* XXX: slice_data_bit_offset does not account for EPB */ unsigned int diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 14a45fba..a4c9415f 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -54,6 +54,9 @@ avc_ensure_surface_bo( void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 
*iq_matrix); +VAPictureH264 * +avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count); + unsigned int avc_get_first_mb_bit_offset( dri_bo *slice_data_bo, diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c index 9de4e091..8ec7e4fd 100644 --- a/src/i965_media_h264.c +++ b/src/i965_media_h264.c @@ -11,6 +11,7 @@ #include "i965_drv_video.h" #include "i965_media.h" #include "i965_media_h264.h" +#include "i965_decoder_utils.h" enum { INTRA_16X16 = 0, @@ -343,7 +344,7 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferH264 *pic_param; VAPictureH264 *va_pic; - int i, j, w, h; + int i, w, h; int field_picture; assert(media_context->private_context); @@ -375,24 +376,15 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx, /* Reference Pictures */ for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID && - i965_h264_context->fsid_list[i].obj_surface != NULL) { - int found = 0; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - - obj_surface = i965_h264_context->fsid_list[i].obj_surface; + struct object_surface * const obj_surface = + i965_h264_context->fsid_list[i].obj_surface; + + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); + + assert(va_pic != NULL); w = obj_surface->width; h = obj_surface->height; field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD)); -- cgit v1.2.1 From d07f1e3c6505b169f59f075b5ea5a82331118f8f Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Mon, 2 Jun 2014 
13:08:57 +0200 Subject: decoder: h264: enable Picture ID Remapping on Haswell and newer. Fill and submit MFX_AVC_PICID_STATE commands to Gen7.5+ hardware. This optimizes the management of the DPB as the binding array can now contain entries in any order. This also makes it possible to support H.264 MultiView High profiles, with any particular number of views. v2: added more comments for clarity, removed an assert [Yakui] Signed-off-by: Gwenole Beauchesne (cherry picked from commit 8dfdf10612c726b60ecd5b61eee2b7d6a520bb33) --- src/gen75_mfd.c | 23 +++-------- src/gen8_mfd.c | 27 ++++--------- src/i965_decoder_utils.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_decoder_utils.h | 23 +++++++++++ 4 files changed, 136 insertions(+), 36 deletions(-) diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 1ed874fb..bab5df44 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -632,25 +632,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx, } } -static void +static inline void gen75_mfd_avc_picid_state(VADriverContextP ctx, struct decode_state *decode_state, struct gen7_mfd_context *gen7_mfd_context) { - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); } static void @@ -1054,7 +1042,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, 
gen7_mfd_context->reference_surface); + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -1139,8 +1128,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx, gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); - gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); for (j = 0; j < decode_state->num_slice_params; j++) { assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index cecd05c5..0b39dac5 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -489,25 +489,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx, } } -static void +static inline void gen8_mfd_avc_picid_state(VADriverContextP ctx, - struct decode_state *decode_state, - struct gen7_mfd_context *gen7_mfd_context) + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) { - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); } static void @@ -817,7 +805,8 @@ 
gen8_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -902,8 +891,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx, gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); - gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); for (j = 0; j < decode_state->num_slice_params; j++) { assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 32392127..9af57c29 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -254,6 +254,24 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Returns a unique picture ID that represents the supplied VA surface object */ +int +avc_get_picture_id(struct object_surface *obj_surface) +{ + int pic_id; + + /* This highly depends on how the internal VA objects are organized. + + Theory of operations: + The VA objects are maintained in heaps so that any released VA + surface will become free again for future allocation. This means + that holes in there are filled in for subsequent allocations. 
+ So, this ultimately means that we could just use the Heap ID of + the VA surface as the resulting picture ID (16 bits) */ + pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK); + return (pic_id <= 0xffff) ? pic_id : -1; +} + /* Finds the VA/H264 picture associated with the specified VA surface id */ VAPictureH264 * avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count) @@ -502,6 +520,87 @@ intel_update_avc_frame_store_index( } } +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, n; + + /* Construct the Frame Store array, in compact form. i.e. empty or + invalid entries are discarded. */ + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) + continue; + + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = n++; + } + + /* Any remaining entry is marked as invalid */ + for (; n < MAX_GEN_REFERENCE_FRAMES; n++) { + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = VA_INVALID_ID; + fs->obj_surface = NULL; + fs->frame_store_id = -1; + } +} + +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, pic_id; + + /* Fill in with known picture IDs. The Frame Store array is in + compact form, i.e. 
empty entries are only to be found at the + end of the array: there are no holes in the set of active + reference frames */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + GenFrameStore * const fs = &frame_store[i]; + if (!fs->obj_surface) + break; + pic_id = avc_get_picture_id(fs->obj_surface); + if (pic_id < 0) + return false; + pic_ids[i] = pic_id; + } + + /* When an element of the list is not relevant the value of the + picture ID shall be set to 0 */ + for (; i < MAX_GEN_REFERENCE_FRAMES; i++) + pic_ids[i] = 0; + return true; +} + +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + uint16_t pic_ids[16]; + + if (!gen75_fill_avc_picid_list(pic_ids, frame_store)) + return false; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); + OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping + intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids)); + ADVANCE_BCS_BATCH(batch); + return true; +} + void intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index a4c9415f..0ffbd7f3 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -54,6 +54,9 @@ avc_ensure_surface_bo( void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); +int +avc_get_picture_id(struct object_surface *obj_surface); + VAPictureH264 * avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count); @@ -97,6 +100,26 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + 
GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + void intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, -- cgit v1.2.1 From 585596a468db65e93b618b7aabe4e46d23de7b1f Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 3 Jun 2014 17:30:11 +0200 Subject: decoder: h264: expose the set of supported MVC profiles. H.264 MVC decoding support is defined as follows: - Stereo High profile on Sandybridge and newer ; - Multiview High profile on Haswell and newer. Signed-off-by: Gwenole Beauchesne (cherry picked from commit 1f244834dedb7b46863b315a898d8649d01c5f58) Conflicts: src/i965_device_info.c src/i965_drv_video.c src/va_backend_compat.h --- src/gen6_mfd.c | 1 + src/gen75_mfd.c | 4 ++++ src/gen7_mfd.c | 2 ++ src/gen8_mfd.c | 4 ++++ src/i965_decoder_utils.c | 2 ++ src/i965_device_info.c | 12 ++++++++++-- src/i965_drv_video.c | 38 ++++++++++++++++++++++++++++++-------- src/i965_drv_video.h | 2 +- src/va_backend_compat.h | 6 ------ 9 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 113561c8..e22e57a7 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -1858,6 +1858,7 @@ gen6_mfd_decode_picture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context); break; diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index bab5df44..aaee8078 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -3141,6 +3141,8 @@ gen75_mfd_decode_picture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); break; @@ -3235,6 +3237,8 @@ 
gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen75_mfd_avc_context_init(ctx, gen7_mfd_context); break; default: diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index e7f188cc..db35abf4 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -2612,6 +2612,7 @@ gen7_mfd_decode_picture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); break; @@ -2706,6 +2707,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen7_mfd_avc_context_init(ctx, gen7_mfd_context); break; default: diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 0b39dac5..4e24f553 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -3062,6 +3062,8 @@ gen8_mfd_decode_picture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); break; @@ -3162,6 +3164,8 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen8_mfd_avc_context_init(ctx, gen7_mfd_context); break; default: diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 9af57c29..7e3d33ab 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -919,6 +919,8 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case 
VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: vaStatus = intel_decoder_check_avc_parameter(ctx, profile, decode_state); break; diff --git a/src/i965_device_info.c b/src/i965_device_info.c index c0b26e86..9573b7d2 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -36,6 +36,10 @@ (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \ VA_RT_FORMAT_YUV444) +/* Defines VA profile as a 32-bit unsigned integer mask */ +#define VA_PROFILE_MASK(PROFILE) \ + (1U << VAProfile##PROFILE) + extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); extern bool genx_render_init(VADriverContextP); @@ -94,6 +98,7 @@ static const struct hw_codec_info snb_hw_codec_info = { .min_linear_wpitch = 16, .min_linear_hpitch = 16, + .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh), .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -127,6 +132,7 @@ static const struct hw_codec_info ivb_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh), .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, @@ -164,6 +170,8 @@ static const struct hw_codec_info hsw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, @@ -179,7 +187,6 @@ static const struct hw_codec_info hsw_hw_codec_info = { .has_tiled_surface = 1, .has_di_motion_adptive = 1, .has_di_motion_compensated = 1, - .has_h264_mvc_decoding = 1, .has_h264_mvc_encoding = 1, .num_filters = 5, @@ -207,6 +214,8 @@ static const struct 
hw_codec_info bdw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, @@ -223,7 +232,6 @@ static const struct hw_codec_info bdw_hw_codec_info = { .has_di_motion_adptive = 1, .has_di_motion_compensated = 1, .has_vp8_decoding = 1, - .has_h264_mvc_decoding = 1, .has_h264_mvc_encoding = 1, .num_filters = 5, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index f23b4491..403acdeb 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -84,8 +84,12 @@ #define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \ (ctx)->intel.has_bsd) -#define HAS_H264_MVC_DECODING(ctx) ((ctx)->codec_info->has_h264_mvc_decoding && \ - (ctx)->intel.has_bsd) +#define HAS_H264_MVC_DECODING(ctx) \ + (HAS_H264_DECODING(ctx) && (ctx)->codec_info->h264_mvc_dec_profiles) + +#define HAS_H264_MVC_DECODING_PROFILE(ctx, profile) \ + (HAS_H264_MVC_DECODING(ctx) && \ + ((ctx)->codec_info->h264_mvc_dec_profiles & (1U << profile))) #define HAS_H264_MVC_ENCODING(ctx) ((ctx)->codec_info->has_h264_mvc_encoding && \ (ctx)->intel.has_bsd) @@ -390,6 +394,10 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileH264Main; profile_list[i++] = VAProfileH264High; } + if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264MultiviewHigh)) + profile_list[i++] = VAProfileH264MultiviewHigh; + if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264StereoHigh)) + profile_list[i++] = VAProfileH264StereoHigh; if (HAS_VC1_DECODING(i965)) { profile_list[i++] = VAProfileVC1Simple; @@ -410,8 +418,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileVP8Version0_3; } - if (HAS_H264_MVC_DECODING(i965) || - HAS_H264_MVC_ENCODING(i965)) { + if (HAS_H264_MVC_ENCODING(i965)) { profile_list[i++] = VAProfileH264MultiviewHigh; 
profile_list[i++] = VAProfileH264StereoHigh; } @@ -455,7 +462,7 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, break; case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: - if (HAS_H264_MVC_DECODING(i965)) + if (HAS_H264_MVC_DECODING_PROFILE(i965, profile)) entrypoint_list[n++] = VAEntrypointVLD; if (HAS_H264_MVC_ENCODING(i965)) @@ -563,8 +570,9 @@ i965_validate_config(VADriverContextP ctx, VAProfile profile, case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: - if ((HAS_H264_MVC_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_H264_MVC_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { + if ((HAS_H264_MVC_DECODING_PROFILE(i965, profile) && + entrypoint == VAEntrypointVLD) || + (HAS_H264_MVC_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { va_status = VA_STATUS_SUCCESS; } else { va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; @@ -1695,6 +1703,12 @@ i965_CreateContext(VADriverContextP ctx, return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; render_state->interleaved_uv = 1; break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (!HAS_H264_MVC_DECODING(i965)) + return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + render_state->interleaved_uv = 1; + break; default: render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_info) || IS_GEN7(i965->intel.device_info) || IS_GEN8(i965->intel.device_info)); break; @@ -2105,9 +2119,17 @@ i965_BeginPicture(VADriverContextP ctx, case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + vaStatus = VA_STATUS_SUCCESS; + break; + case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: - vaStatus = VA_STATUS_SUCCESS; + if (HAS_H264_MVC_DECODING_PROFILE(i965, obj_config->profile) || + HAS_H264_MVC_ENCODING(i965)) { + vaStatus = VA_STATUS_SUCCESS; + } else { + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); + } break; case VAProfileVC1Simple: diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 75c66e9f..e25b9c86 
100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -302,6 +302,7 @@ struct hw_codec_info int min_linear_wpitch; int min_linear_hpitch; + unsigned int h264_mvc_dec_profiles; unsigned int h264_dec_chroma_formats; unsigned int jpeg_dec_chroma_formats; @@ -321,7 +322,6 @@ struct hw_codec_info unsigned int has_di_motion_compensated:1; unsigned int has_vp8_decoding:1; unsigned int has_vp8_encoding:1; - unsigned int has_h264_mvc_decoding:1; unsigned int has_h264_mvc_encoding:1; unsigned int num_filters; diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h index 5fcb1983..87671538 100644 --- a/src/va_backend_compat.h +++ b/src/va_backend_compat.h @@ -45,12 +45,6 @@ # define VA_DRM_AUTH_CUSTOM VA_DUMMY #endif -#if !VA_CHECK_VERSION(0,35,1) - -#define VAProcFilterSkinToneEnhancement 5 - -#endif - #if !VA_CHECK_VERSION(0,35,2) # define VAProfileH264MultiviewHigh 15 # define VAProfileH264StereoHigh 16 -- cgit v1.2.1 From c173e3560dbbd9346cf34c6da9a6406c6c8089be Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 18:52:00 +0200 Subject: decoder: MVC chroma formats This is a part of fa469f74227a7b4e0e6f882c488132eaa9c44417 on staging Signed-off-by: Gwenole Beauchesne Signed-off-by: Xiang, Haihao --- src/i965_drv_video.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 403acdeb..eba9a478 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -602,6 +602,12 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, chroma_formats |= i965->codec_info->h264_dec_chroma_formats; break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (HAS_H264_MVC_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + case VAProfileJPEGBaseline: if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; -- cgit v1.2.1 From 
aeef40157a7d3349ea8afc5dca0c198055027601 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 14:53:30 +0200 Subject: render: don't deallocate surface storage of displayed frames. Keep the VA surface storage live until it is explicitly scheduled for destruction through vaDestroySurfaces() interface. Otherwise, subsequent vaPutSurface() calls would have no effect. This fixes various use cases like: display of interlaced frames that are not marked for reference, multiple rendering to Pixmap for EXT_texture_from_pixmap and more precisely interlaced streams. Signed-off-by: Gwenole Beauchesne (cherry picked from commit a840e6403071d397a33e127e8058881a3ef50077) --- src/i965_output_dri.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index 2a812d3a..3b245344 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -207,11 +207,6 @@ i965_put_surface_dri( dri_vtable->swap_buffer(ctx, dri_drawable); obj_surface->flags |= SURFACE_DISPLAYED; - if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - i965_destroy_surface_storage(obj_surface); - } - _i965UnlockMutex(&i965->render_mutex); return VA_STATUS_SUCCESS; -- cgit v1.2.1 From da8a7e9accb5f8e1913440fdd2d662af6a0b7ef1 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 15:08:05 +0200 Subject: surface: drop SURFACE_DISPLAYED flag. The optimization by which the VA surface storage is deallocated after it is displayed and not used for reference or vaDeriveImage() purposes cannot be implemented safely. We need to honour explicit lifetimes defined by the upper codec layer. 
Signed-off-by: Gwenole Beauchesne (cherry picked from commit 84926ace7a2c5b88df1ada167a1c273128469aad) --- src/gen6_mfd.c | 6 ++++-- src/gen75_mfd.c | 6 ++++-- src/gen7_mfd.c | 6 ++++-- src/gen8_mfd.c | 6 ++++-- src/i965_avc_bsd.c | 6 ++++-- src/i965_drv_video.h | 4 ---- src/i965_output_dri.c | 1 - 7 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index e22e57a7..437ad3b1 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -830,8 +830,10 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index aaee8078..a1d004b7 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -1051,8 +1051,10 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index db35abf4..97f97053 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -748,8 +748,10 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 4e24f553..df0cd42a 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -814,8 +814,10 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface); diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 43bace6e..aca3c012 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -432,8 +432,10 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, va_pic = &pic_param->CurrPic; obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index e25b9c86..95ee1938 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -202,12 +202,8 @@ struct object_context }; #define SURFACE_REFERENCED (1 << 0) -#define SURFACE_DISPLAYED (1 << 1) #define SURFACE_DERIVED (1 << 2) -#define SURFACE_REF_DIS_MASK ((SURFACE_REFERENCED) | \ - (SURFACE_DISPLAYED)) #define SURFACE_ALL_MASK ((SURFACE_REFERENCED) | \ - (SURFACE_DISPLAYED) | \ (SURFACE_DERIVED)) struct object_surface diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index 3b245344..6f8ea312 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -205,7 +205,6 @@ i965_put_surface_dri( if (!(g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_BENCH)) dri_vtable->swap_buffer(ctx, dri_drawable); - obj_surface->flags |= SURFACE_DISPLAYED; _i965UnlockMutex(&i965->render_mutex); -- cgit v1.2.1 From d2262e37a4737d0d03004c0b34165180ddb1d956 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 4 Jun 2014 
18:25:33 +0200 Subject: decoder: h264: improve AVC_REF_IDX_STATE for MVC. Each Reference List Entry has Bit 6 set to one if the reference picture is to be used as a long-term reference picture. However, the H.264 standard, and subsequently the VA-API specs, make it possible to mark the picture as "used for short-term reference", as "used for long-term reference", or even with none of those flags. This means we have to handle a minimum of 3 states. This doesn't fit the range of a single bit. Let's examine how this could be fixed from known practices. There are cases where the picture is added to RefPicListX[] even if it is not marked as "used for short-term reference" or "used for long-term reference": MVC with inter-view reference components or inter-view only reference components [H.8.4]. Ultimately, this has an impact on the value of colZeroFlag (8.4.1.2.2). Since there is no way to program that flag directly, and since its value depends on whether the picture is marked as "used for short-term reference" or not, it looks reasonable to interpret Bit 6 (LongTermPicFlag) as denoting a picture that is *not* "used for short-term reference", i.e. including genuine long-term reference pictures, and those that are neither long-term reference nor short-term reference pictures. In practice, this fixes MVCNV-2.264. 
Signed-off-by: Gwenole Beauchesne (cherry picked from commit edbdc0e87919d8b7261d882a32b2d3c271660931) --- src/i965_decoder_utils.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 7e3d33ab..7833919f 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -349,8 +349,24 @@ avc_get_first_mb_bit_offset_with_epb( static inline uint8_t get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id) { + /* The H.264 standard, and the VA-API specification, allows for at + least 3 states for a picture: "used for short-term reference", + "used for long-term reference", or considered as not used for + reference. + + The latter is used in the MVC inter prediction and inter-view + prediction process (H.8.4). This has an incidence on the + colZeroFlag variable, as defined in 8.4.1.2. + + Since it is not possible to directly program that flag, let's + make the hardware derive this value by assimilating "considered + as not used for reference" to a "not used for short-term + reference", and subsequently making it "used for long-term + reference" to fit the definition of Bit6 here */ + const unsigned int ref_flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE | + VA_PICTURE_H264_LONG_TERM_REFERENCE; const unsigned int is_long_term = - !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); + ((va_pic->flags & ref_flags) != VA_PICTURE_H264_SHORT_TERM_REFERENCE); const unsigned int is_top_field = !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); const unsigned int is_bottom_field = -- cgit v1.2.1 From 143c53900943c660390122f7fa4a1c5705798fdd Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Thu, 5 Jun 2014 21:36:33 +0200 Subject: decoder: h264: fix frame store logic for MVC. 
In strict MVC decoding mode, when only the necessary set of inter-view reference pictures is passed to the ReferenceFrames array for decoding the current picture, we should not re-use a frame store id that might be needed for decoding another view component in the same access unit. One way to solve this problem is to track when the VA surface in a specified frame store id was last referenced. So, a "ref_age" field is introduced to the GenFrameStore struct and is updated whenever the surface is being referenced. Additionally, the list of retired reference candidates (free_refs) is kept ordered by increasing ref_age. That way, we can immediately know which frame store id is the oldest one to recycle. Let deltaAge = CurrAge - RefAge: If deltaAge > 1, we know for sure that the VA surface is gone; If deltaAge = 1, the surface could be re-used for inter prediction; If deltaAge = 0, the surface could be re-used for inter-view prediction. The ref_age in each Frame Store entry is always current, i.e. it is the same for all reference frames that were involved in the decoding process of all inter-view components of the previous access unit. The age tracks access units. 
v2: used a more correct representation of age, instead of POC [Yakui] v3: minor optimization for detecting changes of access unit [Haihao] Signed-off-by: Gwenole Beauchesne (cherry picked from commit 3b5eb0522fbfe1220dcd0c0bb093a93cfc25e22c) --- src/gen6_mfd.c | 4 +- src/gen6_mfd.h | 1 + src/gen75_mfd.c | 1 + src/gen7_mfd.c | 4 +- src/gen7_mfd.h | 1 + src/gen8_mfd.c | 1 + src/i965_avc_bsd.c | 4 +- src/i965_decoder.h | 15 +++++++ src/i965_decoder_utils.c | 112 +++++++++++++++++++++++++++++++---------------- src/i965_decoder_utils.h | 11 +++-- src/i965_media_h264.h | 1 + src/intel_media.h | 1 + 12 files changed, 111 insertions(+), 45 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 437ad3b1..8128a80b 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -61,6 +61,7 @@ gen6_mfd_init_avc_surface(VADriverContextP ctx, if (!gen6_avc_surface) { gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen6_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen6_avc_surface; } @@ -825,7 +826,8 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen6_mfd_context->reference_surface); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen6_mfd_context->reference_surface, &gen6_mfd_context->fs_ctx); width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff); /* Current decoded picture */ diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h index de131d69..f499803e 100644 --- a/src/gen6_mfd.h +++ b/src/gen6_mfd.h @@ -62,6 +62,7 @@ struct gen6_mfd_context VAIQMatrixBufferMPEG2 mpeg2; } iq_matrix; + GenFrameStoreContext fs_ctx; GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES]; GenBuffer post_deblocking_output; GenBuffer pre_deblocking_output; diff --git a/src/gen75_mfd.c 
b/src/gen75_mfd.c index a1d004b7..a89640da 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -67,6 +67,7 @@ gen75_mfd_init_avc_surface(VADriverContextP ctx, if (!gen7_avc_surface) { gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen7_avc_surface; } diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 97f97053..7ab2955a 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -65,6 +65,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx, if (!gen7_avc_surface) { gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen7_avc_surface; } @@ -740,7 +741,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h index 02002164..af8e960d 100644 --- a/src/gen7_mfd.h +++ b/src/gen7_mfd.h @@ -77,6 +77,7 @@ struct gen7_mfd_context VAIQMatrixBufferH264 h264; /* flat scaling lists (default) */ } iq_matrix; + GenFrameStoreContext fs_ctx; GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES]; GenBuffer post_deblocking_output; GenBuffer pre_deblocking_output; diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index df0cd42a..5e1b70bf 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -74,6 +74,7 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx, if (!gen7_avc_surface) { 
gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen7_avc_surface; } diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index aca3c012..ebeb2a6e 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -51,6 +51,7 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, if (!avc_bsd_surface) { avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1); + avc_bsd_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = avc_bsd_surface; } @@ -795,7 +796,8 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, i965_h264_context->fsid_list); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + i965_h264_context->fsid_list, &i965_h264_context->fs_ctx); i965_h264_context->enable_avc_ildb = 0; i965_h264_context->picture.i_flag = 1; diff --git a/src/i965_decoder.h b/src/i965_decoder.h index 01c093fc..14d4d0c0 100644 --- a/src/i965_decoder.h +++ b/src/i965_decoder.h @@ -39,6 +39,21 @@ struct gen_frame_store { VASurfaceID surface_id; int frame_store_id; struct object_surface *obj_surface; + + /* This represents the time when this frame store was last used to + hold a reference frame. This is not connected to a presentation + timestamp (PTS), and this is not a common decoding time stamp + (DTS) either. It serves the purpose of tracking retired + reference frame candidates. 
+ + This is only used for H.264 decoding on platforms before Haswell */ + uint64_t ref_age; +}; + +typedef struct gen_frame_store_context GenFrameStoreContext; +struct gen_frame_store_context { + uint64_t age; + int prev_poc; }; typedef struct gen_buffer GenBuffer; diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 7833919f..0539e083 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -22,10 +22,11 @@ */ #include "sysdeps.h" - +#include #include #include "intel_batchbuffer.h" +#include "intel_media.h" #include "i965_drv_video.h" #include "i965_decoder_utils.h" #include "i965_defines.h" @@ -254,6 +255,21 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Returns the POC of the supplied VA picture */ +static int +avc_get_picture_poc(const VAPictureH264 *va_pic) +{ + int structure, field_poc[2]; + + structure = va_pic->flags & + (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD); + field_poc[0] = structure != VA_PICTURE_H264_BOTTOM_FIELD ? + va_pic->TopFieldOrderCnt : INT_MAX; + field_poc[1] = structure != VA_PICTURE_H264_TOP_FIELD ? 
+ va_pic->BottomFieldOrderCnt : INT_MAX; + return MIN(field_poc[0], field_poc[1]); +} + /* Returns a unique picture ID that represents the supplied VA surface object */ int avc_get_picture_id(struct object_surface *obj_surface) @@ -471,68 +487,88 @@ gen6_send_avc_ref_idx_state( ); } +/* Comparison function for sorting out the array of free frame store entries */ +static int +compare_avc_ref_store_func(const void *p1, const void *p2) +{ + const GenFrameStore * const fs1 = *((GenFrameStore **)p1); + const GenFrameStore * const fs2 = *((GenFrameStore **)p2); + + return fs1->ref_age - fs2->ref_age; +} + void intel_update_avc_frame_store_index( VADriverContextP ctx, struct decode_state *decode_state, VAPictureParameterBufferH264 *pic_param, - GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES], + GenFrameStoreContext *fs_ctx ) { GenFrameStore *free_refs[MAX_GEN_REFERENCE_FRAMES]; - int i, j, n, num_free_refs; - - /* Remove obsolete entries from the internal DPB */ - for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { - GenFrameStore * const fs = &frame_store[i]; - if (fs->surface_id == VA_INVALID_ID || !fs->obj_surface) { - free_refs[n++] = fs; + uint32_t used_refs = 0, add_refs = 0; + uint64_t age; + int i, n, num_free_refs; + + /* Detect changes of access unit */ + const int poc = avc_get_picture_poc(&pic_param->CurrPic); + if (fs_ctx->age == 0 || fs_ctx->prev_poc != poc) + fs_ctx->age++; + fs_ctx->prev_poc = poc; + age = fs_ctx->age; + + /* Tag entries that are still available in our Frame Store */ + for (i = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) continue; - } - // Find whether the current entry is still a valid reference frame - for (j = 0; j < ARRAY_ELEMS(decode_state->reference_objects); j++) { - struct object_surface * const obj_surface = - decode_state->reference_objects[j]; - if 
(obj_surface && obj_surface == fs->obj_surface) - break; + GenAvcSurface * const avc_surface = obj_surface->private_data; + if (avc_surface->frame_store_id >= 0) { + GenFrameStore * const fs = + &frame_store[avc_surface->frame_store_id]; + if (fs->surface_id == obj_surface->base.id) { + fs->obj_surface = obj_surface; + fs->ref_age = age; + used_refs |= 1 << fs->frame_store_id; + continue; + } } + add_refs |= 1 << i; + } - // ... or remove it - if (j == ARRAY_ELEMS(decode_state->reference_objects)) { - fs->surface_id = VA_INVALID_ID; + /* Build and sort out the list of retired candidates. The resulting + list is ordered by increasing age when they were last used */ + for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + if (!(used_refs & (1 << i))) { + GenFrameStore * const fs = &frame_store[i]; fs->obj_surface = NULL; - fs->frame_store_id = -1; free_refs[n++] = fs; } } num_free_refs = n; + qsort(&free_refs[0], n, sizeof(free_refs[0]), compare_avc_ref_store_func); /* Append the new reference frames */ for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { struct object_surface * const obj_surface = decode_state->reference_objects[i]; - if (!obj_surface) + if (!obj_surface || !(add_refs & (1 << i))) continue; - // Find whether the current frame is not already in our frame store - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - GenFrameStore * const fs = &frame_store[j]; - if (fs->obj_surface == obj_surface) - break; - } - - // ... 
or add it - if (j == MAX_GEN_REFERENCE_FRAMES) { - if (n < num_free_refs) { - GenFrameStore * const fs = free_refs[n++]; - fs->surface_id = obj_surface->base.id; - fs->obj_surface = obj_surface; - fs->frame_store_id = fs - frame_store; - continue; - } - WARN_ONCE("No free slot found for DPB reference list!!!\n"); + GenAvcSurface * const avc_surface = obj_surface->private_data; + if (n < num_free_refs) { + GenFrameStore * const fs = free_refs[n++]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = fs - frame_store; + fs->ref_age = age; + avc_surface->frame_store_id = fs->frame_store_id; + continue; } + WARN_ONCE("No free slot found for DPB reference list!!!\n"); } } diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 0ffbd7f3..3d39b21a 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -95,10 +95,13 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, struct decode_state *decode_state); void -intel_update_avc_frame_store_index(VADriverContextP ctx, - struct decode_state *decode_state, - VAPictureParameterBufferH264 *pic_param, - GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); +intel_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES], + GenFrameStoreContext *fs_ctx +); void gen75_update_avc_frame_store_index( diff --git a/src/i965_media_h264.h b/src/i965_media_h264.h index 490213cc..e507e1d1 100644 --- a/src/i965_media_h264.h +++ b/src/i965_media_h264.h @@ -61,6 +61,7 @@ struct i965_h264_context struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context; struct i965_avc_ildb_context avc_ildb_context; + GenFrameStoreContext fs_ctx; GenFrameStore fsid_list[MAX_GEN_REFERENCE_FRAMES]; struct i965_kernel avc_kernels[NUM_H264_AVC_KERNELS]; diff --git a/src/intel_media.h b/src/intel_media.h index b30740a3..55136d64 100644 --- 
a/src/intel_media.h +++ b/src/intel_media.h @@ -39,6 +39,7 @@ struct gen_avc_surface dri_bo *dmv_top; dri_bo *dmv_bottom; int dmv_bottom_flag; + int frame_store_id; /* only used for H.264 on earlier generations ( Date: Tue, 10 Jun 2014 14:11:01 +0800 Subject: Encoding: Reinitialize CBR bit rate-control parameter to support switch of bitrate under CBR Tested-By: Sean V Kelley Signed-off-by: Zhao Yakui (cherry picked from commit 929c1446a28dbefd9655774f2db3e10f7b631dcf) --- src/gen6_mfc.h | 6 ++++++ src/gen6_mfc_common.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 6df80937..9437c317 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -159,6 +159,12 @@ struct gen6_mfc_context int target_frame_size[3]; // I,P,B double bits_per_frame; double qpf_rounding_accumulator; + + double saved_bps; + double saved_fps; + int saved_intra_period; + int saved_ip_period; + int saved_idr_period; } brc; struct { diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 33b9d557..3e30335c 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -384,6 +384,50 @@ int intel_mfc_interlace_check(VADriverContextP ctx, return 1; } +/* + * Check whether the parameters related with CBR are updated and decide whether + * it needs to reinitialize the configuration related with CBR. 
+ * Currently it will check the following parameters: + * bits_per_second + * frame_rate + * gop_configuration(intra_period, ip_period, intra_idr_period) + */ +static bool intel_mfc_brc_updated_check(struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + unsigned int rate_control_mode = encoder_context->rate_control_mode; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + double cur_fps, cur_bitrate; + VAEncSequenceParameterBufferH264 *pSequenceParameter; + + + if (rate_control_mode != VA_RC_CBR) { + return false; + } + + pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + + cur_bitrate = pSequenceParameter->bits_per_second; + cur_fps = (double)pSequenceParameter->time_scale / + (2 * (double)pSequenceParameter->num_units_in_tick); + + if ((cur_bitrate == mfc_context->brc.saved_bps) && + (cur_fps == mfc_context->brc.saved_fps) && + (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) && + (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) && + (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) { + /* the parameters related with CBR are not updaetd */ + return false; + } + + mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period; + mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period; + mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period; + mfc_context->brc.saved_fps = cur_fps; + mfc_context->brc.saved_bps = cur_bitrate; + return true; +} + void intel_mfc_brc_prepare(struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { @@ -391,16 +435,20 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; if (rate_control_mode == VA_RC_CBR) { + bool brc_updated; assert(encoder_context->codec != CODEC_MPEG2); + brc_updated = 
intel_mfc_brc_updated_check(encode_state, encoder_context); + /*Programing bit rate control */ - if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) { + if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) || + brc_updated) { intel_mfc_bit_rate_control_context_init(encode_state, mfc_context); intel_mfc_brc_init(encode_state, encoder_context); } /*Programing HRD control */ - if ( mfc_context->vui_hrd.i_cpb_size_value == 0 ) + if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated ) intel_mfc_hrd_context_init(encode_state, encoder_context); } } -- cgit v1.2.1 From 772e94298f2fc42da1f9be69de80664d9055b685 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_Encoding: Parse the packed header data from user to fix the hacked code of HW skip bytes When the packed header data from user is inserted into the coded clip, it uses the hacked code to check the number of HW skip emulation bytes. This is wrong. So fix it. Of course if the packed header data is generated by the driver, it is unnecessary to check it and it can still use the pre-defined number of HW skip bytes. V1->V2: Based on Gwenole's comment more nal_unit_type is added. 
Signed-off-by: Zhao Yakui (cherry picked from commit cd518563f239cb8523c58010a695098465a4f04e) --- src/gen6_mfc_common.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 3e30335c..95e4dc3b 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -453,6 +453,58 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state, } } +static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length) +{ + int i, found; + int leading_zero_cnt, byte_length, zero_byte; + int nal_unit_type; + int skip_cnt = 0; + +#define NAL_UNIT_TYPE_MASK 0x1f +#define HW_MAX_SKIP_LENGTH 15 + + byte_length = ALIGN(bits_length, 32) >> 3; + + + leading_zero_cnt = 0; + found = 0; + for(i = 0; i < byte_length - 4; i++) { + if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) || + ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) { + found = 1; + break; + } + leading_zero_cnt++; + } + if (!found) { + /* warning message is complained. But anyway it will be inserted. */ + WARN_ONCE("Invalid packed header data. " + "Can't find the 000001 start_prefix code\n"); + return 0; + } + i = leading_zero_cnt; + + zero_byte = 0; + if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1))) + zero_byte = 1; + + skip_cnt = leading_zero_cnt + zero_byte + 3; + + /* the unit header byte is accounted */ + nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK; + skip_cnt += 1; + + if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) { + /* more unit header bytes are accounted for MVC/SVC */ + skip_cnt += 3; + } + if (skip_cnt > HW_MAX_SKIP_LENGTH) { + WARN_ONCE("Too many leading zeros are padded for packed data. 
" + "It is beyond the HW range.!!!\n"); + } + return skip_cnt; +} + void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context, @@ -461,6 +513,7 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS); unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned int skip_emul_byte_cnt; if (encode_state->packed_header_data[idx]) { VAEncPackedHeaderParameterBuffer *param = NULL; @@ -471,12 +524,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); mfc_context->insert_object(ctx, encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, @@ -494,12 +548,14 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + mfc_context->insert_object(ctx, encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, @@ -517,12 +573,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); 
mfc_context->insert_object(ctx, encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, -- cgit v1.2.1 From 558e904bc5c471795b09f6c2dcf65ba31590b19b Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_Encoding: Prepare some data structures for adding packed raw data Under some encoding scenario, the user-space application hopes that the driver can insert the passed packed rawdata into the coded clip. But the insertion of packed rawdata is related with the slice. So some data structures are added so that it can store how the packed rawdata is inserted into the coded clip per-slice. Signed-off-by: Zhao, Yakui (cherry picked from commit 65727b1868f01d836659396724b83d2992656242) --- src/i965_drv_video.c | 43 +++++++++++++++++++++++++++++++++++++++++++ src/i965_drv_video.h | 20 ++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index eba9a478..db0440c9 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1636,6 +1636,22 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj) i965_release_buffer_store(&obj_context->codec_state.encode.slice_params_ext[i]); free(obj_context->codec_state.encode.slice_params_ext); + if (obj_context->codec_state.encode.slice_rawdata_index) { + free(obj_context->codec_state.encode.slice_rawdata_index); + obj_context->codec_state.encode.slice_rawdata_index = NULL; + } + if (obj_context->codec_state.encode.slice_rawdata_count) { + free(obj_context->codec_state.encode.slice_rawdata_count); + obj_context->codec_state.encode.slice_rawdata_count = NULL; + } + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); + free(obj_context->codec_state.encode.packed_header_params_ext); + + for 
(i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]); + free(obj_context->codec_state.encode.packed_header_data_ext); + } else { assert(obj_context->codec_state.decode.num_slice_params <= obj_context->codec_state.decode.max_slice_params); assert(obj_context->codec_state.decode.num_slice_datas <= obj_context->codec_state.decode.max_slice_datas); @@ -1754,6 +1770,22 @@ i965_CreateContext(VADriverContextP ctx, obj_context->codec_state.encode.max_slice_params = NUM_SLICES; obj_context->codec_state.encode.slice_params = calloc(obj_context->codec_state.encode.max_slice_params, sizeof(*obj_context->codec_state.encode.slice_params)); + obj_context->codec_state.encode.max_packed_header_params_ext = NUM_SLICES; + obj_context->codec_state.encode.packed_header_params_ext = + calloc(obj_context->codec_state.encode.max_packed_header_params_ext, + sizeof(struct buffer_store *)); + + obj_context->codec_state.encode.max_packed_header_data_ext = NUM_SLICES; + obj_context->codec_state.encode.packed_header_data_ext = + calloc(obj_context->codec_state.encode.max_packed_header_data_ext, + sizeof(struct buffer_store *)); + + obj_context->codec_state.encode.slice_num = NUM_SLICES; + obj_context->codec_state.encode.slice_rawdata_index = + calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + obj_context->codec_state.encode.slice_rawdata_count = + calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + assert(i965->codec_info->enc_hw_context_init); obj_context->hw_context = i965->codec_info->enc_hw_context_init(ctx, obj_config); } else { @@ -2187,6 +2219,17 @@ i965_BeginPicture(VADriverContextP ctx, obj_context->codec_state.encode.num_slice_params_ext = 0; obj_context->codec_state.encode.current_render_target = render_target; /*This is input new frame*/ obj_context->codec_state.encode.last_packed_header_type = 0; + 
memset(obj_context->codec_state.encode.slice_rawdata_index, 0, + sizeof(int) * obj_context->codec_state.encode.slice_num); + memset(obj_context->codec_state.encode.slice_rawdata_count, 0, + sizeof(int) * obj_context->codec_state.encode.slice_num); + + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]); + obj_context->codec_state.encode.num_packed_header_params_ext = 0; + obj_context->codec_state.encode.num_packed_header_data_ext = 0; } else { obj_context->codec_state.decode.current_render_target = render_target; i965_release_buffer_store(&obj_context->codec_state.decode.pic_param); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 95ee1938..98272096 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -125,6 +125,9 @@ struct decode_state struct object_surface *reference_objects[16]; /* Up to 2 reference surfaces are valid for MPEG-2,*/ }; +#define SLICE_PACKED_DATA_INDEX_TYPE 0x80000000 +#define SLICE_PACKED_DATA_INDEX_MASK 0x00FFFFFF + struct encode_state { struct codec_state_base base; @@ -145,6 +148,23 @@ struct encode_state struct buffer_store **slice_params_ext; int max_slice_params_ext; int num_slice_params_ext; + + /* For the packed data that needs to be inserted into video clip */ + /* currently it is mainly for packed raw data */ + struct buffer_store **packed_header_params_ext; + int max_packed_header_params_ext; + int num_packed_header_params_ext; + struct buffer_store **packed_header_data_ext; + int max_packed_header_data_ext; + int num_packed_header_data_ext; + + /* the array is determined by max_slice_params_ext */ + int slice_num; + /* This is to store the first index of packed data for one slice */ + int *slice_rawdata_index; + /* This 
is to store the number of packed data for one slice */ + int *slice_rawdata_count; + int last_packed_header_type; struct buffer_store *misc_param[16]; -- cgit v1.2.1 From 89507c06c7ed03d829cf2526e621d844e174c90c Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_encoding: Add the support of inserting the packed raw data passed from user Under some encoding scenario, the user-space application hopes that the driver can insert the passed packed rawdata into the coded clip. This is to allow the insertion of packed rawdata passed from user. As the position of packed rawdata is related with the slice. So the following restrictions are added: 1. the packed rawdata header type/data should be paired. 2. the packed rawdata data is inserted by following the passed order 3. the packed rawdata header type/data is split by using VAEncSliceParameterBuffer. That is to say: The packed rawdata for slice 0 should be passed before the first VAEncSliceParameterBuffer. After one VAEncSliceParameterBuffer is parsed, the subsequent packed rawdata is for another new slice. The subsequent packed rawdata after the last VAEncSliceParameterBuffer is ignored. 4. it does not change the rule for the packed data of SPS/PPS/MISC type. 
Signed-off-by: Zhao Yakui (cherry picked from commit 974597ef64dc9a283d4787e1484a75d1610414f4) Conflicts: src/gen75_mfc.c src/gen8_mfc.c --- src/gen6_mfc.c | 4 ++++ src/gen6_mfc.h | 7 ++++++ src/gen6_mfc_common.c | 47 ++++++++++++++++++++++++++++++++++++++ src/gen75_mfc.c | 6 +++-- src/gen8_mfc.c | 6 +++-- src/i965_drv_video.c | 62 +++++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 121 insertions(+), 11 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 21db0a77..c6702e85 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -814,6 +814,8 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander @@ -1206,6 +1208,8 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 9437c317..67c62a49 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -271,4 +271,11 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, extern Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); +extern void +intel_avc_slice_insert_packed_data(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch); + #endif /* _GEN6_MFC_BCS_H_ */ diff --git a/src/gen6_mfc_common.c 
b/src/gen6_mfc_common.c index 95e4dc3b..44e6e957 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1516,3 +1516,50 @@ intel_avc_vme_reference_state(VADriverContextP ctx, vme_context->ref_index_in_mb[list_index] = 0; } } + +void intel_avc_slice_insert_packed_data(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) +{ + int count, i, start_index; + unsigned int length_in_bits; + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = NULL; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + /* If the number of packed data for current slice is zero, return */ + if (encode_state->slice_rawdata_count[slice_index] == 0) + return; + + count = encode_state->slice_rawdata_count[slice_index]; + start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); + + for (i = 0; i < count; i++) { + unsigned int skip_emul_byte_cnt; + + header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer; + + param = (VAEncPackedHeaderParameterBuffer *) + (encode_state->packed_header_params_ext[start_index + i]->buffer); + length_in_bits = param->bit_length; + + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + + /* as the slice header is still required, the last header flag is set to + * zero. 
+ */ + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + skip_emul_byte_cnt, + 0, + 0, + !param->has_emulation_bytes, + slice_batch); + } + return; +} diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 48d84da1..18a588f9 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1192,6 +1192,8 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander @@ -1246,8 +1248,6 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, tail_data, 1, 8, 1, 1, 1, 0, slice_batch); } - - } static dri_bo * @@ -1545,6 +1545,8 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index df996036..2fc1facf 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1056,6 +1056,8 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander @@ -1110,8 +1112,6 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx, tail_data, 1, 8, 1, 1, 1, 0, slice_batch); } - - } static dri_bo * @@ -1441,6 +1441,8 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); // slice hander diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index db0440c9..d8b50dcd 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -655,6 +655,12 @@ i965_GetConfigAttributes(VADriverContextP ctx, case VAConfigAttribEncPackedHeaders: if (entrypoint == VAEntrypointEncSlice) { attrib_list[i].value = VA_ENC_PACKED_HEADER_SEQUENCE | VA_ENC_PACKED_HEADER_PICTURE | VA_ENC_PACKED_HEADER_MISC; + if (profile == VAProfileH264ConstrainedBaseline || + profile == VAProfileH264Main || + profile == VAProfileH264High || + profile == VAProfileH264MultiviewHigh) { + attrib_list[i].value |= VA_ENC_PACKED_HEADER_RAW_DATA; + } break; } @@ -2367,6 +2373,9 @@ DEF_RENDER_ENCODE_SINGLE_BUFFER_FUNC(picture_parameter_ext, pic_param_ext) // DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params) DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter_ext, slice_params_ext) +DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_params_ext, packed_header_params_ext) +DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_data_ext, packed_header_data_ext) + static VAStatus i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx, struct object_context *obj_context, @@ -2430,10 +2439,12 @@ i965_encoder_render_picture(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_context *obj_context = CONTEXT(context); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; + struct encode_state *encode; int i; ASSERT_RET(obj_context, 
VA_STATUS_ERROR_INVALID_CONTEXT); + encode = &obj_context->codec_state.encode; for (i = 0; i < num_buffers; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); @@ -2459,35 +2470,67 @@ i965_encoder_render_picture(VADriverContextP ctx, case VAEncSliceParameterBufferType: vaStatus = I965_RENDER_ENCODE_BUFFER(slice_parameter_ext); + if (vaStatus == VA_STATUS_SUCCESS) { + /* When the max number of slices is updated, it also needs + * to reallocate the arrays that is used to store + * the packed data index/count for the slice + */ + if (encode->max_slice_params_ext > encode->slice_num) { + encode->slice_num = encode->max_slice_params_ext; + encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, + encode->slice_num * sizeof(int)); + encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, + encode->slice_num * sizeof(int)); + if ((encode->slice_rawdata_index == NULL) || + (encode->slice_rawdata_count == NULL)) { + vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED; + return vaStatus; + } + } + } break; case VAEncPackedHeaderParameterBufferType: { - struct encode_state *encode = &obj_context->codec_state.encode; VAEncPackedHeaderParameterBuffer *param = (VAEncPackedHeaderParameterBuffer *)obj_buffer->buffer_store->buffer; encode->last_packed_header_type = param->type; - vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, + if (param->type == VAEncPackedHeaderRawData) { + vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_params_ext); + } else { + vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, obj_context, obj_buffer, va_enc_packed_type_to_idx(encode->last_packed_header_type)); + } break; } case VAEncPackedHeaderDataBufferType: { - struct encode_state *encode = &obj_context->codec_state.encode; - ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence || - encode->last_packed_header_type == VAEncPackedHeaderPicture || - encode->last_packed_header_type == VAEncPackedHeaderSlice || + if 
(encode->last_packed_header_type == VAEncPackedHeaderRawData) { + vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext); + if (vaStatus == VA_STATUS_SUCCESS) { + /* store the first index of the packed header data for current slice */ + if (encode->slice_rawdata_index[encode->num_slice_params_ext] == 0) { + encode->slice_rawdata_index[encode->num_slice_params_ext] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } + encode->slice_rawdata_count[encode->num_slice_params_ext]++; + } + } else { + ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence || + encode->last_packed_header_type == VAEncPackedHeaderPicture || + encode->last_packed_header_type == VAEncPackedHeaderSlice || (((encode->last_packed_header_type & VAEncPackedHeaderMiscMask) == VAEncPackedHeaderMiscMask) && ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0)), VA_STATUS_ERROR_ENCODING_ERROR); - vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, + vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, obj_context, obj_buffer, va_enc_packed_type_to_idx(encode->last_packed_header_type)); + } break; } @@ -2591,6 +2634,11 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) } else if (obj_context->codec_type == CODEC_ENC) { ASSERT_RET(VAEntrypointEncSlice == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT); + if (obj_context->codec_state.encode.num_packed_header_params_ext != + obj_context->codec_state.encode.num_packed_header_data_ext) { + WARN_ONCE("the packed header/data is not paired for encoding!\n"); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } if (!(obj_context->codec_state.encode.pic_param || obj_context->codec_state.encode.pic_param_ext)) { return VA_STATUS_ERROR_INVALID_PARAMETER; -- cgit v1.2.1 From ee4b8c3ec1e5b5f126d8bdb7021eb15630de751e Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_encoding: Reset the last_packed_header_type to 
avoid the unpaired packed header type/data After adding the support of inserting the packed rawdata, more group of packed header data can be passed. In order to insert the packed rawdata correctly, the packed header type/ data should be paired. Signed-off-by: Zhao Yakui (cherry picked from commit fd78866bd64d7ab57fe8cb0c4b25e8357973b0b1) --- src/i965_drv_video.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index d8b50dcd..cf42b687 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2508,7 +2508,11 @@ i965_encoder_render_picture(VADriverContextP ctx, case VAEncPackedHeaderDataBufferType: { - + if (encode->last_packed_header_type == 0) { + WARN_ONCE("the packed header data is passed without type!\n"); + vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; + return vaStatus; + } if (encode->last_packed_header_type == VAEncPackedHeaderRawData) { vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext); if (vaStatus == VA_STATUS_SUCCESS) { @@ -2531,6 +2535,7 @@ i965_encoder_render_picture(VADriverContextP ctx, obj_buffer, va_enc_packed_type_to_idx(encode->last_packed_header_type)); } + encode->last_packed_header_type = 0; break; } -- cgit v1.2.1 From 107274f309c6c3a7c59b70d5140b781341c7e9c2 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_Encoding: Add the support of packed slice header to be flexible Under some encoding scenario, the user hopes to generate the packed slice header data by themself and then the driver can insert the passed slice header packed data into the coded clip. 1.The VA_ENC_PACKED_HEADER_SLICE flag is exported and it is treated as optional. This is to say: if packed slice header data is passed, it will be inserted directly. If no packed slice header data is passed, the driver will help to generate it. 2.Another restriction is that the packed slice header data is inserted after the packed rawdata for one slice. 
That is to say: If it needs to insert the packed rawdata and slice header data, the packed rawdata will be inserted firstly(This is handled by the driver). Signed-off-by: Zhao, Yakui (cherry picked from commit 00111e8a8bfa67b971419b72577eaa1b9f47bc34) Conflicts: src/gen75_mfc.c src/gen8_mfc.c --- src/gen6_mfc.c | 28 ----------------------- src/gen6_mfc_common.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/gen75_mfc.c | 30 ------------------------- src/gen8_mfc.c | 30 ------------------------- src/i965_drv_video.c | 32 ++++++++++++++++++++++++--- src/i965_drv_video.h | 3 +++ 6 files changed, 90 insertions(+), 94 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index c6702e85..f1b29b9d 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -788,8 +788,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; @@ -816,14 +814,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); - dri_bo_map(vme_context->vme_output.bo , 1); msg = (unsigned int *)vme_context->vme_output.bo->virtual; @@ -869,7 +859,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 1, 1, 
1, 0, slice_batch); } - free(slice_header); } @@ -1176,8 +1165,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; int old_used = intel_batchbuffer_used_size(slice_batch), used; @@ -1210,21 +1197,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - free(slice_header); - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ used = intel_batchbuffer_used_size(slice_batch); head_size = (used - old_used) / 16; diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 44e6e957..e500feb8 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1528,10 +1528,12 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, VAEncPackedHeaderParameterBuffer *param = NULL; unsigned int *header_data = NULL; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int slice_header_index; - /* If the number of packed data for current slice is zero, return */ - if (encode_state->slice_rawdata_count[slice_index] == 0) - return; + if (encode_state->slice_header_index[slice_index] == 0) + slice_header_index = -1; + else + slice_header_index 
= (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); count = encode_state->slice_rawdata_count[slice_index]; start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); @@ -1543,6 +1545,11 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *) (encode_state->packed_header_params_ext[start_index + i]->buffer); + + /* skip the slice header packed data type as it is lastly inserted */ + if (param->type == VAEncPackedHeaderSlice) + continue; + length_in_bits = param->bit_length; skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); @@ -1561,5 +1568,53 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, !param->has_emulation_bytes, slice_batch); } + + if (slice_header_index == -1) { + unsigned char *slice_header = NULL; + int slice_header_length_in_bits = 0; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + + /* No slice header data is passed. 
And the driver needs to generate it */ + /* For the Normal H264 */ + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, + pPicParameter, + pSliceParameter, + &slice_header); + mfc_context->insert_object(ctx, encoder_context, + (unsigned int *)slice_header, + ALIGN(slice_header_length_in_bits, 32) >> 5, + slice_header_length_in_bits & 0x1f, + 5, /* first 5 bytes are start code + nal unit type */ + 1, 0, 1, slice_batch); + + free(slice_header); + } else { + unsigned int skip_emul_byte_cnt; + + header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer; + + param = (VAEncPackedHeaderParameterBuffer *) + (encode_state->packed_header_params_ext[start_index + i]->buffer); + length_in_bits = param->bit_length; + + /* as the slice header is the last header data for one slice, + * the last header flag is set to one. + */ + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + skip_emul_byte_cnt, + 1, + 0, + !param->has_emulation_bytes, + slice_batch); + } + return; } diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 18a588f9..1051dd53 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1166,8 +1166,6 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; @@ -1194,16 +1192,6 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - 
slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); - - free(slice_header); - dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1516,8 +1504,6 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); @@ -1547,22 +1533,6 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - - free(slice_header); - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ head_offset = intel_batchbuffer_used_size(slice_batch); diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 2fc1facf..c86cf091 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1029,8 +1029,6 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; @@ -1058,16 +1056,6 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); - - free(slice_header); - dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1409,8 +1397,6 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; int old_used = intel_batchbuffer_used_size(slice_batch), used; @@ -1443,22 +1429,6 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - 
encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - - free(slice_header); - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ used = intel_batchbuffer_used_size(slice_batch); head_size = (used - old_used) / 16; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index cf42b687..8da5a947 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -659,7 +659,8 @@ i965_GetConfigAttributes(VADriverContextP ctx, profile == VAProfileH264Main || profile == VAProfileH264High || profile == VAProfileH264MultiviewHigh) { - attrib_list[i].value |= VA_ENC_PACKED_HEADER_RAW_DATA; + attrib_list[i].value |= (VA_ENC_PACKED_HEADER_RAW_DATA | + VA_ENC_PACKED_HEADER_SLICE); } break; } @@ -1650,6 +1651,12 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj) free(obj_context->codec_state.encode.slice_rawdata_count); obj_context->codec_state.encode.slice_rawdata_count = NULL; } + + if (obj_context->codec_state.encode.slice_header_index) { + free(obj_context->codec_state.encode.slice_header_index); + obj_context->codec_state.encode.slice_header_index = NULL; + } + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); free(obj_context->codec_state.encode.packed_header_params_ext); @@ -1792,6 +1799,9 @@ i965_CreateContext(VADriverContextP ctx, obj_context->codec_state.encode.slice_rawdata_count = calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + obj_context->codec_state.encode.slice_header_index = + calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + assert(i965->codec_info->enc_hw_context_init); obj_context->hw_context = i965->codec_info->enc_hw_context_init(ctx, obj_config); } else { @@ -2229,6 +2239,8 @@ 
i965_BeginPicture(VADriverContextP ctx, sizeof(int) * obj_context->codec_state.encode.slice_num); memset(obj_context->codec_state.encode.slice_rawdata_count, 0, sizeof(int) * obj_context->codec_state.encode.slice_num); + memset(obj_context->codec_state.encode.slice_header_index, 0, + sizeof(int) * obj_context->codec_state.encode.slice_num); for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); @@ -2481,7 +2493,10 @@ i965_encoder_render_picture(VADriverContextP ctx, encode->slice_num * sizeof(int)); encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, encode->slice_num * sizeof(int)); + encode->slice_header_index = realloc(encode->slice_header_index, + encode->slice_num * sizeof(int)); if ((encode->slice_rawdata_index == NULL) || + (encode->slice_header_index == NULL) || (encode->slice_rawdata_count == NULL)) { vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED; return vaStatus; @@ -2495,7 +2510,8 @@ i965_encoder_render_picture(VADriverContextP ctx, VAEncPackedHeaderParameterBuffer *param = (VAEncPackedHeaderParameterBuffer *)obj_buffer->buffer_store->buffer; encode->last_packed_header_type = param->type; - if (param->type == VAEncPackedHeaderRawData) { + if ((param->type == VAEncPackedHeaderRawData) || + (param->type == VAEncPackedHeaderSlice)) { vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_params_ext); } else { vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, @@ -2513,7 +2529,8 @@ i965_encoder_render_picture(VADriverContextP ctx, vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; return vaStatus; } - if (encode->last_packed_header_type == VAEncPackedHeaderRawData) { + if (encode->last_packed_header_type == VAEncPackedHeaderRawData || + encode->last_packed_header_type == VAEncPackedHeaderSlice) { vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext); if (vaStatus == VA_STATUS_SUCCESS) { /* store the first index of 
the packed header data for current slice */ @@ -2522,6 +2539,15 @@ i965_encoder_render_picture(VADriverContextP ctx, SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); } encode->slice_rawdata_count[encode->num_slice_params_ext]++; + if (encode->last_packed_header_type == VAEncPackedHeaderSlice) { + if (encode->slice_header_index[encode->num_slice_params_ext] == 0) { + encode->slice_header_index[encode->num_slice_params_ext] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } else { + WARN_ONCE("Multi slice header data is passed for" + " slice %d!\n", encode->num_slice_params_ext); + } + } } } else { ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence || diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 98272096..29e550f4 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -165,6 +165,9 @@ struct encode_state /* This is to store the number of packed data for one slice */ int *slice_rawdata_count; + /* This is to store the index of packed slice header for one slice */ + int *slice_header_index; + int last_packed_header_type; struct buffer_store *misc_param[16]; -- cgit v1.2.1 From b5fec62b2533c7086ddd40d0c61b51aedc6e33c2 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Mon, 26 May 2014 08:40:15 +0800 Subject: H264_encoding: Don't update the slice qp for CBR mode when finding packed slice_header data Otherwise the slice qp is inconsistent and the encoding is incorrect. 
Signed-off-by: Zhao Yakui (cherry picked from commit 897527c30435202927e6cd05cd5189a710d02c91) --- src/gen6_mfc.c | 6 ++++-- src/gen75_mfc.c | 6 ++++-- src/gen8_mfc.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index f1b29b9d..0a100549 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -794,7 +794,8 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ @@ -1173,7 +1174,8 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 1051dd53..2ff35af6 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1172,7 +1172,8 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ @@ -1510,7 +1511,8 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + 
if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index c86cf091..e3f07add 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1036,7 +1036,8 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ @@ -1405,7 +1406,8 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; } /* only support for 8-bit pixel bit-depth */ -- cgit v1.2.1 From bb3c0d9848f757906662884755e3184a05981d0b Mon Sep 17 00:00:00 2001 From: Sreerenj Balachandran Date: Thu, 5 Jun 2014 12:00:49 +0300 Subject: Fix the GetConfigAttributes() for H264SteroHighProfile (cherry picked from commit 510b271f912afb35edac1d3fb39354ee98b01711) --- src/i965_drv_video.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 8da5a947..82a4e691 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -658,6 +658,7 @@ i965_GetConfigAttributes(VADriverContextP ctx, if (profile == VAProfileH264ConstrainedBaseline || profile == VAProfileH264Main || profile == VAProfileH264High || + profile == VAProfileH264StereoHigh || profile == VAProfileH264MultiviewHigh) { attrib_list[i].value |= (VA_ENC_PACKED_HEADER_RAW_DATA | VA_ENC_PACKED_HEADER_SLICE); -- cgit v1.2.1 From 
1ed9128c99723ea202ba7614ca4f9c563b7ad846 Mon Sep 17 00:00:00 2001 From: Sreerenj Balachandran Date: Tue, 27 May 2014 05:18:05 -0600 Subject: Fix the segfault while inserting packed slice header Signed-off-by: Sreerenj Balachandran Reviewed-by: Zhao, Yakui (cherry picked from commit 7c2273f1334d7d45e248d128e17200b7e8beffd1) --- src/gen6_mfc_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index e500feb8..77c46ddb 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1593,10 +1593,10 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, } else { unsigned int skip_emul_byte_cnt; - header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer; + header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer; param = (VAEncPackedHeaderParameterBuffer *) - (encode_state->packed_header_params_ext[start_index + i]->buffer); + (encode_state->packed_header_params_ext[slice_header_index]->buffer); length_in_bits = param->bit_length; /* as the slice header is the last header data for one slice, -- cgit v1.2.1 From 45481435f93b52f944c5f876e95fb7bfba56ed4f Mon Sep 17 00:00:00 2001 From: Sreerenj Balachandran Date: Wed, 28 May 2014 15:02:41 -0600 Subject: Fix the segfault while encoding multiple slice per frame. Zero initialize the packed raw data index array and packed slice header index array during each preallocation. 
Signed-off-by: Sreerenj Balachandran Reviewed-by: Zhao, Yakui (cherry picked from commit fbbe401aa28a0b3859d587ef08f0df15a2f7c8f2) --- src/i965_drv_video.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 82a4e691..68a6052b 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2489,6 +2489,7 @@ i965_encoder_render_picture(VADriverContextP ctx, * the packed data index/count for the slice */ if (encode->max_slice_params_ext > encode->slice_num) { + int slice_num = encode->slice_num; encode->slice_num = encode->max_slice_params_ext; encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, encode->slice_num * sizeof(int)); @@ -2496,6 +2497,12 @@ i965_encoder_render_picture(VADriverContextP ctx, encode->slice_num * sizeof(int)); encode->slice_header_index = realloc(encode->slice_header_index, encode->slice_num * sizeof(int)); + memset(encode->slice_rawdata_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_rawdata_count + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_header_index + slice_num, 0, + sizeof(int) * NUM_SLICES); if ((encode->slice_rawdata_index == NULL) || (encode->slice_header_index == NULL) || (encode->slice_rawdata_count == NULL)) { -- cgit v1.2.1 From 773525af39331df7a9d3178037320734774fc8be Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Thu, 12 Jun 2014 08:54:38 +0800 Subject: Encoding: Update the comment for the data structure about the packed rawdata/slice_header data Signed-off-by: Zhao Yakui (cherry picked from commit 56715c893fa87e2d3af2938b9202d75cdc79a8fd) --- src/i965_drv_video.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 29e550f4..dfa2a7ad 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -150,7 +150,7 @@ struct encode_state int num_slice_params_ext; /* For the packed data that needs to be inserted into video clip */ - /* 
currently it is mainly for packed raw data */ + /* currently it is mainly to track packed raw data and packed slice_header data. */ struct buffer_store **packed_header_params_ext; int max_packed_header_params_ext; int num_packed_header_params_ext; @@ -162,7 +162,11 @@ struct encode_state int slice_num; /* This is to store the first index of packed data for one slice */ int *slice_rawdata_index; - /* This is to store the number of packed data for one slice */ + /* This is to store the number of packed data for one slice. + * Both packed rawdata and slice_header data are tracked by this + * this variable. That is to say: When one packed slice_header is parsed, + * this variable will also be increased. + */ int *slice_rawdata_count; /* This is to store the index of packed slice header for one slice */ -- cgit v1.2.1 From 745340dd013399f64507de73401ab3adb712dad5 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Thu, 12 Jun 2014 08:54:41 +0800 Subject: Encoding: Use the different delimeter to pass packed_rawdata and slice_header based on VAConfigAttribEncPackedHeaders attribute Currently the packed_slice_header is optional. And it uses the VAEncSliceParameterBuffer as the delimiter to decide how to insert the packed rawdata/slice_header for one slice. This is not convenient in some scenarios. For example: some users hope to be more flexible. When the user is responsible for generating the packed slice_header, it hopes to use the packed slice_header as the delimiter to determine how to insert the packed rawdata/slice_header for the given slice. So the VAConfigAttribEncPackedHeaders attribute of encoding_context is used to decide which kind of delimiter is used. a. When the VAEncPackedSlice is set when calling vaCreateConfig, it will use the packed slice_header as delimiter. Of course the packed rawdata should be parsed before the packed slice_header for one given slice.
For example: for the slice 0: the packed rawdata should be parsed before parsing the first packed slice_header. After one packed slice_header is parsed, it will start to parse the corresponding data for a new slice. b. When the VAEncPackedSlice is not set when calling vaCreateConfig, it will use the VAEncSliceParameterBuffer as delimiter. V1->V2: Return an error instead of only printing a warning message when packed slice_header is missing for some slice under the VAEncPackedSlice mode. This is the suggestion from Gwenole and Sreerenj Balachandran. Signed-off-by: Zhao, Yakui (cherry picked from commit 9d49a6d693aa6c862467a4a879bc86d9cb98dbe5) --- src/i965_drv_video.c | 119 ++++++++++++++++++++++++++++++++++++++++----------- src/i965_drv_video.h | 10 ++++- 2 files changed, 104 insertions(+), 25 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 68a6052b..b7a04853 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1778,6 +1778,7 @@ i965_CreateContext(VADriverContextP ctx, assert(i965->codec_info->proc_hw_context_init); obj_context->hw_context = i965->codec_info->proc_hw_context_init(ctx, obj_config); } else if (VAEntrypointEncSlice == obj_config->entrypoint) { /*encode routin only*/ + VAConfigAttrib *packed_attrib; obj_context->codec_type = CODEC_ENC; memset(&obj_context->codec_state.encode, 0, sizeof(obj_context->codec_state.encode)); obj_context->codec_state.encode.current_render_target = VA_INVALID_ID; @@ -1794,15 +1795,28 @@ i965_CreateContext(VADriverContextP ctx, calloc(obj_context->codec_state.encode.max_packed_header_data_ext, sizeof(struct buffer_store *)); - obj_context->codec_state.encode.slice_num = NUM_SLICES; + obj_context->codec_state.encode.max_slice_num = NUM_SLICES; obj_context->codec_state.encode.slice_rawdata_index = - calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); obj_context->codec_state.encode.slice_rawdata_count = - 
calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); obj_context->codec_state.encode.slice_header_index = - calloc(obj_context->codec_state.encode.slice_num, sizeof(int)); - + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); + + obj_context->codec_state.encode.slice_index = 0; + packed_attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribEncPackedHeaders); + if (packed_attrib) + obj_context->codec_state.encode.packed_header_flag = packed_attrib->value; + else { + /* use the default value. SPS/PPS/RAWDATA is passed from user + * while Slice_header data is generated by driver. + */ + obj_context->codec_state.encode.packed_header_flag = + VA_ENC_PACKED_HEADER_SEQUENCE | + VA_ENC_PACKED_HEADER_PICTURE | + VA_ENC_PACKED_HEADER_RAW_DATA; + } assert(i965->codec_info->enc_hw_context_init); obj_context->hw_context = i965->codec_info->enc_hw_context_init(ctx, obj_config); } else { @@ -2237,11 +2251,11 @@ i965_BeginPicture(VADriverContextP ctx, obj_context->codec_state.encode.current_render_target = render_target; /*This is input new frame*/ obj_context->codec_state.encode.last_packed_header_type = 0; memset(obj_context->codec_state.encode.slice_rawdata_index, 0, - sizeof(int) * obj_context->codec_state.encode.slice_num); + sizeof(int) * obj_context->codec_state.encode.max_slice_num); memset(obj_context->codec_state.encode.slice_rawdata_count, 0, - sizeof(int) * obj_context->codec_state.encode.slice_num); + sizeof(int) * obj_context->codec_state.encode.max_slice_num); memset(obj_context->codec_state.encode.slice_header_index, 0, - sizeof(int) * obj_context->codec_state.encode.slice_num); + sizeof(int) * obj_context->codec_state.encode.max_slice_num); for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); @@ -2249,6 +2263,7 @@ 
i965_BeginPicture(VADriverContextP ctx, i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]); obj_context->codec_state.encode.num_packed_header_params_ext = 0; obj_context->codec_state.encode.num_packed_header_data_ext = 0; + obj_context->codec_state.encode.slice_index = 0; } else { obj_context->codec_state.decode.current_render_target = render_target; i965_release_buffer_store(&obj_context->codec_state.decode.pic_param); @@ -2488,21 +2503,25 @@ i965_encoder_render_picture(VADriverContextP ctx, * to reallocate the arrays that is used to store * the packed data index/count for the slice */ - if (encode->max_slice_params_ext > encode->slice_num) { - int slice_num = encode->slice_num; - encode->slice_num = encode->max_slice_params_ext; + if (!(encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE)) { + encode->slice_index++; + } + if (encode->slice_index == encode->max_slice_num) { + int slice_num = encode->max_slice_num; encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, - encode->slice_num * sizeof(int)); + (slice_num + NUM_SLICES) * sizeof(int)); encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, - encode->slice_num * sizeof(int)); + (slice_num + NUM_SLICES) * sizeof(int)); encode->slice_header_index = realloc(encode->slice_header_index, - encode->slice_num * sizeof(int)); + (slice_num + NUM_SLICES) * sizeof(int)); memset(encode->slice_rawdata_index + slice_num, 0, sizeof(int) * NUM_SLICES); memset(encode->slice_rawdata_count + slice_num, 0, sizeof(int) * NUM_SLICES); memset(encode->slice_header_index + slice_num, 0, sizeof(int) * NUM_SLICES); + + encode->max_slice_num += NUM_SLICES; if ((encode->slice_rawdata_index == NULL) || (encode->slice_header_index == NULL) || (encode->slice_rawdata_count == NULL)) { @@ -2540,20 +2559,64 @@ i965_encoder_render_picture(VADriverContextP ctx, if (encode->last_packed_header_type == VAEncPackedHeaderRawData || encode->last_packed_header_type == 
VAEncPackedHeaderSlice) { vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext); - if (vaStatus == VA_STATUS_SUCCESS) { + + /* When the PACKED_SLICE_HEADER flag is passed, it will use + * the packed_slice_header as the delimeter to decide how + * the packed rawdata is inserted for the given slice. + * Otherwise it will use the VAEncSequenceParameterBuffer + * as the delimeter + */ + if (encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) { /* store the first index of the packed header data for current slice */ - if (encode->slice_rawdata_index[encode->num_slice_params_ext] == 0) { - encode->slice_rawdata_index[encode->num_slice_params_ext] = - SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + if (encode->slice_rawdata_index[encode->slice_index] == 0) { + encode->slice_rawdata_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); } - encode->slice_rawdata_count[encode->num_slice_params_ext]++; + encode->slice_rawdata_count[encode->slice_index]++; if (encode->last_packed_header_type == VAEncPackedHeaderSlice) { - if (encode->slice_header_index[encode->num_slice_params_ext] == 0) { - encode->slice_header_index[encode->num_slice_params_ext] = + /* find one packed slice_header delimeter. And the following + * packed data is for the next slice + */ + encode->slice_header_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + encode->slice_index++; + /* Reallocate the buffer to record the index/count of + * packed_data for one slice. 
+ */ + if (encode->slice_index == encode->max_slice_num) { + int slice_num = encode->max_slice_num; + + encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_header_index = realloc(encode->slice_header_index, + (slice_num + NUM_SLICES) * sizeof(int)); + memset(encode->slice_rawdata_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_rawdata_count + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_header_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + encode->max_slice_num += NUM_SLICES; + } + } + } else { + if (vaStatus == VA_STATUS_SUCCESS) { + /* store the first index of the packed header data for current slice */ + if (encode->slice_rawdata_index[encode->slice_index] == 0) { + encode->slice_rawdata_index[encode->slice_index] = SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); - } else { - WARN_ONCE("Multi slice header data is passed for" - " slice %d!\n", encode->num_slice_params_ext); + } + encode->slice_rawdata_count[encode->slice_index]++; + if (encode->last_packed_header_type == VAEncPackedHeaderSlice) { + if (encode->slice_header_index[encode->slice_index] == 0) { + encode->slice_header_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } else { + WARN_ONCE("Multi slice header data is passed for" + " slice %d!\n", encode->slice_index); + } } } } @@ -2690,6 +2753,14 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) (obj_context->codec_state.encode.num_slice_params_ext <=0)) { return VA_STATUS_ERROR_INVALID_PARAMETER; } + + if ((obj_context->codec_state.encode.packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) && + (obj_context->codec_state.encode.num_slice_params_ext != + obj_context->codec_state.encode.slice_index)) { + WARN_ONCE("packed 
slice_header data is missing for some slice" + " under packed SLICE_HEADER mode\n"); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } } else { if (obj_context->codec_state.decode.pic_param == NULL) { return VA_STATUS_ERROR_INVALID_PARAMETER; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index dfa2a7ad..10e87782 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -149,6 +149,12 @@ struct encode_state int max_slice_params_ext; int num_slice_params_ext; + /* Check the user-configurable packed_header attribute. + * Currently it is mainly used to check whether the packed slice_header data + * is provided by user or the driver. + * TBD: It will check for the packed SPS/PPS/MISC/RAWDATA and so on. + */ + unsigned int packed_header_flag; /* For the packed data that needs to be inserted into video clip */ /* currently it is mainly to track packed raw data and packed slice_header data. */ struct buffer_store **packed_header_params_ext; @@ -158,8 +164,10 @@ struct encode_state int max_packed_header_data_ext; int num_packed_header_data_ext; + /* the index of current slice */ + int slice_index; /* the array is determined by max_slice_params_ext */ - int slice_num; + int max_slice_num; /* This is to store the first index of packed data for one slice */ int *slice_rawdata_index; /* This is to store the number of packed data for one slice. -- cgit v1.2.1 From c5cb17ea86f0065a939d3636dd26651c93d497c8 Mon Sep 17 00:00:00 2001 From: "Zhao, Yakui" Date: Tue, 1 Jul 2014 09:43:56 +0800 Subject: remove fixed uses of inte-gen4asm tool In the gen7 and gen8 post processing Makefiles the GEN4ASM variable is not honored when calling intel-gen4asm. This causes build errors when GEN4ASM is set to a different value This was discovered when using intel-gpu-tools version 1.7 where the shaders are actually compiled. 
Signed-off-by: Daniel Charles Reviewed-by: Zhao, Yakui (cherry picked from commit f574f2e8a8da27b96abc3936c5b2372ff2b7eefb) --- src/shaders/post_processing/gen7/Makefile.am | 4 ++-- src/shaders/post_processing/gen8/Makefile.am | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am index 0bb572da..1ffc1cde 100644 --- a/src/shaders/post_processing/gen7/Makefile.am +++ b/src/shaders/post_processing/gen7/Makefile.am @@ -88,10 +88,10 @@ $(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A) ../../gpp.py _pp0.$@ $@; \ rm _pp0.$@ .g7s.g7b: - $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7 $< + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7 $< .g7s.g75b: - $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7.5 $< + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7.5 $< CLEANFILES = $(INTEL_PP_GEN7_ASM) diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index b41ab46e..9898a452 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -63,7 +63,7 @@ $(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) ../../gpp.py _pp0.$@ $@; \ rm _pp0.$@ .g8s.g8b: - $(AM_V_GEN)intel-gen4asm -a -o $@ -g 8 $< + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 8 $< CLEANFILES = $(INTEL_PP_GEN7_ASM) -- cgit v1.2.1 From 82d2ed8d7da3619c0ea467c06604f5626fc0b901 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 23 Jul 2014 13:46:17 +0800 Subject: Add more check of H264 slice param to avoid GPU hang caused by the incorrect parameter This is to fix the GPU hang in https://bugs.freedesktop.org/show_bug.cgi?id=76363 V1->V2: Use the new check based on Haihao's comment. Discard the current frame with the error slice_param instead of smart fix. In such case it can prompt that the error slice_param can be fixed by the upper-middle. 
Signed-off-by: Zhao Yakui Tested-by: ValdikSS Reviewed-by: Xiang Haihao (cherry picked from commit 04202281135149a13a32dfb8a902debfac1331fe) --- src/i965_decoder_utils.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 0539e083..546285ea 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -754,6 +754,8 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, VAStatus va_status; struct object_surface *obj_surface; int i; + VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; + int j; assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); assert(pic_param->CurrPic.picture_id != VA_INVALID_SURFACE); @@ -802,6 +804,37 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, } decode_state->reference_objects[i] = obj_surface; } + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + if (next_slice_param != NULL) { + /* If the mb position of next_slice is less than or equal to the current slice, + * discard the current frame. + */ + if (next_slice_param->first_mb_in_slice <= slice_param->first_mb_in_slice) { + next_slice_param = NULL; + WARN_ONCE("!!!incorrect slice_param. 
The first_mb_in_slice of next_slice is less" + " than or equal to that in current slice\n"); + goto error; + } + } + } + } + return VA_STATUS_SUCCESS; error: -- cgit v1.2.1 From f3f49f2f0e6b3669d9b09341a11cf0b96e138674 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 18 Jun 2014 13:11:48 +0200 Subject: decoder: h264: fix RefPicList0/1 without frame in DPB. Some bitstreams (e.g. truncated, or non conformant), or bugs in codec layers, would incorrectly make decoders to fill in the RefPicList0/1 lists with surfaces that have not received any content yet. There is no reason for the driver to crash in such cases. https://bugs.freedesktop.org/show_bug.cgi?id=82466 Signed-off-by: Gwenole Beauchesne --- src/i965_decoder_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 546285ea..7ea39ddd 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -526,6 +526,8 @@ intel_update_avc_frame_store_index( continue; GenAvcSurface * const avc_surface = obj_surface->private_data; + if (!avc_surface) + continue; if (avc_surface->frame_store_id >= 0) { GenFrameStore * const fs = &frame_store[avc_surface->frame_store_id]; @@ -559,6 +561,8 @@ intel_update_avc_frame_store_index( continue; GenAvcSurface * const avc_surface = obj_surface->private_data; + if (!avc_surface) + continue; if (n < num_free_refs) { GenFrameStore * const fs = free_refs[n++]; fs->surface_id = obj_surface->base.id; -- cgit v1.2.1 From 865f288d6821dfea5a9dc2a0525eba69ef786b23 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 1 Sep 2014 15:48:42 +0800 Subject: H264_Encoding: Fix the incorrect Qp setting under CBR when slice_header is passed Fix the issue in https://bugs.freedesktop.org/show_bug.cgi?id=83143 Signed-off-by: Zhao Yakui Tested-by: Sreerenj Balachandran (cherry picked from commit eca8e0065e3a04156e0817d3a5ac14f4df39d603) Conflicts: src/gen6_mfc.c src/gen8_mfc.c --- src/gen6_mfc.c | 17 +++++++++++++---- 
src/gen75_mfc.c | 16 ++++++++++++---- src/gen8_mfc.c | 17 ++++++++++++----- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 0a100549..455721fc 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -791,11 +791,15 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -808,7 +812,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, pPicParameter, pSliceParameter, encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1171,11 +1175,16 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int old_used = intel_batchbuffer_used_size(slice_batch), used; unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + /* Use the adjusted qp when slice_header is generated by driver */ + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1191,7 +1200,7 @@ 
gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, encode_state, encoder_context, (rate_control_mode == VA_RC_CBR), - qp, + qp_slice, slice_batch); if (slice_index == 0) diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 2ff35af6..a6a3c1d3 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -1169,11 +1169,15 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1186,7 +1190,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, pPicParameter, pSliceParameter, encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1508,11 +1512,15 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1527,7 +1535,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, 
encode_state, encoder_context, (rate_control_mode == VA_RC_CBR), - qp, + qp_slice, slice_batch); if (slice_index == 0) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index e3f07add..2d76816d 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1032,12 +1032,15 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; - + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1050,7 +1053,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, pPicParameter, pSliceParameter, encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1403,11 +1406,15 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int old_used = intel_batchbuffer_used_size(slice_batch), used; unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) + if (encode_state->slice_header_index[slice_index] == 0) { pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1423,7 +1430,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, encode_state, encoder_context, 
(rate_control_mode == VA_RC_CBR), - qp, + qp_slice, slice_batch); if (slice_index == 0) -- cgit v1.2.1 From 242fb4fcb8aa7300ee47400a6c2784610e5d4a6a Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Wed, 27 Aug 2014 01:04:48 -0600 Subject: Move gen6 phantom slice function as common Following haihao's suggestion, make the gen6 phantom slice function reusable by SNB+. v1->v2: Remove an unnecessary comment. Signed-off-by: Zhong Li Reviewed-by: Xiang Haihao Gwenole Beauchesne (cherry picked from commit 5e30cc3093407ca2b4dbefa1da8a1262ff8ae7e9) --- src/gen6_mfd.c | 81 ++---------------------------------------------- src/i965_decoder_utils.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ src/i965_decoder_utils.h | 7 +++++ 3 files changed, 84 insertions(+), 79 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 8128a80b..b6d19e88 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -596,56 +596,6 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -static void -gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, - VAPictureParameterBufferH264 *pic_param, - VASliceParameterBufferH264 *next_slice_param, - struct gen6_mfd_context *gen6_mfd_context) -{ - struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; - int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; - int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ - int slice_hor_pos, slice_ver_pos, slice_start_mb_num, next_slice_hor_pos, next_slice_ver_pos; - int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && - pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); - - if (next_slice_param) { - int first_mb_in_next_slice; - - slice_hor_pos = 0; - slice_ver_pos = 0; - slice_start_mb_num = 0; - first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; - next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; - next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; - } else { - slice_hor_pos
= 0; - slice_ver_pos = height_in_mbs; - slice_start_mb_num = width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); - next_slice_hor_pos = 0; - next_slice_ver_pos = 0; - } - - BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */ - OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, - slice_ver_pos << 24 | - slice_hor_pos << 16 | - slice_start_mb_num << 0); - OUT_BCS_BATCH(batch, - next_slice_ver_pos << 16 | - next_slice_hor_pos << 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); -} - static inline void gen6_mfd_avc_ref_idx_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -746,40 +696,13 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -static void -gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, - VAPictureParameterBufferH264 *pic_param, - struct gen6_mfd_context *gen6_mfd_context) -{ - struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 6); - OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); -} - -static void -gen6_mfd_avc_phantom_slice(VADriverContextP ctx, - VAPictureParameterBufferH264 *pic_param, - VASliceParameterBufferH264 *next_slice_param, - struct gen6_mfd_context *gen6_mfd_context) -{ - gen6_mfd_avc_phantom_slice_state(ctx, pic_param, next_slice_param, gen6_mfd_context); - gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context); -} - static void gen6_mfd_avc_phantom_slice_first(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, VASliceParameterBufferH264 *next_slice_param, struct gen6_mfd_context *gen6_mfd_context) { - 
gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context); + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context->base.batch); } static void @@ -787,7 +710,7 @@ gen6_mfd_avc_phantom_slice_last(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, struct gen6_mfd_context *gen6_mfd_context) { - gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context); + gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context->base.batch); } static void diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 7ea39ddd..8b546db9 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -487,6 +487,81 @@ gen6_send_avc_ref_idx_state( ); } +static void +gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch) +{ + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + int slice_hor_pos, slice_ver_pos, slice_start_mb_num, next_slice_hor_pos, next_slice_ver_pos; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); + + if (next_slice_param) { + int first_mb_in_next_slice; + + slice_hor_pos = 0; + slice_ver_pos = 0; + slice_start_mb_num = 0; + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + slice_hor_pos = 0; + slice_ver_pos = height_in_mbs; + slice_start_mb_num = width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); + next_slice_hor_pos = 0; + next_slice_ver_pos = 0; + } + + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 
0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + slice_ver_pos << 24 | + slice_hor_pos << 16 | + slice_start_mb_num << 0); + OUT_BCS_BATCH(batch, + next_slice_ver_pos << 16 | + next_slice_hor_pos << 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct intel_batchbuffer *batch) +{ + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +void +gen6_mfd_avc_phantom_slice(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch) +{ + gen6_mfd_avc_phantom_slice_state(ctx, pic_param, next_slice_param, batch); + gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, batch); +} + /* Comparison function for sorting out the array of free frame store entries */ static int compare_avc_ref_store_func(const void *p1, const void *p2) diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 3d39b21a..3e6acdd9 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -89,6 +89,13 @@ gen6_send_avc_ref_idx_state( const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] ); +void +gen6_mfd_avc_phantom_slice(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch +); + VAStatus intel_decoder_sanity_check_input(VADriverContextP ctx, VAProfile profile, -- cgit v1.2.1 From 812d8d039031bf20e66748a39edcdb8f9c798ec4 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Wed, 27 Aug 2014 01:04:49 -0600 Subject: Add phantom slice support on IVB+ HW requires 
driver to add a phantom slice when FirstMbX and FirstMbY are not 0, for AVC decoding error concealment. Otherwise, GPU may hang. This patch is a workaround for bug: https://bugs.freedesktop.org/show_bug.cgi?id=81447 v1->v2: Follow haihao's suggestion to use common function of gen6 phantom slice. v2->v3: Remove an extraneous newline. Signed-off-by: Zhong Li Reviewed-by: Xiang Haihao Gwenole Beauchesne (cherry picked from commit 9672c5ab17c32f25ce1bbdb883abda689440b116) --- src/gen75_mfd.c | 12 ++++++++++++ src/gen7_mfd.c | 12 ++++++++++++ src/gen8_mfd.c | 12 ++++++++++++ 3 files changed, 36 insertions(+) diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index a89640da..299f2b53 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -811,6 +811,15 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } +static void +gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + static void gen75_mfd_avc_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -1145,6 +1154,9 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && slice_param->first_mb_in_slice) + gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 7ab2955a..bfb95bf6 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -505,6 +505,15 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } +static void
+gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + static void gen7_mfd_avc_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -842,6 +851,9 @@ gen7_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && slice_param->first_mb_in_slice) + gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index 5e1b70bf..b4828467 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -574,6 +574,15 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } +static void +gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + static void gen8_mfd_avc_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -908,6 +917,9 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && slice_param->first_mb_in_slice) + gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) 
|| -- cgit v1.2.1 From 22cdc92c99ef0805458e30b7b5ddb6b03c3731ff Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Tue, 10 Jun 2014 20:49:21 -0600 Subject: Add multi quality levels encoding support for GEN7 Two encoding quality levels are support on GEN7. Default quality level is set to be 1, which has better quality, but higher gpu usage. The second quality level is set to be 2, which has worse quality but it has lower gpu usage. Other platforms support for multi-quality-level will be added later. v1->v2: 1. follow haihao's comments to init and check quality_level. 2. remove CBR limitation for low quality level. (Zhao Yakui helps to merge several patches on staging so that it can be cherry-picked to master) Signed-off-by: Zhong Li Signed-off-by: Zhao Yakui --- src/gen7_vme.c | 27 +++++++++++++++++---------- src/i965_drv_video.c | 9 +++++++++ src/i965_drv_video.h | 5 +++++ src/i965_encoder.c | 37 ++++++++++++++++++++++++++++++++++++- src/i965_encoder.h | 2 ++ src/shaders/vme/inter_frame_ivb.asm | 22 ++++++++++++++++++++-- src/shaders/vme/inter_frame_ivb.g7b | 16 +++++++++++++--- src/shaders/vme/vme7.inc | 8 +++++++- 8 files changed, 109 insertions(+), 17 deletions(-) diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 042fe5d0..dc15445e 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -370,6 +370,7 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, unsigned int *mb_cost_table; int i; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); mb_cost_table = (unsigned int *)vme_context->vme_state_message; //building VME state message @@ -377,8 +378,9 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, assert(vme_context->vme_state.bo->virtual); vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual; - if ((slice_param->slice_type == SLICE_TYPE_P) || - (slice_param->slice_type 
== SLICE_TYPE_SP)) { + if (((slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) && + !is_low_quality)) { vme_state_message[0] = 0x01010101; vme_state_message[1] = 0x10010101; vme_state_message[2] = 0x0F0F0F0F; @@ -544,7 +546,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); i += 1; } @@ -598,13 +600,18 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx, int s; bool allow_hwscore = true; int kernel_shader; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; - } + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } } if ((pSliceParameter->slice_type == SLICE_TYPE_I) || diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index b7a04853..7f35f01a 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -627,6 +627,7 @@ i965_GetConfigAttributes(VADriverContextP ctx, int num_attribs) { VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); int i; va_status = i965_validate_config(ctx, profile, entrypoint); @@ -672,6 +673,14 @@ i965_GetConfigAttributes(VADriverContextP ctx, break; } + case VAConfigAttribEncQualityRange: + if (entrypoint == VAEntrypointEncSlice) { + 
attrib_list[i].value = 1; + if(IS_GEN7(i965->intel.device_info)) + attrib_list[i].value = ENCODER_QUALITY_RANGE; + break; + } + default: /* Do nothing */ attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 10e87782..7b931d22 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -65,6 +65,11 @@ #define DEFAULT_HUE 0 #define DEFAULT_SATURATION 50 +#define ENCODER_QUALITY_RANGE 2 +#define ENCODER_DEFAULT_QUALITY 1 +#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY +#define ENCODER_LOW_QUALITY 2 + struct i965_surface { struct object_base *base; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 14c37bb7..f1c1f3dc 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -122,6 +122,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +static VAStatus +intel_encoder_check_misc_parameter(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + + if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] && + encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) { + VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer; + VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data; + encoder_context->quality_level = param_quality_level->quality_level; + + if (encoder_context->quality_level == 0) + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY; + else if (encoder_context->quality_level > encoder_context->quality_range) + goto error; + } + + return VA_STATUS_SUCCESS; + +error: + return VA_STATUS_ERROR_INVALID_PARAMETER; +} + static VAStatus intel_encoder_check_avc_parameter(VADriverContextP ctx, struct encode_state *encode_state, @@ -278,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, vaStatus = 
intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context); + if (vaStatus == VA_STATUS_SUCCESS) + vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context); + out: return vaStatus; } @@ -335,6 +362,8 @@ intel_enc_hw_context_init(VADriverContextP ctx, encoder_context->input_yuv_surface = VA_INVALID_SURFACE; encoder_context->is_tmp_id = 0; encoder_context->rate_control_mode = VA_RC_NONE; + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY; + encoder_context->quality_range = 1; switch (obj_config->profile) { case VAProfileMPEG2Simple: @@ -395,7 +424,13 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) struct hw_context * gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) { - return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init); + struct intel_encoder_context *encoder_context; + + encoder_context = (struct intel_encoder_context *)intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init); + + encoder_context->quality_range = ENCODER_QUALITY_RANGE; + + return (struct hw_context *)encoder_context; } struct hw_context * diff --git a/src/i965_encoder.h b/src/i965_encoder.h index 71396d61..20d49fc2 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -43,6 +43,8 @@ struct intel_encoder_context VASurfaceID input_yuv_surface; int is_tmp_id; unsigned int rate_control_mode; + unsigned int quality_level; + unsigned int quality_range; void *vme_context; void *mfc_context; void (*vme_context_destroy)(void *vme_context); diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm index 3c088511..46f2b4b0 100644 --- a/src/shaders/vme/inter_frame_ivb.asm +++ b/src/shaders/vme/inter_frame_ivb.asm @@ -391,12 +391,14 @@ mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; /* M0 */ /* IME search */ +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub 
LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */ mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ -mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; - mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; @@ -405,6 +407,22 @@ and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; (f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; +jmpi __vme_msg; + +__low_quality_search: +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ + +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b index e4db6ea7..7ed38c5e 100644 --- a/src/shaders/vme/inter_frame_ivb.g7b +++ b/src/shaders/vme/inter_frame_ivb.g7b @@ -141,13 +141,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 
0x00001400, 0x000000bc }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000d0 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000c4 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -163,15 +163,25 @@ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, { 0x00010001, 0x247c0171, 0x00000000, 0x00020002 }, { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000012 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 }, { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 
}, diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc index 3fa99b75..e9d5864e 100644 --- a/src/shaders/vme/vme7.inc +++ b/src/shaders/vme/vme7.inc @@ -54,6 +54,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -132,6 +134,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') /* * GRF 6~11 -- reserved @@ -291,7 +294,6 @@ define(`mb_mv3', `r96') define(`mb_ref', `r97') define(`mb_ref_win', `r84') -define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) define(`PRED_L1', `0x1':uw) define(`PRED_BI', `0x2':uw) @@ -317,3 +319,7 @@ define(`INTER_8X16MODE', `0x02') define(`OBR_MESSAGE_FENCE', `7') define(`OBR_MF_NOCOMMIT', `0') define(`OBR_MF_COMMIT', `0x20') + +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') -- cgit v1.2.1 From 0e5fd95b7ac614964ca157526ecc1b0a5d6a6ae0 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Thu, 7 Aug 2014 22:05:40 -0600 Subject: Enabling SNB avc encoding configuration Signed-off-by: Zhong Li (cherry picked from commit 3e531e7d640ef329151b2205c2e61a67fb80862d) --- src/gen6_vme.c | 50 ++++++++++++++++++++++++++++------------- src/shaders/vme/inter_frame.asm | 25 +++++++++++++++++++-- src/shaders/vme/inter_frame.g6b | 9 ++++++++ src/shaders/vme/inter_frame.g7b | 8 +++++++ src/shaders/vme/vme.inc | 5 +++++ 5 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 8f0006f9..2e025915 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -384,21 +384,38 @@ static VAStatus 
gen6_vme_vme_state_setup(VADriverContextP ctx, dri_bo_map(vme_context->vme_state.bo, 1); assert(vme_context->vme_state.bo->virtual); vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual; - - vme_state_message[0] = 0x01010101; - vme_state_message[1] = 0x10010101; - vme_state_message[2] = 0x0F0F0F0F; - vme_state_message[3] = 0x100F0F0F; - vme_state_message[4] = 0x01010101; - vme_state_message[5] = 0x10010101; - vme_state_message[6] = 0x0F0F0F0F; - vme_state_message[7] = 0x100F0F0F; - vme_state_message[8] = 0x01010101; - vme_state_message[9] = 0x10010101; - vme_state_message[10] = 0x0F0F0F0F; - vme_state_message[11] = 0x000F0F0F; - vme_state_message[12] = 0x00; - vme_state_message[13] = 0x00; + + if (encoder_context->quality_level != ENCODER_LOW_QUALITY) { + vme_state_message[0] = 0x01010101; + vme_state_message[1] = 0x10010101; + vme_state_message[2] = 0x0F0F0F0F; + vme_state_message[3] = 0x100F0F0F; + vme_state_message[4] = 0x01010101; + vme_state_message[5] = 0x10010101; + vme_state_message[6] = 0x0F0F0F0F; + vme_state_message[7] = 0x100F0F0F; + vme_state_message[8] = 0x01010101; + vme_state_message[9] = 0x10010101; + vme_state_message[10] = 0x0F0F0F0F; + vme_state_message[11] = 0x000F0F0F; + vme_state_message[12] = 0x00; + vme_state_message[13] = 0x00; + } else { + vme_state_message[0] = 0x10010101; + vme_state_message[1] = 0x100F0F0F; + vme_state_message[2] = 0x10010101; + vme_state_message[3] = 0x000F0F0F; + vme_state_message[4] = 0; + vme_state_message[5] = 0; + vme_state_message[6] = 0; + vme_state_message[7] = 0; + vme_state_message[8] = 0; + vme_state_message[9] = 0; + vme_state_message[10] = 0; + vme_state_message[11] = 0; + vme_state_message[12] = 0; + vme_state_message[13] = 0; + } vme_state_message[14] = 0x4a4a; vme_state_message[15] = 0x0; @@ -452,7 +469,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, number_mb_cmds = slice_mb_number - i; } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | 
(9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -462,6 +479,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag | ((i==0) << 1)); + *command_ptr++ = encoder_context->quality_level; i += number_mb_cmds; } diff --git a/src/shaders/vme/inter_frame.asm b/src/shaders/vme/inter_frame.asm index e1b6e68d..7c5cfd44 100644 --- a/src/shaders/vme/inter_frame.asm +++ b/src/shaders/vme/inter_frame.asm @@ -35,7 +35,11 @@ mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */ - + +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: #ifdef DEV_SNB shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */ @@ -47,8 +51,25 @@ mov (1) vme_m0.2<1>:W -12:W {align1} ; mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */ mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ +jmpi __vme_msg1; + + +__low_quality_search: +#ifdef DEV_SNB +shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; +add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1}; +add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1}; +#else +mov (1) vme_m0.0<1>:W -8:W {align1} ; +mov (1) vme_m0.2<1>:W -8:W {align1} ; +#endif + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD 
{align1}; /* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +__vme_msg1: mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; mov (1) vme_m1.4<1>:UD FB_PRUNING_ENABLE:UD {align1}; /* MV num is passed by constant buffer. R4.28 */ diff --git a/src/shaders/vme/inter_frame.g6b b/src/shaders/vme/inter_frame.g6b index ca251bbb..bc7cd436 100644 --- a/src/shaders/vme/inter_frame.g6b +++ b/src/shaders/vme/inter_frame.g6b @@ -11,12 +11,21 @@ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e }, { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 }, { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c }, + { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 }, + { 0x00000040, 0x24403dad, 0x00000440, 0xfff8fff8 }, + { 0x00000040, 0x24423dad, 0x00000442, 0xfff8fff8 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame.g7b b/src/shaders/vme/inter_frame.g7b index 52732001..2a349273 100644 --- a/src/shaders/vme/inter_frame.g7b +++ b/src/shaders/vme/inter_frame.g7b @@ -11,11 +11,19 @@ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, { 0x00000001, 0x24340231, 0x00000014, 
0x00000000 }, { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000c }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/vme.inc b/src/shaders/vme/vme.inc index dd7e1bb6..992c6b99 100644 --- a/src/shaders/vme/vme.inc +++ b/src/shaders/vme/vme.inc @@ -54,6 +54,7 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -140,6 +141,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`slice_edge_ub', `inline_reg0.4') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.8') /* * GRF 6~11 -- reserved @@ -277,3 +279,6 @@ define(`vme_msg_4', `msg_reg4') #endif +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') -- cgit v1.2.1 From 4aa7a860659b1f05792ded121d74614f807fce4e Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Thu, 7 Aug 2014 22:05:41 -0600 Subject: Enabling HSW avc encoding configuration Signed-off-by: 
Zhong Li (cherry picked from commit 1239853edc9c344a823d788c6df57da82eda25bf) --- src/gen75_vme.c | 22 ++++++---- src/shaders/vme/inter_frame_haswell.asm | 69 ++++++++++++++++++++------------ src/shaders/vme/inter_frame_haswell.g75b | 52 +++++++++++++++--------- src/shaders/vme/vme75.inc | 8 +++- 4 files changed, 97 insertions(+), 54 deletions(-) diff --git a/src/gen75_vme.c b/src/gen75_vme.c index e8527c37..576e91a9 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -534,7 +534,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); i += 1; } @@ -578,14 +578,20 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, int kernel_shader; bool allow_hwscore = true; int s; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; - } + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } } + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = VME_INTRA_SHADER; diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm index bcfd2601..399125ad 100644 --- a/src/shaders/vme/inter_frame_haswell.asm +++ b/src/shaders/vme/inter_frame_haswell.asm @@ 
-475,24 +475,58 @@ send (16) {align1}; /* IME search */ -mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ -mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ - -mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; -add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */ -add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1}; +__high_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; -mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; - and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; (f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; (f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; - + +jmpi (1) __vme_msg; + +__low_quality_search: +/* 
M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; @@ -507,23 +541,6 @@ mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; -/* M3/M4 search path */ - -mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; -mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; - -mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD 
{align1}; - -mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; send (8) vme_msg_ind diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b index 61551f90..1a60c511 100644 --- a/src/shaders/vme/inter_frame_haswell.g75b +++ b/src/shaders/vme/inter_frame_haswell.g75b @@ -145,13 +145,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000930 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000008d0 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -192,18 +192,45 @@ { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 }, { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 }, { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000160 }, + { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, + { 
0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 0x00000000 }, - { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, - { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000e0 }, + { 0x00000001, 0x28600061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28680061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x286c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28700061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28900061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, @@ -214,19 +241,6 @@ { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, - { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, - { 
0x00000001, 0x28640061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, - { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 }, { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc index be490567..97e814f7 100644 --- a/src/shaders/vme/vme75.inc +++ b/src/shaders/vme/vme75.inc @@ -59,6 +59,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -133,6 +135,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') /* * GRF 6~11 -- reserved @@ -311,7 +314,6 @@ define(`mb_mv3', `r96') define(`mb_ref', `r97') define(`mb_ref_win', `r84') -define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) define(`PRED_L1', `0x1':uw) define(`PRED_BI', `0x2':uw) @@ -337,3 +339,7 @@ define(`INTER_8X16MODE', `0x02') define(`OBR_MESSAGE_FENCE', `7') define(`OBR_MF_NOCOMMIT', `0') define(`OBR_MF_COMMIT', `0x20') + +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', 
`DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') -- cgit v1.2.1 From 1ac78501b0bc4d9dc872259681aa7bdff200aab9 Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Thu, 7 Aug 2014 22:05:42 -0600 Subject: Enabling BDW avc encoding configuration Signed-off-by: Zhong Li (cherry picked from commit 739eb9c0c954eecaf17a24be336ca5054241a338) --- src/gen8_vme.c | 21 +++++++++------ src/shaders/vme/inter_frame_gen8.asm | 51 +++++++++++++++++++++++++++++------- src/shaders/vme/inter_frame_gen8.g8b | 39 ++++++++++++++++++++++----- src/shaders/vme/vme8.inc | 8 +++++- 4 files changed, 95 insertions(+), 24 deletions(-) diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 34a3b685..8cae2a0c 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -557,7 +557,7 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); *command_ptr++ = CMD_MEDIA_STATE_FLUSH; *command_ptr++ = 0; @@ -603,13 +603,18 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx, int kernel_shader; bool allow_hwscore = true; int s; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; - } + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } } if ((pSliceParameter->slice_type == SLICE_TYPE_I) || 
diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm index 7db5fa66..991d903c 100644 --- a/src/shaders/vme/inter_frame_gen8.asm +++ b/src/shaders/vme/inter_frame_gen8.asm @@ -479,24 +479,57 @@ send (16) {align1}; /* IME search */ +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ - -mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; - -add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */ -add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1}; - mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; -mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; - and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; (f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; (f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; - +jmpi (1) __vme_msg; + 
+__low_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b index 63565417..d0cc25d7 100644 --- a/src/shaders/vme/inter_frame_gen8.g8b +++ b/src/shaders/vme/inter_frame_gen8.g8b @@ -146,13 +146,13 @@ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000870 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a20 }, { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 
0x00000810 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000009c0 }, { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, @@ -193,18 +193,45 @@ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x01000010, 0x20002240, 0x160000a7, 0x00020002 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000001, 0x28600608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900608, 0x00000000, 0x00000000 }, { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 }, { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, - { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, - { 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 }, - { 0x00000040, 0x24421a68, 0x1e000442, 0xfff4fff4 }, { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, - { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c }, { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000e0 }, + { 0x00000001, 0x28600608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28640608, 0x00000000, 
0x100f0f0f }, + { 0x00000001, 0x28680608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x286c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28700608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28900608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x20202020 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 }, { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc index bf60ad51..5b6f4690 100644 --- a/src/shaders/vme/vme8.inc +++ b/src/shaders/vme/vme8.inc @@ -59,6 +59,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -133,6 +135,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') /* * GRF 6~11 -- reserved @@ -312,7 +315,6 @@ define(`mb_mv3', `r96') define(`mb_ref', `r97') define(`mb_ref_win', `r84') -define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) define(`PRED_L1', `0x1':uw) define(`PRED_BI', `0x2':uw) @@ -339,3 +341,7 @@ define(`INTER_8X16MODE', `0x02') define(`OBR_MESSAGE_FENCE', `7') define(`OBR_MF_NOCOMMIT', `0') define(`OBR_MF_COMMIT', `0x20') + 
+define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') -- cgit v1.2.1 From b18479e4fdd7af7cf2840542ef19dbe9114fdeaf Mon Sep 17 00:00:00 2001 From: Zhong Li Date: Thu, 7 Aug 2014 22:05:43 -0600 Subject: Remove gen7 only limitation for encoding configuration Since all generations from gen6 can support avc encoding configuration, remove gen7 only limitation. MVC also can be support with little change, but it is low priority, it will be support when necessary and this function is stable. Signed-off-by: Zhong Li (cherry picked from commit f2178e267a67bdecab0a30502804c2ce5bccd0c2) --- src/i965_drv_video.c | 4 +++- src/i965_encoder.c | 8 ++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 7f35f01a..100ed9b5 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -676,7 +676,9 @@ i965_GetConfigAttributes(VADriverContextP ctx, case VAConfigAttribEncQualityRange: if (entrypoint == VAEntrypointEncSlice) { attrib_list[i].value = 1; - if(IS_GEN7(i965->intel.device_info)) + if (profile == VAProfileH264ConstrainedBaseline || + profile == VAProfileH264Main || + profile == VAProfileH264High ) attrib_list[i].value = ENCODER_QUALITY_RANGE; break; } diff --git a/src/i965_encoder.c b/src/i965_encoder.c index f1c1f3dc..f66d8892 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -375,6 +375,7 @@ intel_enc_hw_context_init(VADriverContextP ctx, case VAProfileH264Main: case VAProfileH264High: encoder_context->codec = CODEC_H264; + encoder_context->quality_range = ENCODER_QUALITY_RANGE; break; case VAProfileH264StereoHigh: @@ -424,13 +425,8 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) struct hw_context * gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) { - struct intel_encoder_context *encoder_context; - encoder_context = (struct intel_encoder_context 
*)intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init); - - encoder_context->quality_range = ENCODER_QUALITY_RANGE; - - return (struct hw_context *)encoder_context; + return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init); } struct hw_context * -- cgit v1.2.1 From 2e11383b5157c0e933399308de8591dd5c5b5058 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Wed, 5 Mar 2014 22:54:10 +0100 Subject: Add support for new VA buffer export APIs. Implement va{Acquire,Release}BufferHandle() hooks so that to allow VA surface or VA image buffer sharing with thirdparty APIs like EGL, OpenCL, etc. v2: made sure to sync bo before export, improved VA buffer type check. v3: tracked internal resources on acquire, disposed them on release. Signed-off-by: Gwenole Beauchesne (cherry picked from commit 483bb130925182f2096cd9e6fa5dbae6a55e7764) --- src/i965_drv_video.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_drv_video.h | 4 ++ 2 files changed, 133 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 100ed9b5..f9a1e848 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -28,6 +28,7 @@ */ #include "sysdeps.h" +#include #ifdef HAVE_VA_X11 # include "i965_output_dri.h" @@ -5145,6 +5146,130 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, return vaStatus; } +/* Acquires buffer handle for external API usage (internal implementation) */ +static VAStatus +i965_acquire_buffer_handle(struct object_buffer *obj_buffer, + uint32_t mem_type, VABufferInfo *out_buf_info) +{ + struct buffer_store *buffer_store; + + buffer_store = obj_buffer->buffer_store; + if (!buffer_store || !buffer_store->bo) + return VA_STATUS_ERROR_INVALID_BUFFER; + + /* Synchronization point */ + drm_intel_bo_wait_rendering(buffer_store->bo); + + if (obj_buffer->export_refcount > 0) { + if (obj_buffer->export_state.mem_type != mem_type) + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + 
else { + VABufferInfo * const buf_info = &obj_buffer->export_state; + + switch (mem_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM: { + uint32_t name; + if (drm_intel_bo_flink(buffer_store->bo, &name) != 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + buf_info->handle = name; + break; + } + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: { + int fd; + if (drm_intel_bo_gem_export_to_prime(buffer_store->bo, &fd) != 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + buf_info->handle = (intptr_t)fd; + break; + } + } + + buf_info->type = obj_buffer->type; + buf_info->mem_type = mem_type; + buf_info->mem_size = + obj_buffer->num_elements * obj_buffer->size_element; + } + + obj_buffer->export_refcount++; + *out_buf_info = obj_buffer->export_state; + return VA_STATUS_SUCCESS; +} + +/* Releases buffer handle after usage (internal implementation) */ +static VAStatus +i965_release_buffer_handle(struct object_buffer *obj_buffer) +{ + if (obj_buffer->export_refcount == 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (--obj_buffer->export_refcount == 0) { + VABufferInfo * const buf_info = &obj_buffer->export_state; + + switch (buf_info->mem_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: { + close((intptr_t)buf_info->handle); + break; + } + } + buf_info->mem_type = 0; + } + return VA_STATUS_SUCCESS; +} + +/** Acquires buffer handle for external API usage */ +static VAStatus +i965_AcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id, + VABufferInfo *buf_info) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_buffer * const obj_buffer = BUFFER(buf_id); + uint32_t i, mem_type; + + /* List of supported memory types, in preferred order */ + static const uint32_t mem_types[] = { + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME, + VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM, + 0 + }; + + if (!obj_buffer) + return VA_STATUS_ERROR_INVALID_BUFFER; + /* XXX: only VA surface|image like buffers are supported for now */ + if (obj_buffer->type != VAImageBufferType) + 
return VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE; + + if (!buf_info) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (!buf_info->mem_type) + mem_type = mem_types[0]; + else { + mem_type = 0; + for (i = 0; mem_types[i] != 0; i++) { + if (buf_info->mem_type & mem_types[i]) { + mem_type = buf_info->mem_type; + break; + } + } + if (!mem_type) + return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; + } + return i965_acquire_buffer_handle(obj_buffer, mem_type, buf_info); +} + +/** Releases buffer handle after usage from external API */ +static VAStatus +i965_ReleaseBufferHandle(VADriverContextP ctx, VABufferID buf_id) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_buffer * const obj_buffer = BUFFER(buf_id); + + if (!obj_buffer) + return VA_STATUS_ERROR_INVALID_BUFFER; + + return i965_release_buffer_handle(obj_buffer); +} + static int i965_os_has_ring_support(VADriverContextP ctx, int ring) @@ -5671,6 +5796,10 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx ) vtable->vaQuerySurfaceAttributes = i965_QuerySurfaceAttributes; vtable->vaCreateSurfaces2 = i965_CreateSurfaces2; + /* 0.36.0 */ + vtable->vaAcquireBufferHandle = i965_AcquireBufferHandle; + vtable->vaReleaseBufferHandle = i965_ReleaseBufferHandle; + vtable_vpp->vaQueryVideoProcFilters = i965_QueryVideoProcFilters; vtable_vpp->vaQueryVideoProcFilterCaps = i965_QueryVideoProcFilterCaps; vtable_vpp->vaQueryVideoProcPipelineCaps = i965_QueryVideoProcPipelineCaps; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 7b931d22..a72bb64b 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -287,6 +287,10 @@ struct object_buffer int num_elements; int size_element; VABufferType type; + + /* Export state */ + unsigned int export_refcount; + VABufferInfo export_state; }; struct object_image -- cgit v1.2.1 From f03dc2c08df47e8cf05364bc3c5846b64f427a2f Mon Sep 17 00:00:00 2001 From: "Zhao, Halley" Date: Tue, 2 Sep 2014 02:28:07 -0600 Subject: initial export_refcount for obj_buffer 
(cherry picked from commit 11d01d01ce7ceedf19c74bdd8a07961b0fd8dd9b) --- src/i965_drv_video.c | 1 + 1 file changed, 1 insertion(+) mode change 100755 => 100644 src/i965_drv_video.c diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c old mode 100755 new mode 100644 index f9a1e848..a9d67005 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1950,6 +1950,7 @@ i965_create_buffer_internal(VADriverContextP ctx, obj_buffer->num_elements = num_elements; obj_buffer->size_element = size; obj_buffer->type = type; + obj_buffer->export_refcount = 0; obj_buffer->buffer_store = NULL; buffer_store = calloc(1, sizeof(struct buffer_store)); assert(buffer_store); -- cgit v1.2.1 From 91d68d6c8108808b11d6150c6269ccb3370fba71 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 26 Aug 2014 13:40:54 +0200 Subject: Factor out va{Get,Put}Image() implementations. Use VA internal objects wherever possible for internal functions, thus leaving VA object ids to the base interface implementation. Robustify params validation earlier in there too. 
Signed-off-by: Gwenole Beauchesne (cherry picked from commit 78ad786967144c2947ac36eac9ee39b0c94778ef) --- src/i965_drv_video.c | 283 ++++++++++++++++----------------------------------- 1 file changed, 87 insertions(+), 196 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index a9d67005..4ff853b8 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3860,73 +3860,44 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, static VAStatus i965_sw_getimage(VADriverContextP ctx, - VASurfaceID surface, - int x, /* coordinates of the upper left source pixel */ - int y, - unsigned int width, /* width and height of the region */ - unsigned int height, - VAImageID image) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_driver_data * const i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - VAStatus va_status = VA_STATUS_SUCCESS; - - struct object_surface *obj_surface = SURFACE(surface); - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - struct object_image *obj_image = IMAGE(image); - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (x < 0 || y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_surface->orig_width || - y + height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_image->image.width || - y + height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; + void *image_data = NULL; + VAStatus va_status; if (obj_surface->fourcc != obj_image->image.format.fourcc) return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; - void *image_data = NULL; - va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); if (va_status != VA_STATUS_SUCCESS) return va_status; - VARectangle rect; - rect.x = x; - rect.y = y; - rect.width = width; - rect.height = height; - 
switch (obj_image->image.format.fourcc) { case VA_FOURCC_YV12: case VA_FOURCC_I420: /* I420 is native format for MPEG-2 decoded surfaces */ if (render_state->interleaved_uv) goto operation_failed; - get_image_i420(obj_image, image_data, obj_surface, &rect); + get_image_i420(obj_image, image_data, obj_surface, rect); break; case VA_FOURCC_NV12: /* NV12 is native format for H.264 decoded surfaces */ if (!render_state->interleaved_uv) goto operation_failed; - get_image_nv12(obj_image, image_data, obj_surface, &rect); + get_image_nv12(obj_image, image_data, obj_surface, rect); break; case VA_FOURCC_YUY2: /* YUY2 is the format supported by overlay plane */ - get_image_yuy2(obj_image, image_data, obj_surface, &rect); + get_image_yuy2(obj_image, image_data, obj_surface, rect); break; default: operation_failed: va_status = VA_STATUS_ERROR_OPERATION_FAILED; break; } - if (va_status != VA_STATUS_SUCCESS) return va_status; @@ -3936,44 +3907,11 @@ i965_sw_getimage(VADriverContextP ctx, static VAStatus i965_hw_getimage(VADriverContextP ctx, - VASurfaceID surface, - int x, /* coordinates of the upper left source pixel */ - int y, - unsigned int width, /* width and height of the region */ - unsigned int height, - VAImageID image) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_surface src_surface; struct i965_surface dst_surface; - VAStatus va_status = VA_STATUS_SUCCESS; - VARectangle rect; - struct object_surface *obj_surface = SURFACE(surface); - struct object_image *obj_image = IMAGE(image); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (x < 0 || y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_surface->orig_width || - y + height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_image->image.width || - y + 
height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - - if (!obj_surface->bo) - return VA_STATUS_SUCCESS; - assert(obj_image->bo); // image bo is always created, see i965_CreateImage() - - rect.x = x; - rect.y = y; - rect.width = width; - rect.height = height; src_surface.base = (struct object_base *)obj_surface; src_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -3983,14 +3921,7 @@ i965_hw_getimage(VADriverContextP ctx, dst_surface.type = I965_SURFACE_TYPE_IMAGE; dst_surface.flags = I965_SURFACE_FLAG_FRAME; - va_status = i965_image_processing(ctx, - &src_surface, - &rect, - &dst_surface, - &rect); - - - return va_status; + return i965_image_processing(ctx, &src_surface, rect, &dst_surface, rect); } VAStatus @@ -4003,20 +3934,37 @@ i965_GetImage(VADriverContextP ctx, VAImageID image) { struct i965_driver_data * const i965 = i965_driver_data(ctx); - VAStatus va_status = VA_STATUS_SUCCESS; + struct object_surface * const obj_surface = SURFACE(surface); + struct object_image * const obj_image = IMAGE(image); + VARectangle rect; + VAStatus va_status; + + if (!obj_surface) + return VA_STATUS_ERROR_INVALID_SURFACE; + if (!obj_surface->bo) /* don't get anything, keep previous data */ + return VA_STATUS_SUCCESS; + + if (!obj_image || !obj_image->bo) + return VA_STATUS_ERROR_INVALID_IMAGE; + + if (x < 0 || y < 0) + return VA_STATUS_ERROR_INVALID_PARAMETER; + if (x + width > obj_surface->orig_width || + y + height > obj_surface->orig_height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + if (x + width > obj_image->image.width || + y + height > obj_image->image.height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + rect.x = x; + rect.y = y; + rect.width = width; + rect.height = height; if (HAS_ACCELERATED_GETIMAGE(i965)) - va_status = i965_hw_getimage(ctx, - surface, - x, y, - width, height, - image); + va_status = i965_hw_getimage(ctx, obj_surface, obj_image, &rect); else - va_status = i965_sw_getimage(ctx, - surface, - x, y, - width, height, - 
image); + va_status = i965_sw_getimage(ctx, obj_surface, obj_image, &rect); return va_status; } @@ -4184,42 +4132,17 @@ put_image_yuy2(struct object_surface *obj_surface, return va_status; } - static VAStatus i965_sw_putimage(VADriverContextP ctx, - VASurfaceID surface, - VAImageID image, - int src_x, - int src_y, - unsigned int src_width, - unsigned int src_height, - int dest_x, - int dest_y, - unsigned int dest_width, - unsigned int dest_height) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *src_rect, const VARectangle *dst_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - struct object_image *obj_image = IMAGE(image); VAStatus va_status = VA_STATUS_SUCCESS; void *image_data = NULL; - ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); - ASSERT_RET(obj_image, VA_STATUS_ERROR_INVALID_IMAGE); - - if (src_x < 0 || src_y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (src_x + src_width > obj_image->image.width || - src_y + src_height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (dest_x < 0 || dest_y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (dest_x + dest_width > obj_surface->orig_width || - dest_y + dest_height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - /* XXX: don't allow scaling */ - if (src_width != dest_width || src_height != dest_height) + if (src_rect->width != dst_rect->width || + src_rect->height != dst_rect->height) return VA_STATUS_ERROR_INVALID_PARAMETER; if (obj_surface->fourcc) { @@ -4244,27 +4167,17 @@ i965_sw_putimage(VADriverContextP ctx, va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); if (va_status != VA_STATUS_SUCCESS) return va_status; - - VARectangle src_rect, dest_rect; - src_rect.x = src_x; - src_rect.y = src_y; - src_rect.width = src_width; - src_rect.height = src_height; - dest_rect.x = dest_x; - dest_rect.y = dest_y; - 
dest_rect.width = dest_width; - dest_rect.height = dest_height; switch (obj_image->image.format.fourcc) { case VA_FOURCC_YV12: case VA_FOURCC_I420: - va_status = put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_i420(obj_surface, dst_rect, obj_image, image_data, src_rect); break; case VA_FOURCC_NV12: - va_status = put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_nv12(obj_surface, dst_rect, obj_image, image_data, src_rect); break; case VA_FOURCC_YUY2: - va_status = put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + va_status = put_image_yuy2(obj_surface, dst_rect, obj_image, image_data, src_rect); break; default: va_status = VA_STATUS_ERROR_OPERATION_FAILED; @@ -4279,38 +4192,11 @@ i965_sw_putimage(VADriverContextP ctx, static VAStatus i965_hw_putimage(VADriverContextP ctx, - VASurfaceID surface, - VAImageID image, - int src_x, - int src_y, - unsigned int src_width, - unsigned int src_height, - int dest_x, - int dest_y, - unsigned int dest_width, - unsigned int dest_height) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *src_rect, const VARectangle *dst_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - struct object_image *obj_image = IMAGE(image); struct i965_surface src_surface, dst_surface; VAStatus va_status = VA_STATUS_SUCCESS; - VARectangle src_rect, dst_rect; - - ASSERT_RET(obj_surface,VA_STATUS_ERROR_INVALID_SURFACE); - ASSERT_RET(obj_image && obj_image->bo, VA_STATUS_ERROR_INVALID_IMAGE); - - if (src_x < 0 || - src_y < 0 || - src_x + src_width > obj_image->image.width || - src_y + src_height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - - if (dest_x < 0 || - dest_y < 0 || - dest_x + dest_width > obj_surface->orig_width || - dest_y + dest_height > obj_surface->orig_height) - return 
VA_STATUS_ERROR_INVALID_PARAMETER; if (!obj_surface->bo) { unsigned int tiling, swizzle; @@ -4329,24 +4215,16 @@ i965_hw_putimage(VADriverContextP ctx, src_surface.base = (struct object_base *)obj_image; src_surface.type = I965_SURFACE_TYPE_IMAGE; src_surface.flags = I965_SURFACE_FLAG_FRAME; - src_rect.x = src_x; - src_rect.y = src_y; - src_rect.width = src_width; - src_rect.height = src_height; dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; dst_surface.flags = I965_SURFACE_FLAG_FRAME; - dst_rect.x = dest_x; - dst_rect.y = dest_y; - dst_rect.width = dest_width; - dst_rect.height = dest_height; va_status = i965_image_processing(ctx, &src_surface, - &src_rect, + src_rect, &dst_surface, - &dst_rect); + dst_rect); return va_status; } @@ -4364,33 +4242,46 @@ i965_PutImage(VADriverContextP ctx, unsigned int dest_width, unsigned int dest_height) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - VAStatus va_status = VA_STATUS_SUCCESS; + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_surface * const obj_surface = SURFACE(surface); + struct object_image * const obj_image = IMAGE(image); + VARectangle src_rect, dst_rect; + VAStatus va_status; + + if (!obj_surface) + return VA_STATUS_ERROR_INVALID_SURFACE; + + if (!obj_image || !obj_image->bo) + return VA_STATUS_ERROR_INVALID_IMAGE; + + if (src_x < 0 || + src_y < 0 || + src_x + src_width > obj_image->image.width || + src_y + src_height > obj_image->image.height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + src_rect.x = src_x; + src_rect.y = src_y; + src_rect.width = src_width; + src_rect.height = src_height; + + if (dest_x < 0 || + dest_y < 0 || + dest_x + dest_width > obj_surface->orig_width || + dest_y + dest_height > obj_surface->orig_height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + dst_rect.x = dest_x; + dst_rect.y = dest_y; + dst_rect.width = dest_width; + dst_rect.height = dest_height; if 
(HAS_ACCELERATED_PUTIMAGE(i965)) - va_status = i965_hw_putimage(ctx, - surface, - image, - src_x, - src_y, - src_width, - src_height, - dest_x, - dest_y, - dest_width, - dest_height); + va_status = i965_hw_putimage(ctx, obj_surface, obj_image, + &src_rect, &dst_rect); else - va_status = i965_sw_putimage(ctx, - surface, - image, - src_x, - src_y, - src_width, - src_height, - dest_x, - dest_y, - dest_width, - dest_height); + va_status = i965_sw_putimage(ctx, obj_surface, obj_image, + &src_rect, &dst_rect); return va_status; } -- cgit v1.2.1 From c27d56290a150b44a87ba2d2df4d0c36ca5ab218 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 26 Aug 2014 14:14:54 +0200 Subject: Report BUSY surface state accordingly. Return VA_STATUS_ERROR_SURFACE_BUSY for key interfaces. This covers for va{Get,Put}Image(), as originally mandated by the VA-API specs; vaBeginPicture() as this is the entry-point to any decode, encode or video processing operation; but also for plain vaMapBuffer() operation. 
Signed-off-by: Gwenole Beauchesne (cherry picked from commit 62bfb507c8512af6529ee794848155bd7cd97fc6) --- src/i965_drv_video.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_drv_video.h | 1 + 2 files changed, 49 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 4ff853b8..9ff6902c 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -335,6 +335,37 @@ get_subpic_format(const VAImageFormat *va_format) return NULL; } +/* Checks whether the surface is in busy state */ +static bool +is_surface_busy(struct i965_driver_data *i965, + struct object_surface *obj_surface) +{ + assert(obj_surface != NULL); + + if (obj_surface->locked_image_id != VA_INVALID_ID) + return true; + if (obj_surface->derived_image_id != VA_INVALID_ID) + return true; + return false; +} + +/* Checks whether the image is in busy state */ +static bool +is_image_busy(struct i965_driver_data *i965, struct object_image *obj_image) +{ + struct object_buffer *obj_buffer; + + assert(obj_image != NULL); + + if (obj_image->derived_surface != VA_INVALID_ID) + return true; + + obj_buffer = BUFFER(obj_image->image.buf); + if (obj_buffer && obj_buffer->export_refcount > 0) + return true; + return false; +} + #define I965_PACKED_HEADER_BASE 0 #define I965_PACKED_MISC_HEADER_BASE 3 @@ -1190,6 +1221,7 @@ i965_CreateSurfaces2( obj_surface->fourcc = 0; obj_surface->bo = NULL; obj_surface->locked_image_id = VA_INVALID_ID; + obj_surface->derived_image_id = VA_INVALID_ID; obj_surface->private_data = NULL; obj_surface->free_private_data = NULL; obj_surface->subsampling = SUBSAMPLE_YUV420; @@ -2060,6 +2092,9 @@ i965_MapBuffer(VADriverContextP ctx, ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_INVALID_BUFFER); + if (obj_buffer->export_refcount > 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + if (NULL != 
obj_buffer->buffer_store->bo) { unsigned int tiling, swizzle; @@ -2192,6 +2227,9 @@ i965_BeginPicture(VADriverContextP ctx, obj_config = obj_context->obj_config; ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; + switch (obj_config->profile) { case VAProfileMPEG2Simple: case VAProfileMPEG2Main: @@ -3599,6 +3637,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, *out_image = *image; obj_surface->flags |= SURFACE_DERIVED; + obj_surface->derived_image_id = image_id; obj_image->derived_surface = surface; return VA_STATUS_SUCCESS; @@ -3642,6 +3681,7 @@ i965_DestroyImage(VADriverContextP ctx, VAImageID image) if (obj_surface) { obj_surface->flags &= ~SURFACE_DERIVED; + obj_surface->derived_image_id = VA_INVALID_ID; } i965_destroy_image(&i965->image_heap, (struct object_base *)obj_image); @@ -3943,9 +3983,13 @@ i965_GetImage(VADriverContextP ctx, return VA_STATUS_ERROR_INVALID_SURFACE; if (!obj_surface->bo) /* don't get anything, keep previous data */ return VA_STATUS_SUCCESS; + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; if (!obj_image || !obj_image->bo) return VA_STATUS_ERROR_INVALID_IMAGE; + if (is_image_busy(i965, obj_image)) + return VA_STATUS_ERROR_SURFACE_BUSY; if (x < 0 || y < 0) return VA_STATUS_ERROR_INVALID_PARAMETER; @@ -4250,9 +4294,13 @@ i965_PutImage(VADriverContextP ctx, if (!obj_surface) return VA_STATUS_ERROR_INVALID_SURFACE; + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; if (!obj_image || !obj_image->bo) return VA_STATUS_ERROR_INVALID_IMAGE; + if (is_image_busy(i965, obj_image)) + return VA_STATUS_ERROR_SURFACE_BUSY; if (src_x < 0 || src_y < 0 || diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index a72bb64b..aadfa020 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -263,6 +263,7 @@ struct object_surface unsigned int fourcc; dri_bo *bo; VAImageID locked_image_id; + 
VAImageID derived_image_id; void (*free_private_data)(void **data); void *private_data; unsigned int subsampling; -- cgit v1.2.1 From 8d2077666fa71cda672e0f5476f9b91369398a6a Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Mon, 8 Sep 2014 19:06:21 +0200 Subject: vpp: expose BGRA surface formats on Ivybridge and Haswell. Allow for vaQuerySurfaceAttributes() to return BGRA and BGRX formats for VPP on Ivybridge and Haswell. This is supported as both source and target surface formats. This fixes VA/EGL interop on Gen7 processors when a BGR[AX] surface is exported into an EGLImage. Signed-off-by: Gwenole Beauchesne --- src/i965_drv_video.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 9ff6902c..03e2e171 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4934,6 +4934,18 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].value.value.i = VA_FOURCC_RGBX; i++; + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRA; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRX; + i++; + attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; -- cgit v1.2.1 From d12dcedd6d3de29b9c7764d8b112d351c2830025 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 2 Sep 2014 09:57:20 +0200 Subject: i965_drv_video: factor out and robustify vendor string. Move out generation of the vendor string to its specific helper function, while also making it more robust and aware of possible overflows. 
Signed-off-by: Gwenole Beauchesne --- src/i965_drv_video.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 03e2e171..dec218f8 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5606,6 +5606,39 @@ struct { #endif }; +static bool +ensure_vendor_string(struct i965_driver_data *i965, const char *chipset) +{ + int ret, len; + + if (i965->va_vendor[0] != '\0') + return true; + + len = 0; + ret = snprintf(i965->va_vendor, sizeof(i965->va_vendor), + "%s %s driver for %s - %d.%d.%d", + INTEL_STR_DRIVER_VENDOR, INTEL_STR_DRIVER_NAME, chipset, + INTEL_DRIVER_MAJOR_VERSION, INTEL_DRIVER_MINOR_VERSION, + INTEL_DRIVER_MICRO_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len = ret; + + if (INTEL_DRIVER_PRE_VERSION > 0) { + ret = snprintf(&i965->va_vendor[len], sizeof(i965->va_vendor) - len, + ".pre%d", INTEL_DRIVER_PRE_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len += ret; + } + return true; + +error: + i965->va_vendor[0] = '\0'; + ASSERT_RET(ret > 0 && len < sizeof(i965->va_vendor), false); + return false; +} + static VAStatus i965_Init(VADriverContextP ctx) { @@ -5630,18 +5663,8 @@ i965_Init(VADriverContextP ctx) break; } - sprintf(i965->va_vendor, "%s %s driver for %s - %d.%d.%d", - INTEL_STR_DRIVER_VENDOR, - INTEL_STR_DRIVER_NAME, - chipset, - INTEL_DRIVER_MAJOR_VERSION, - INTEL_DRIVER_MINOR_VERSION, - INTEL_DRIVER_MICRO_VERSION); - - if (INTEL_DRIVER_PRE_VERSION > 0) { - const int len = strlen(i965->va_vendor); - sprintf(&i965->va_vendor[len], ".pre%d", INTEL_DRIVER_PRE_VERSION); - } + if (!ensure_vendor_string(i965, chipset)) + return VA_STATUS_ERROR_ALLOCATION_FAILED; i965->current_context_id = VA_INVALID_ID; -- cgit v1.2.1 From cf00f065fd5bb3dfc06e98ceacc2e218bd8f4e92 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 2 Sep 2014 10:10:21 +0200 Subject: i965_drv_video: add 
git commit id to VA driver version string. v2: renamed i965_drv_version.h to intel_version.h, changed macro definition to INTEL_DRIVER_GIT_VERSION, and displayed the git version string only in "pre" version modes. Signed-off-by: Gwenole Beauchesne --- configure.ac | 5 +++++ src/Makefile.am | 39 +++++++++++++++++++++++++++++++++++++-- src/i965_drv_video.c | 7 +++++++ src/intel_version.h.in | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 src/intel_version.h.in diff --git a/configure.ac b/configure.ac index 86c2b40d..7feb074a 100644 --- a/configure.ac +++ b/configure.ac @@ -80,6 +80,11 @@ PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.5], [gen4asm=yes], [gen4asm=no]) AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) AC_PATH_PROG([GEN4ASM], [intel-gen4asm]) +dnl Check for git +AC_ARG_VAR([GIT], [Path to git program, if any]) +AC_PATH_PROG([GIT], [git]) +AM_CONDITIONAL([HAVE_GIT], [test -n "$GIT"]) + dnl Check for VA-API PKG_CHECK_MODULES(LIBVA_DEPS, [libva >= va_api_version]) diff --git a/src/Makefile.am b/src/Makefile.am index b35d1ac9..acfa8492 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,7 +20,10 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-SUBDIRS = shaders +SUBDIRS = shaders +DIST_SUBDIRS = $(SUBDIRS) +EXTRA_DIST = +BUILT_SOURCES = AM_CPPFLAGS = \ -DPTHREADS \ @@ -120,6 +123,7 @@ source_h = \ intel_driver.h \ intel_media.h \ intel_memman.h \ + intel_version.h \ object_heap.h \ sysdeps.h \ va_backend_compat.h \ @@ -145,12 +149,43 @@ source_h += i965_output_wayland.h driver_cflags += $(WAYLAND_CFLAGS) endif +# git version +VERSION_FILE = .VERSION +OLD_VERSION_FILE = $(VERSION_FILE).old +NEW_VERSION_FILE = $(VERSION_FILE).new +PKG_VERSION_FILE = $(VERSION_FILE).pkg + +intel_version.h: gen-version + $(AM_V_GEN) \ + OV=`[ -f $(OLD_VERSION_FILE) ] && cat $(OLD_VERSION_FILE) || :`; \ + NV=`cat $(NEW_VERSION_FILE)`; \ + if [ "$$OV" != "$$NV" -o ! -f intel_version.h ]; then \ + cp -f $(NEW_VERSION_FILE) $(OLD_VERSION_FILE); \ + $(SED) -e "s|\@INTEL_DRIVER_GIT_VERSION\@|$${NV}|" \ + $(srcdir)/intel_version.h.in > intel_version.h; \ + fi + +gen-version: + @echo $(VERSION) > $(NEW_VERSION_FILE) +if HAVE_GIT + @[ -d $(top_srcdir)/.git ] && \ + (cd $(top_srcdir) && $(GIT) describe --tags) > $(NEW_VERSION_FILE) || : +endif + @[ -f $(srcdir)/$(PKG_VERSION_FILE) ] && \ + cp -f $(srcdir)/$(PKG_VERSION_FILE) $(NEW_VERSION_FILE) || : + +$(PKG_VERSION_FILE): $(NEW_VERSION_FILE) + @cp -f $< $@ + +BUILT_SOURCES += intel_version.h +EXTRA_DIST += intel_version.h.in $(PKG_VERSION_FILE) + # Wayland protocol protocol_source_h = wayland-drm-client-protocol.h i965_output_wayland.c: $(protocol_source_h) @wayland_scanner_rules@ -DIST_SUBDIRS = $(SUBDIRS) wayland +DIST_SUBDIRS += wayland # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in config.h.in diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index dec218f8..2b9f6ec4 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -38,6 +38,7 @@ # include "i965_output_wayland.h" #endif +#include "intel_version.h" #include "intel_driver.h" #include "intel_memman.h" #include "intel_batchbuffer.h" @@ -5630,6 +5631,12 @@ 
ensure_vendor_string(struct i965_driver_data *i965, const char *chipset) if (ret < 0 || ret >= sizeof(i965->va_vendor)) goto error; len += ret; + + ret = snprintf(&i965->va_vendor[len], sizeof(i965->va_vendor) - len, + " (%s)", INTEL_DRIVER_GIT_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len += ret; } return true; diff --git a/src/intel_version.h.in b/src/intel_version.h.in new file mode 100644 index 00000000..050e8346 --- /dev/null +++ b/src/intel_version.h.in @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef INTEL_VERSION_H +#define INTEL_VERSION_H + +/** + * INTEL_DRIVER_GIT_VERSION: + * + * The full version identifier of libva-intel-driver, from a git + * repository, in string form (suitable for string concatenation). 
+ */ +#define INTEL_DRIVER_GIT_VERSION "@INTEL_DRIVER_GIT_VERSION@" + +#endif /* INTEL_VERSION_H */ -- cgit v1.2.1 From 238d8077705711036d62a6d536311def3ef35035 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Tue, 9 Sep 2014 15:04:43 +0200 Subject: build: fix make dist for packaging. If the intel-gen4asm tool is not available, ship with the pre-built EU kernels instead of trying to regenerate them. In particular, just don't expose the build rules if intel-gen4asm is not installed. Signed-off-by: Gwenole Beauchesne --- src/shaders/post_processing/gen7/Makefile.am | 2 ++ src/shaders/post_processing/gen8/Makefile.am | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am index 1ffc1cde..f4e2a8d6 100644 --- a/src/shaders/post_processing/gen7/Makefile.am +++ b/src/shaders/post_processing/gen7/Makefile.am @@ -82,6 +82,7 @@ all-local: $(TARGETS) SUFFIXES = .g7b .g7s .asm +if HAVE_GEN4ASM $(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A) .asm.g7s: $(AM_V_GEN)cpp $< > _pp0.$@; \ @@ -92,6 +93,7 @@ $(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A) .g7s.g75b: $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7.5 $< +endif CLEANFILES = $(INTEL_PP_GEN7_ASM) diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 9898a452..54533fc6 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -57,6 +57,7 @@ all-local: $(TARGETS) SUFFIXES = .g8b .g8s .asm +if HAVE_GEN4ASM $(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) .asm.g8s: $(AM_V_GEN)cpp $< > _pp0.$@; \ @@ -64,6 +65,7 @@ $(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) rm _pp0.$@ .g8s.g8b: $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 8 $< +endif CLEANFILES = $(INTEL_PP_GEN7_ASM) -- cgit v1.2.1 From 77a7cbdd3c5648b691d5b07895780fab8fe5a342 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 12 Sep 2014 10:13:56 +0800 Subject: Add one 
callback function for hw_codec_info to initialize hw_codec_info V1->V2: Refine the call back function name from hw_codec_hook to preinit_hw_codec And it is called after VADriverContext is fully initialized. This is based on the comment from Gwenole Beauchesne. Signed-off-by: Zhao Yakui (cherry picked from commit 33e62fda870dc5c9b482fb6a23260da9ae465806) --- src/i965_drv_video.c | 3 +++ src/i965_drv_video.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 2b9f6ec4..0992956a 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5660,6 +5660,9 @@ i965_Init(VADriverContextP ctx) break; } + if (i965->codec_info->preinit_hw_codec) + i965->codec_info->preinit_hw_codec(ctx, i965->codec_info); + if (i == ARRAY_ELEMS(i965_sub_ops)) { switch (i965->intel.device_id) { #undef CHIPSET diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index aadfa020..bc9913f4 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -337,6 +337,7 @@ struct hw_codec_info struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *); bool (*render_init)(VADriverContextP); void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *); + void (*preinit_hw_codec)(VADriverContextP, struct hw_codec_info *); int max_width; int max_height; -- cgit v1.2.1 From 6405108e9dea62445c97bc2fa309c61febcdce6c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 12 Sep 2014 10:14:00 +0800 Subject: change the attribute of hw_codec_info so that it can be updated dynamically Signed-off-by: Zhao Yakui (cherry picked from commit 065db8289d3f38a923959f87d1b75767a0633e61) --- src/i965_device_info.c | 14 +++++++------- src/i965_drv_video.c | 2 +- src/i965_drv_video.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 9573b7d2..e6036cd2 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -44,7 +44,7 @@ extern 
struct hw_context *i965_proc_context_init(VADriverContextP, struct object extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); extern bool genx_render_init(VADriverContextP); -static const struct hw_codec_info g4x_hw_codec_info = { +static struct hw_codec_info g4x_hw_codec_info = { .dec_hw_context_init = g4x_dec_hw_context_init, .enc_hw_context_init = NULL, .proc_hw_context_init = NULL, @@ -64,7 +64,7 @@ static const struct hw_codec_info g4x_hw_codec_info = { extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); extern void i965_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); -static const struct hw_codec_info ilk_hw_codec_info = { +static struct hw_codec_info ilk_hw_codec_info = { .dec_hw_context_init = ironlake_dec_hw_context_init, .enc_hw_context_init = NULL, .proc_hw_context_init = i965_proc_context_init, @@ -86,7 +86,7 @@ static const struct hw_codec_info ilk_hw_codec_info = { extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *); -static const struct hw_codec_info snb_hw_codec_info = { +static struct hw_codec_info snb_hw_codec_info = { .dec_hw_context_init = gen6_dec_hw_context_init, .enc_hw_context_init = gen6_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, @@ -120,7 +120,7 @@ static const struct hw_codec_info snb_hw_codec_info = { extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *); -static const struct hw_codec_info ivb_hw_codec_info = { +static struct hw_codec_info ivb_hw_codec_info = { .dec_hw_context_init = gen7_dec_hw_context_init, .enc_hw_context_init = gen7_enc_hw_context_init, .proc_hw_context_init = i965_proc_context_init, @@ -158,7 +158,7 @@ 
static const struct hw_codec_info ivb_hw_codec_info = { extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); -static const struct hw_codec_info hsw_hw_codec_info = { +static struct hw_codec_info hsw_hw_codec_info = { .dec_hw_context_init = gen75_dec_hw_context_init, .enc_hw_context_init = gen75_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, @@ -202,7 +202,7 @@ static const struct hw_codec_info hsw_hw_codec_info = { extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *); extern void gen8_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); -static const struct hw_codec_info bdw_hw_codec_info = { +static struct hw_codec_info bdw_hw_codec_info = { .dec_hw_context_init = gen8_dec_hw_context_init, .enc_hw_context_init = gen8_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, @@ -244,7 +244,7 @@ static const struct hw_codec_info bdw_hw_codec_info = { }, }; -const struct hw_codec_info * +struct hw_codec_info * i965_get_codec_info(int devid) { switch (devid) { diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 0992956a..aa521e5a 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5473,7 +5473,7 @@ VAStatus i965_QueryVideoProcPipelineCaps( return VA_STATUS_SUCCESS; } -extern const struct hw_codec_info *i965_get_codec_info(int devid); +extern struct hw_codec_info *i965_get_codec_info(int devid); static bool i965_driver_data_init(VADriverContextP ctx) diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index bc9913f4..629489f7 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -382,7 +382,7 @@ struct 
i965_driver_data struct object_heap buffer_heap; struct object_heap image_heap; struct object_heap subpic_heap; - const struct hw_codec_info *codec_info; + struct hw_codec_info *codec_info; _I965Mutex render_mutex; _I965Mutex pp_mutex; -- cgit v1.2.1 From bc2e06ef0f89b264fe968fbff4f06e425385c3d8 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 12 Sep 2014 10:14:04 +0800 Subject: Encoding: Add one hook callback function to detect encoding capability on Haswell Some Haswell machines don't support the encoding. So it should be disabled. Otherwise the driver can't report the supported capability correctly. V1->V2: Minor update based on comment from Gwenole Beauchesne. For example: use the bool type and remove the hardcoded value Signed-off-by: Zhao Yakui (cherry picked from commit 23ae0f76ee5b3c097e6ae34182219cf44b09b82e) --- src/i965_device_info.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index e6036cd2..be6592ae 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -27,6 +27,10 @@ #include #include "i965_drv_video.h" +#include +#include +#include + /* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ #define EXTRA_H264_DEC_CHROMA_FORMATS \ (VA_RT_FORMAT_YUV400) @@ -155,6 +159,8 @@ static struct hw_codec_info ivb_hw_codec_info = { }, }; +static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info); + extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *); extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); @@ -164,6 +170,7 @@ static struct hw_codec_info hsw_hw_codec_info = { .proc_hw_context_init = gen75_proc_context_init, .render_init = genx_render_init, .post_processing_context_init = 
i965_post_processing_context_init, + .preinit_hw_codec = hsw_hw_codec_preinit, .max_width = 4096, .max_height = 4096, @@ -367,3 +374,103 @@ i965_get_device_info(int devid) return NULL; } } + +static int intel_driver_detect_cpustring(char *model_id) +{ + FILE *fp; + size_t line_length; + ssize_t read_length; + char *line_string, *model_ptr; + bool found; + + if (model_id == NULL) + return -EINVAL; + + fp = fopen("/proc/cpuinfo", "r"); + if (fp == NULL) { + fprintf(stderr, "no permission to access /proc/cpuinfo\n"); + return -EACCES; + } + line_string = NULL; + found = false; + + while((read_length = getline(&line_string, &line_length, fp)) != -1) { + if (strstr(line_string, "model name")) { + model_ptr = strstr(line_string, ": "); + model_ptr += 2; + found = true; + strncpy(model_id, model_ptr, strlen(model_ptr)); + break; + } + } + fclose(fp); + + if (line_string) + free(line_string); + + if (found) + return 0; + else + return -EINVAL; +} + +/* + * the hook_list for HSW. + * It is captured by /proc/cpuinfo and the space character is stripped. 
+ */ +const static char *hsw_cpu_hook_list[] = { +"Intel(R)Pentium(R)3556U", +"Intel(R)Pentium(R)3560Y", +"Intel(R)Pentium(R)3550M", +"Intel(R)Celeron(R)2980U", +"Intel(R)Celeron(R)2955U", +"Intel(R)Celeron(R)2950M", +}; + +static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info) +{ + char model_string[64], model_id[64]; + char *model_ptr, *tmp_ptr; + int i, model_len, list_len; + bool found; + + memset(model_string, 0, sizeof(model_string)); + memset(model_id, 0, sizeof(model_id)); + + /* If it can't detect cpu model_string, leave it alone */ + if (intel_driver_detect_cpustring(model_string)) + return; + + /* strip the cpufreq info */ + model_ptr = model_string; + tmp_ptr = strstr(model_ptr, "@"); + *tmp_ptr = '\0'; + + /* strip the space character and convert to the lower case */ + model_ptr = model_id; + model_len = strlen(model_string); + for (i = 0; i < model_len; i++) { + if (model_string[i] != ' ') { + *model_ptr = model_string[i]; + model_ptr++; + } + } + *model_ptr = '\0'; + + found = false; + list_len = sizeof(hsw_cpu_hook_list) / sizeof(char *); + for (i = 0; i < list_len; i++) { + model_ptr = (char *)hsw_cpu_hook_list[i]; + if (strcasecmp(model_id, model_ptr) == 0) { + found = true; + break; + } + } + + if (found) { + codec_info->has_h264_encoding = 0; + codec_info->has_h264_mvc_encoding = 0; + codec_info->has_mpeg2_encoding = 0; + } + return; +} -- cgit v1.2.1 From e35f62b9bc6e50772891fe1c879844d8b7f9026b Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 26 Sep 2014 09:15:41 +0800 Subject: Use the strncasecmp instead of strcasecmp to make it more robust At the same time the extra string array is removed. 
Signed-off-by: Zhao Yakui (cherry picked from commit 05ece0c3698f4f48c4141e8bd0608b9869d8ad0c) --- src/i965_device_info.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index be6592ae..d0912961 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -429,13 +429,12 @@ const static char *hsw_cpu_hook_list[] = { static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info) { - char model_string[64], model_id[64]; + char model_string[64]; char *model_ptr, *tmp_ptr; int i, model_len, list_len; bool found; memset(model_string, 0, sizeof(model_string)); - memset(model_id, 0, sizeof(model_id)); /* If it can't detect cpu model_string, leave it alone */ if (intel_driver_detect_cpustring(model_string)) @@ -447,7 +446,7 @@ static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *cod *tmp_ptr = '\0'; /* strip the space character and convert to the lower case */ - model_ptr = model_id; + model_ptr = model_string; model_len = strlen(model_string); for (i = 0; i < model_len; i++) { if (model_string[i] != ' ') { @@ -459,9 +458,14 @@ static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *cod found = false; list_len = sizeof(hsw_cpu_hook_list) / sizeof(char *); + model_len = strlen(model_string); for (i = 0; i < list_len; i++) { model_ptr = (char *)hsw_cpu_hook_list[i]; - if (strcasecmp(model_id, model_ptr) == 0) { + + if (strlen(model_ptr) != model_len) + continue; + + if (strncasecmp(model_string, model_ptr, model_len) == 0) { found = true; break; } -- cgit v1.2.1 From 095ba812b995e92bf7cb26795ab696e0bc55b577 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 26 Sep 2014 09:15:44 +0800 Subject: Check the value returned by strstr to avoid NULL pointer Signed-off-by: Zhao Yakui (cherry picked from commit 021149c50ffd5c55d45acbda6bb950c7206b7255) --- src/i965_device_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index d0912961..fd5d81cd 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -443,7 +443,9 @@ static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *cod /* strip the cpufreq info */ model_ptr = model_string; tmp_ptr = strstr(model_ptr, "@"); - *tmp_ptr = '\0'; + + if (tmp_ptr) + *tmp_ptr = '\0'; /* strip the space character and convert to the lower case */ model_ptr = model_string; -- cgit v1.2.1 From 925c98afcd381e52b37eb3870c3c80ff9c59a069 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 26 Sep 2014 09:15:36 +0800 Subject: Use the inline CPUID assembly to obtain CPUID string instead of /proc/cpuinfo On some systems there is no access to /proc/cpuinfo. So the inline assembly is used directly to detect the CPUID string. V2->V3: Follow the suggestion from Google engineer to remove the unused code of "if 0". And the "unsigned int" data type is updated to "uint32_t". V1->V2: Based on the Matt Turner's suggestion the __cpuid defined in GCC cpuid.h is called directly, which is helpful to handle the PIC issue on 32-bit. Signed-off-by: Zhao Yakui (cherry picked from commit 3e8cce4e7292651af10c9f375a6ad2e9fa494021) --- src/i965_device_info.c | 54 ++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index fd5d81cd..282e56e3 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -30,6 +30,7 @@ #include #include #include +#include /* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ #define EXTRA_H264_DEC_CHROMA_FORMATS \ @@ -375,43 +376,40 @@ i965_get_device_info(int devid) } } +static void cpuid(unsigned int op, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + __cpuid_count(op, 0, *eax, *ebx, *ecx, *edx); +} + +/* + * This function doesn't check the length. 
And the caller should + * assure that the length of input string should be greater than 48. + */ static int intel_driver_detect_cpustring(char *model_id) { - FILE *fp; - size_t line_length; - ssize_t read_length; - char *line_string, *model_ptr; - bool found; + uint32_t *rdata; if (model_id == NULL) return -EINVAL; - fp = fopen("/proc/cpuinfo", "r"); - if (fp == NULL) { - fprintf(stderr, "no permission to access /proc/cpuinfo\n"); - return -EACCES; - } - line_string = NULL; - found = false; + rdata = (uint32_t *)model_id; - while((read_length = getline(&line_string, &line_length, fp)) != -1) { - if (strstr(line_string, "model name")) { - model_ptr = strstr(line_string, ": "); - model_ptr += 2; - found = true; - strncpy(model_id, model_ptr, strlen(model_ptr)); - break; - } - } - fclose(fp); + /* obtain the max supported extended CPUID info */ + cpuid(0x80000000, &rdata[0], &rdata[1], &rdata[2], &rdata[3]); - if (line_string) - free(line_string); + /* If the max extended CPUID info is less than 0x80000004, fail */ + if (rdata[0] < 0x80000004) + return -EINVAL; - if (found) - return 0; - else - return -EINVAL; + /* obtain the CPUID string */ + cpuid(0x80000002, &rdata[0], &rdata[1], &rdata[2], &rdata[3]); + cpuid(0x80000003, &rdata[4], &rdata[5], &rdata[6], &rdata[7]); + cpuid(0x80000004, &rdata[8], &rdata[9], &rdata[10], &rdata[11]); + + *(model_id + 48) = '\0'; + return 0; } /* -- cgit v1.2.1 From fa968655cb4facf9d5899da1d39e56030889d51e Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Wed, 20 Aug 2014 13:03:52 -0700 Subject: CHV: Add PCIID placeholders for CHV Pending branding and differentiation by stepping. CHV is used generically to match libdrm and mesa identification. 
Signed-off-by: Sean V Kelley (cherry picked from commit 1121cb1f87228005a2b15b5ead95701a7fbe7519) Conflicts: src/i965_device_info.c --- src/i965_device_info.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_pciids.h | 4 ++++ src/intel_driver.h | 2 ++ 3 files changed, 57 insertions(+) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 282e56e3..a4155877 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -252,6 +252,48 @@ static struct hw_codec_info bdw_hw_codec_info = { }, }; +static struct hw_codec_info chv_hw_codec_info = { + .dec_hw_context_init = gen8_dec_hw_context_init, + .enc_hw_context_init = gen8_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .render_init = gen8_render_init, + .post_processing_context_init = gen8_post_processing_context_init, + + .max_width = 4096, + .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .has_vp8_decoding = 1, + .has_h264_mvc_encoding = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ + { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + struct hw_codec_info * i965_get_codec_info(int devid) { @@ -364,6 +406,15 @@ static const struct intel_device_info 
bdw_device_info = { .max_wm_threads = 64, /* per PSD */ }; +static const struct intel_device_info chv_device_info = { + .gen = 8, + + .urb_size = 4096, + .max_wm_threads = 64, /* per PSD */ + + .is_cherryview = 1, +}; + const struct intel_device_info * i965_get_device_info(int devid) { diff --git a/src/i965_pciids.h b/src/i965_pciids.h index 64973e44..fc046d1d 100644 --- a/src/i965_pciids.h +++ b/src/i965_pciids.h @@ -129,3 +129,7 @@ CHIPSET(0x162A, bdw, bdw, "Intel(R) Broadwell") CHIPSET(0x162B, bdw, bdw, "Intel(R) Broadwell") CHIPSET(0x162D, bdw, bdw, "Intel(R) Broadwell") CHIPSET(0x162E, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x22B0, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B1, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B2, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B3, chv, chv, "Intel(R) CherryView") diff --git a/src/intel_driver.h b/src/intel_driver.h index 7a726e32..432a0d9a 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -133,6 +133,7 @@ struct intel_device_info unsigned int is_ivybridge : 1; /* gen7 */ unsigned int is_baytrail : 1; /* gen7 */ unsigned int is_haswell : 1; /* gen7 */ + unsigned int is_cherryview : 1; /* gen8 */ }; struct intel_driver_data @@ -188,6 +189,7 @@ struct intel_region #define IS_HASWELL(device_info) (device_info->is_haswell) #define IS_GEN7(device_info) (device_info->gen == 7) +#define IS_CHERRYVIEW(device_info) (device_info->is_cherryview) #define IS_GEN8(device_info) (device_info->gen == 8) #endif /* _INTEL_DRIVER_H_ */ -- cgit v1.2.1 From f11176415ec26eb5960ba6841d2d9c22f2cabc60 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 30 Sep 2014 12:30:08 +0800 Subject: 1.4.1.pre1 for development Sync news from V1.4-branch: c8a8a406cffdccf046a2c92fb8d172b77e7398b3 fe3712b134068c1c25dc42e7dc908f2fa6d8aa0e Signed-off-by: Xiang, Haihao --- NEWS | 15 ++++++++++++++- configure.ac | 8 ++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 1c6c3246..01b54645 100644 --- a/NEWS 
+++ b/NEWS @@ -1,6 +1,19 @@ -libva-intel-driver NEWS -- summary of changes. 2014-06-16 +libva-intel-driver NEWS -- summary of changes. 2014-09-30 Copyright (C) 2009-2014 Intel Corporation +Version 1.4.0 - 30.Sep.2014 +* Add support for exporting VA buffer +* Add support for MVC decoding/encoding +* Add support for encoding quality level on Sandybridge and newer +* Add support of inserting packed slice header & raw data for encoding +* Add support for Cherryview +* Fix the GPU hang issue on Ivybridge when using the gstreamer and mplayer to play back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=80720) +* Fix the GPU hang issue on Sandybridge and newer when playing back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=76363) +* Fix the GPU hang issue on Haswell when using XBMC to play back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=81447) + Version 1.3.2 - 16.Jun.2014 * Export JPEG format by vaDeriveImage() * Add support for MADI on SNB diff --git a/configure.ac b/configure.ac index 7feb074a..d2bbe47c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) -m4_define([intel_driver_minor_version], [3]) -m4_define([intel_driver_micro_version], [3]) +m4_define([intel_driver_minor_version], [4]) +m4_define([intel_driver_micro_version], [1]) m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) @@ -10,8 +10,8 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre]) ]) # libva minimum version requirement -m4_define([va_api_version], [0.35]) -m4_define([libva_package_version], [1.3.0]) +m4_define([va_api_version], [0.36]) +m4_define([libva_package_version], [1.4.0]) # libdrm minimum version requirement m4_define([libdrm_version], [2.4.45]) -- cgit v1.2.1