diff options
author | Austin Yuan <shengquan.yuan@gmail.com> | 2010-12-22 16:59:19 +0800 |
---|---|---|
committer | Austin Yuan <shengquan.yuan@gmail.com> | 2010-12-22 16:59:19 +0800 |
commit | 1b5b86f4d846906bf408993a61f3b2b6856f0997 (patch) | |
tree | 963d16ec8044a28adf4431bd6887c2515ce44bb7 | |
parent | 78d9fb9c0d95dc5abd2272d60009cd88ffae33e7 (diff) | |
parent | 8f056b1c0bd0dac3c57193d30506730218740c70 (diff) | |
download | libva-1b5b86f4d846906bf408993a61f3b2b6856f0997.tar.gz |
Merge branch 'fdo-master'
Conflicts:
Android.mk
libva.spec
va/Android.mk
va/va.c
va/va.h
va/va_trace.c
Signed-off-by: Austin Yuan <shengquan.yuan@gmail.com>
37 files changed, 3665 insertions, 236 deletions
@@ -6,6 +6,7 @@ *.rej *.loT *.bin +*.pc .deps .libs install-sh @@ -25,5 +26,20 @@ config.status config.sub configure depcomp -fw/msvdx_bin -fw/msvdx_fw.bin +TAGS +/va/va_version.h +/test/basic/test_01 +/test/basic/test_02 +/test/basic/test_03 +/test/basic/test_04 +/test/basic/test_05 +/test/basic/test_06 +/test/basic/test_07 +/test/basic/test_08 +/test/basic/test_09 +/test/basic/test_10 +/test/basic/test_11 +/test/decode/mpeg2vldemo +/test/encode/h264encode +/test/putsurface/putsurface +/test/vainfo @@ -1,4 +1,4 @@ # Recursive call sub-folder Android.mk # - include $(call all-subdir-makefiles) + include $(call all-subdir-makefiles) diff --git a/build/gen_version.sh b/build/gen_version.sh new file mode 100644 index 0000000..dd01d95 --- /dev/null +++ b/build/gen_version.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +libva_topdir="$1" +version_h_in="$2" + +parse_configure_ac() { + sed -n "/^m4_define.*${1}.*\[\([0-9]*\)\].*/s//\1/p" ${libva_topdir}/configure.ac +} + +parse_configure() { + sed -n "/^${1}=\([0-9]*\)/s//\1/p" ${libva_topdir}/configure +} + +if test -f "${libva_topdir}/configure.ac"; then + libva_major_version=`parse_configure_ac libva_major_version` + libva_minor_version=`parse_configure_ac libva_minor_version` + libva_micro_version=`parse_configure_ac libva_micro_version` +elif test -f "${libva_topdir}/configure"; then + libva_major_version=`parse_configure LIBVA_MAJOR_VERSION` + libva_minor_version=`parse_configure LIBVA_MINOR_VERSION` + libva_micro_version=`parse_configure LIBVA_MICRO_VERSION` +else + echo "ERROR: configure or configure.ac file not found in $libva_topdir/" + exit 1 +fi +libva_version="$libva_major_version.$libva_minor_version.$libva_micro_version" + +sed -e "s/@LIBVA_MAJOR_VERSION@/${libva_major_version}/" \ + -e "s/@LIBVA_MINOR_VERSION@/${libva_minor_version}/" \ + -e "s/@LIBVA_MICRO_VERSION@/${libva_micro_version}/" \ + -e "s/@LIBVA_VERSION@/${libva_version}/" \ + $version_h_in diff --git a/configure.ac b/configure.ac index 6debe95..7ae538d 
100644 --- a/configure.ac +++ b/configure.ac @@ -42,6 +42,7 @@ AC_CONFIG_SRCDIR([Makefile.am]) AM_INIT_AUTOMAKE([dist-bzip2]) AM_CONFIG_HEADER([config.h]) +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) LIBVA_MAJOR_VERSION=libva_major_version LIBVA_MINOR_VERSION=libva_minor_version @@ -100,12 +101,12 @@ PKG_CHECK_MODULES([XEXT],[xext]) PKG_CHECK_MODULES([XFIXES], [xfixes]) PKG_CHECK_MODULES([DRM], [libdrm]) -PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.0], [gen4asm=yes], [gen4asm=no]) +PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.1], [gen4asm=yes], [gen4asm=no]) AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) -# Check for libdrm >= 2.4.21 (needed for i965_drv_video.so) -if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.21 libdrm; then - AC_MSG_WARN([libdrm < 2.4.21 found, disabling build of i965 video driver]) +# Check for libdrm >= 2.4.23 (needed for i965_drv_video.so) +if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.23 libdrm; then + AC_MSG_WARN([libdrm < 2.4.23 found, disabling build of i965 video driver]) enable_i965_driver=no fi AM_CONDITIONAL(BUILD_I965_DRIVER, test x$enable_i965_driver = xyes) @@ -175,3 +176,25 @@ AC_OUTPUT([ libva-tpi.pc ]) +# Print a small summary + +echo "" +echo "libva - ${LIBVA_VERSION}" +echo "" + +echo " • Global :" +echo " Prefix: ${prefix}" +echo "" + +AS_IF([test x$enable_i965_driver = xyes], [DRIVERS="i965 $DRIVERS"]) +AS_IF([test x$enable_dummy_driver = xyes], [DRIVERS="dummy $DRIVERS"]) + +echo " • Drivers: ${DRIVERS}" + +AS_IF([test x$USE_GLX = xyes], [BACKENDS="glx $BACKENDS"]) +BACKENDS="x11 $BACKENDS" +AS_IF([test x$enable_dummy_backend = xyes], [BACKENDS="dummy +$BACKENDS"]) + +echo " • Winsys : ${BACKENDS}" + diff --git a/i965_drv_video/Makefile.am b/i965_drv_video/Makefile.am index f32d579..8dd13bd 100644 --- a/i965_drv_video/Makefile.am +++ b/i965_drv_video/Makefile.am @@ -43,7 +43,8 @@ i965_drv_video_la_SOURCES = \ i965_avc_bsd.c \ 
i965_avc_hw_scoreboard.c\ i965_avc_ildb.c \ - i965_post_processing.c + i965_post_processing.c \ + gen6_mfd.c noinst_HEADERS = \ object_heap.h \ @@ -61,4 +62,5 @@ noinst_HEADERS = \ i965_avc_bsd.h \ i965_avc_hw_scoreboard.h\ i965_avc_ildb.h \ - i965_post_processing.h + i965_post_processing.h \ + gen6_mfd.h diff --git a/i965_drv_video/gen6_mfd.c b/i965_drv_video/gen6_mfd.c new file mode 100644 index 0000000..0fe7860 --- /dev/null +++ b/i965_drv_video/gen6_mfd.c @@ -0,0 +1,1484 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <va/va_backend.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_drv_video.h" + +#include "gen6_mfd.h" + +#define DMV_SIZE 0x88000 /* 557056 bytes for a frame */ + +static const uint32_t zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static void +gen6_mfd_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)media_state->private_context; + int i, j; + + assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames)); + + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + int found = 0; + + if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID) + continue; + + for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { + VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j]; + if (ref_pic->flags & VA_PICTURE_H264_INVALID) + continue; + + if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) { + found = 1; + break; + } + } + + if (!found) { + struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id); + obj_surface->flags &= ~SURFACE_REFERENCED; + + if (obj_surface->flags & SURFACE_DISPLAYED) { + dri_bo_unreference(obj_surface->bo); + obj_surface->bo = NULL; + obj_surface->flags = 0; + } + + if (obj_surface->free_private_data) + 
obj_surface->free_private_data(&obj_surface->private_data); + + gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; + gen6_mfd_context->reference_surface[i].frame_store_id = -1; + } + } + + for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) { + VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i]; + int found = 0; + + if (ref_pic->flags & VA_PICTURE_H264_INVALID) + continue; + + for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) { + if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) + continue; + + if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) { + found = 1; + break; + } + } + + if (!found) { + int frame_idx; + struct object_surface *obj_surface = SURFACE(ref_pic->picture_id); + + if (obj_surface->bo == NULL) { + uint32_t tiling_mode = I915_TILING_Y; + unsigned long pitch; + + obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, + "vaapi surface", + obj_surface->width, + obj_surface->height + obj_surface->height / 2, + 1, + &tiling_mode, + &pitch, + 0); + assert(obj_surface->bo); + assert(tiling_mode == I915_TILING_Y); + assert(pitch == obj_surface->width); + } + + for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) { + for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) { + if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) + continue; + + if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx) + break; + } + + if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface)) + break; + } + + assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface)); + + for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) { + if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) { + gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id; + gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx; + break; + 
} + } + } + } + + /* sort */ + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) { + if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID && + gen6_mfd_context->reference_surface[i].frame_store_id == i) + continue; + + for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) { + if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID && + gen6_mfd_context->reference_surface[j].frame_store_id == i) { + VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id; + int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id; + + gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id; + gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id; + gen6_mfd_context->reference_surface[j].surface_id = id; + gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx; + break; + } + } + } +} + +static void +gen6_mfd_free_avc_surface(void **data) +{ + struct gen6_mfd_surface *gen6_mfd_surface = *data; + + if (!gen6_mfd_surface) + return; + + dri_bo_unreference(gen6_mfd_surface->dmv_top); + gen6_mfd_surface->dmv_top = NULL; + dri_bo_unreference(gen6_mfd_surface->dmv_bottom); + gen6_mfd_surface->dmv_bottom = NULL; + + free(gen6_mfd_surface); + *data = NULL; +} + +static void +gen6_mfd_init_avc_surface(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfd_surface *gen6_mfd_surface = obj_surface->private_data; + + obj_surface->free_private_data = gen6_mfd_free_avc_surface; + + if (!gen6_mfd_surface) { + gen6_mfd_surface = calloc(sizeof(struct gen6_mfd_surface), 1); + assert((obj_surface->size & 0x3f) == 0); + obj_surface->private_data = gen6_mfd_surface; + } + + gen6_mfd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag && + 
!pic_param->seq_fields.bits.direct_8x8_inference_flag); + + if (gen6_mfd_surface->dmv_top == NULL) { + gen6_mfd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + DMV_SIZE, + 0x1000); + } + + if (gen6_mfd_surface->dmv_bottom_flag && + gen6_mfd_surface->dmv_bottom == NULL) { + gen6_mfd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + DMV_SIZE, + 0x1000); + } +} + +static void +gen6_mfd_pipe_mode_select(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC || + standard_select == MFX_FORMAT_VC1); + + BEGIN_BCS_BATCH(ctx, 4); + OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2)); + OUT_BCS_BATCH(ctx, + (MFD_MODE_VLD << 16) | /* VLD mode */ + (0 << 10) | /* disable Stream-Out */ + (gen6_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */ + (gen6_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */ + (0 << 7) | /* disable TLB prefectch */ + (0 << 5) | /* not in stitch mode */ + (MFX_CODEC_DECODE << 4) | /* decoding mode */ + (standard_select << 0)); + OUT_BCS_BATCH(ctx, + (0 << 20) | /* round flag in PB slice */ + (0 << 19) | /* round flag in Intra8x8 */ + (0 << 7) | /* expand NOA bus flag */ + (1 << 6) | /* must be 1 */ + (0 << 5) | /* disable clock gating for NOA */ + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | /* AVC long field motion vector */ + (1 << 0)); /* always calculate AVC ILDB boundary strength */ + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void 
+gen6_mfd_surface_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface = SURFACE(decode_state->current_render_target); + assert(obj_surface); + + BEGIN_BCS_BATCH(ctx, 6); + OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, + ((obj_surface->height - 1) << 19) | + ((obj_surface->width - 1) << 6)); + OUT_BCS_BATCH(ctx, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ + (0 << 22) | /* surface object control state, FIXME??? */ + ((obj_surface->width - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 for interleave U/V */ + (1 << 1) | /* must be y-tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, FIXME: must be 1 ??? */ + OUT_BCS_BATCH(ctx, + (0 << 16) | /* must be 0 for interleave U/V */ + (obj_surface->height)); /* y offset for U(cb) */ + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + int i; + + BEGIN_BCS_BATCH(ctx, 24); + OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2)); + if (gen6_mfd_context->pre_deblocking_output.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->pre_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + if (gen6_mfd_context->post_deblocking_output.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->post_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */ + OUT_BCS_BATCH(ctx, 
0); /* ignore for decoding */ + + if (gen6_mfd_context->intra_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->intra_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + /* DW 7..22 */ + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + struct object_surface *obj_surface; + + if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { + obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id); + assert(obj_surface && obj_surface->bo); + + OUT_BCS_RELOC(ctx, obj_surface->bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + } else { + OUT_BCS_BATCH(ctx, 0); + } + } + + OUT_BCS_BATCH(ctx, 0); /* ignore DW23 for decoding */ + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx, + dri_bo *slice_data_bo, + int standard_select) +{ + BEGIN_BCS_BATCH(ctx, 11); + OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); + OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */ + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = 
&i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + + BEGIN_BCS_BATCH(ctx, 4); + OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2)); + + if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->mpr_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + if (gen6_mfd_context->bitplane_read_buffer.valid) + OUT_BCS_RELOC(ctx, gen6_mfd_context->bitplane_read_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + else + OUT_BCS_BATCH(ctx, 0); + + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_aes_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + /* FIXME */ +} + +static void +gen6_mfd_wait(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select) +{ + BEGIN_BCS_BATCH(ctx, 1); + OUT_BCS_BATCH(ctx, MFX_WAIT | (1 << 8)); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state) +{ + int qm_present_flag; + int img_struct; + int mbaff_frame_flag; + unsigned int width_in_mbs, height_in_mbs; + VAPictureParameterBufferH264 *pic_param; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); + + if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) + qm_present_flag = 1; + else + qm_present_flag = 0; /* built-in QM matrices */ + + if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD) + img_struct = 1; + else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD) + 
img_struct = 3; + else + img_struct = 0; + + if ((img_struct & 0x1) == 0x1) { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x1); + } else { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x0); + } + + if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */ + assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0); + assert(pic_param->pic_fields.bits.field_pic_flag == 0); + } else { + assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */ + } + + mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag && + !pic_param->pic_fields.bits.field_pic_flag); + + width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff); + height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */ + assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */ + + /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */ + assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */ + pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */ + assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */ + + BEGIN_BCS_BATCH(ctx, 13); + OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2)); + OUT_BCS_BATCH(ctx, + ((width_in_mbs * height_in_mbs) & 0x7fff)); + OUT_BCS_BATCH(ctx, + (height_in_mbs << 16) | + (width_in_mbs << 0)); + OUT_BCS_BATCH(ctx, + ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) | + ((pic_param->chroma_qp_index_offset & 0x1f) << 16) | + (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */ + (0 << 13) | /* Max Macroblock size conformance Inter flag ??? 
FIXME */ + (1 << 12) | /* always 1, hardware requirement */ + (qm_present_flag << 10) | + (img_struct << 8) | + (16 << 0)); + OUT_BCS_BATCH(ctx, + (pic_param->seq_fields.bits.chroma_format_idc << 10) | + (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) | + ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) | + (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) | + (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) | + (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) | + (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) | + (mbaff_frame_flag << 1) | + (pic_param->pic_fields.bits.field_pic_flag << 0)); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state) +{ + int cmd_len; + VAIQMatrixBufferH264 *iq_matrix; + VAPictureParameterBufferH264 *pic_param; + + if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer) + return; + + iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + + cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */ + + if (pic_param->pic_fields.bits.transform_8x8_mode_flag) + cmd_len += 2 * 16; /* load two 8x8 scaling matrices */ + + BEGIN_BCS_BATCH(ctx, cmd_len); + OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | (cmd_len - 2)); + + if (pic_param->pic_fields.bits.transform_8x8_mode_flag) + OUT_BCS_BATCH(ctx, + (0x0 << 8) | /* don't use default built-in matrices */ + (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */ + else + OUT_BCS_BATCH(ctx, + (0x0 << 8) | /* don't use default built-in matrices */ + (0x3f << 0)); /* six 4x4 scaling matrices */ + + 
intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4); + + if (pic_param->pic_fields.bits.transform_8x8_mode_flag) + intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4); + + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_directmode_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + struct object_surface *obj_surface; + struct gen6_mfd_surface *gen6_mfd_surface; + VAPictureH264 *va_pic; + int i, j; + + BEGIN_BCS_BATCH(ctx, 69); + OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); + + /* reference surfaces 0..15 */ + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { + obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id); + assert(obj_surface); + gen6_mfd_surface = obj_surface->private_data; + + if (gen6_mfd_surface == NULL) { + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + } else { + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + + if (gen6_mfd_surface->dmv_bottom_flag == 1) + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + else + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + } + } else { + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + } + } + + /* the current decoding frame/field */ + va_pic = &pic_param->CurrPic; + assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); + obj_surface = SURFACE(va_pic->picture_id); + assert(obj_surface && obj_surface->bo && obj_surface->private_data); + gen6_mfd_surface = obj_surface->private_data; + + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top, + 
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + if (gen6_mfd_surface->dmv_bottom_flag == 1) + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + /* POC List */ + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { + int found = 0; + for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { + va_pic = &pic_param->ReferenceFrames[j]; + + if (va_pic->flags & VA_PICTURE_H264_INVALID) + continue; + + if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) { + found = 1; + break; + } + } + + assert(found == 1); + assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); + + OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt); + } else { + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + } + } + + va_pic = &pic_param->CurrPic; + OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt); + + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_slice_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + VASliceParameterBufferH264 *next_slice_param) +{ + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; + int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos; + int num_ref_idx_l0, num_ref_idx_l1; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); + int weighted_pred_idc = 0; + int first_mb_in_slice = 0, first_mb_in_next_slice = 0; + int slice_type; + + if (slice_param->slice_type == SLICE_TYPE_I || + slice_param->slice_type == 
SLICE_TYPE_SI) { + slice_type = SLICE_TYPE_I; + } else if (slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) { + slice_type = SLICE_TYPE_P; + } else { + assert(slice_param->slice_type == SLICE_TYPE_B); + slice_type = SLICE_TYPE_B; + } + + if (slice_type == SLICE_TYPE_I) { + assert(slice_param->num_ref_idx_l0_active_minus1 == 0); + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = 0; + num_ref_idx_l1 = 0; + } else if (slice_type == SLICE_TYPE_P) { + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = 0; + weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1); + } else { + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1); + } + + first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture; + slice_hor_pos = first_mb_in_slice % width_in_mbs; + slice_ver_pos = first_mb_in_slice / width_in_mbs; + + if (next_slice_param) { + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + next_slice_hor_pos = 0; + next_slice_ver_pos = height_in_mbs; + } + + BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? 
*/ + OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(ctx, slice_type); + OUT_BCS_BATCH(ctx, + (num_ref_idx_l1 << 24) | + (num_ref_idx_l0 << 16) | + (slice_param->chroma_log2_weight_denom << 8) | + (slice_param->luma_log2_weight_denom << 0)); + OUT_BCS_BATCH(ctx, + (weighted_pred_idc << 30) | + (slice_param->direct_spatial_mv_pred_flag << 29) | + (slice_param->disable_deblocking_filter_idc << 27) | + (slice_param->cabac_init_idc << 24) | + ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) | + ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | + ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); + OUT_BCS_BATCH(ctx, + (slice_ver_pos << 24) | + (slice_hor_pos << 16) | + (first_mb_in_slice << 0)); + OUT_BCS_BATCH(ctx, + (next_slice_ver_pos << 16) | + (next_slice_hor_pos << 0)); + OUT_BCS_BATCH(ctx, + (next_slice_param == NULL) << 19); /* last slice flag */ + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param) +{ + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + + BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? 
*/ + OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, + height_in_mbs << 24 | + width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_ref_idx_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + int i, j, num_ref_list; + struct { + unsigned char bottom_idc:1; + unsigned char frame_store_index:4; + unsigned char field_picture:1; + unsigned char long_term:1; + unsigned char non_exist:1; + } refs[32]; + + if (slice_param->slice_type == SLICE_TYPE_I || + slice_param->slice_type == SLICE_TYPE_SI) + return; + + if (slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) { + num_ref_list = 1; + } else { + num_ref_list = 2; + } + + for (i = 0; i < num_ref_list; i++) { + VAPictureH264 *va_pic; + + if (i == 0) { + va_pic = slice_param->RefPicList0; + } else { + va_pic = slice_param->RefPicList1; + } + + BEGIN_BCS_BATCH(ctx, 10); + OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | (10 - 2)); + OUT_BCS_BATCH(ctx, i); + + for (j = 0; j < 32; j++) { + if (va_pic->flags & VA_PICTURE_H264_INVALID) { + refs[j].non_exist = 1; + refs[j].long_term = 1; + refs[j].field_picture = 1; + refs[j].frame_store_index = 0xf; + refs[j].bottom_idc = 1; + } else { + int frame_idx; + + for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) { + if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID && + va_pic->picture_id == 
gen6_mfd_context->reference_surface[frame_idx].surface_id) { + assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id); + break; + } + } + + assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface)); + + refs[j].non_exist = 0; + refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); + refs[j].field_picture = !!(va_pic->flags & + (VA_PICTURE_H264_TOP_FIELD | + VA_PICTURE_H264_BOTTOM_FIELD)); + refs[j].frame_store_index = frame_idx; + refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); + } + + va_pic++; + } + + intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs)); + ADVANCE_BCS_BATCH(ctx); + } +} + +static void +gen6_mfd_avc_weightoffset_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param) +{ + int i, j, num_weight_offset_table = 0; + short weightoffsets[32 * 6]; + + if ((slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) && + (pic_param->pic_fields.bits.weighted_pred_flag == 1)) { + num_weight_offset_table = 1; + } + + if ((slice_param->slice_type == SLICE_TYPE_B) && + (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) { + num_weight_offset_table = 2; + } + + for (i = 0; i < num_weight_offset_table; i++) { + BEGIN_BCS_BATCH(ctx, 98); + OUT_BCS_BATCH(ctx, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2)); + OUT_BCS_BATCH(ctx, i); + + if (i == 0) { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j]; + weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1]; + } + } else { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j]; + weightoffsets[j * 6 + 1] = 
slice_param->luma_offset_l1[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1]; + } + } + + intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets)); + ADVANCE_BCS_BATCH(ctx); + } +} + +static int +gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset) +{ + int out_slice_data_bit_offset; + int slice_header_size = in_slice_data_bit_offset / 8; + int i, j; + + for (i = 0, j = 0; i < slice_header_size; i++, j++) { + if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) { + i++, j += 2; + } + } + + out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8; + + if (mode_flag == ENTROPY_CABAC) + out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8); + + return out_slice_data_bit_offset; +} + +static void +gen6_mfd_avc_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + dri_bo *slice_data_bo) +{ + int slice_data_bit_offset; + uint8_t *slice_data = NULL; + + dri_bo_map(slice_data_bo, 0); + slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset); + slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data, + pic_param->pic_fields.bits.entropy_coding_mode_flag, + slice_param->slice_data_bit_offset); + dri_bo_unmap(slice_data_bo); + + BEGIN_BCS_BATCH(ctx, 6); + OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(ctx, + ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0)); + OUT_BCS_BATCH(ctx, slice_param->slice_data_offset + (slice_data_bit_offset >> 3)); + OUT_BCS_BATCH(ctx, + (0 << 31) | + (0 << 14) | + (0 << 12) | + (0 << 10) | + (0 << 8)); + OUT_BCS_BATCH(ctx, + (0 << 16) | + (0 << 6) | + ((0x7 - (slice_data_bit_offset & 0x7)) << 0)); + OUT_BCS_BATCH(ctx, 0); + 
ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param) +{ + BEGIN_BCS_BATCH(ctx, 6); + OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_avc_phantom_slice(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param) +{ + gen6_mfd_avc_phantom_slice_state(ctx, pic_param); + gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param); +} + +static void +gen6_mfd_avc_decode_init(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param; + VAPictureH264 *va_pic; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context; + struct object_surface *obj_surface; + dri_bo *bo; + int i, j, enable_avc_ildb = 0; + + for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + assert(decode_state->slice_params[j]->num_elements == 1); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (slice_param->disable_deblocking_filter_idc != 1) { + enable_avc_ildb = 1; + break; + } + + slice_param++; + } + } + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 
*)decode_state->pic_param->buffer; + gen6_mfd_context = media_state->private_context; + + if (gen6_mfd_context == NULL) { + gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context)); + media_state->private_context = gen6_mfd_context; + + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; + gen6_mfd_context->reference_surface[i].frame_store_id = -1; + } + } + + /* Current decoded picture */ + va_pic = &pic_param->CurrPic; + assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); + obj_surface = SURFACE(va_pic->picture_id); + assert(obj_surface); + obj_surface->flags = (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); + gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); + + if (obj_surface->bo == NULL) { + uint32_t tiling_mode = I915_TILING_Y; + unsigned long pitch; + + obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, + "vaapi surface", + obj_surface->width, + obj_surface->height + obj_surface->height / 2, + 1, + &tiling_mode, + &pitch, + 0); + assert(obj_surface->bo); + assert(tiling_mode == I915_TILING_Y); + assert(pitch == obj_surface->width); + } + + dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); + gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo); + gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb; + + dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo); + gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo); + gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb; + + dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + 128 * 64, + 0x1000); + assert(bo); + gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo; + 
gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "deblocking filter row store", + 30720, /* 4 * 120 * 64 */ + 0x1000); + assert(bo); + gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + 11520, /* 1.5 * 120 * 64 */ + 0x1000); + assert(bo); + gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + 7680, /* 1.0 * 120 * 64 */ + 0x1000); + assert(bo); + gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo; + gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1; + + gen6_mfd_context->bitplane_read_buffer.valid = 0; + gen6_mfd_avc_frame_store_index(ctx, pic_param); +} + +static void +gen6_mfd_avc_decode_picture(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param, *next_slice_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + + gen6_mfd_avc_decode_init(ctx, decode_state); + intel_batchbuffer_start_atomic_bcs(ctx, 0x1000); + intel_batchbuffer_emit_mi_flush_bcs(ctx); + gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC); + gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC); + gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC); + gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC); + gen6_mfd_avc_img_state(ctx, 
decode_state); + gen6_mfd_avc_qm_state(ctx, decode_state); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + + if (j == decode_state->num_slice_params - 1) + next_slice_param = NULL; + else + next_slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + + gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC); + assert(decode_state->slice_params[j]->num_elements == 1); + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + + gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param); + gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param); + gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param); + gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param); + gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo); + slice_param++; + } + } + + gen6_mfd_avc_phantom_slice(ctx, pic_param); + intel_batchbuffer_end_atomic_bcs(ctx); + intel_batchbuffer_flush_bcs(ctx); +} + +static void +gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAPictureParameterBufferMPEG2 *pic_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context; + struct object_surface *obj_surface; + int i; + dri_bo *bo; + + 
assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + gen6_mfd_context = media_state->private_context; + + if (gen6_mfd_context == NULL) { + gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context)); + media_state->private_context = gen6_mfd_context; + + for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { + gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; + gen6_mfd_context->reference_surface[i].frame_store_id = -1; + } + } + + /* reference picture */ + obj_surface = SURFACE(pic_param->forward_reference_picture); + + if (obj_surface && obj_surface->bo) + gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture; + else + gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID; + + obj_surface = SURFACE(pic_param->backward_reference_picture); + + if (obj_surface && obj_surface->bo) + gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture; + else + gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture; + + /* must do so !!! 
*/ + for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) + gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id; + + /* Current decoded picture */ + obj_surface = SURFACE(decode_state->current_render_target); + assert(obj_surface); + if (obj_surface->bo == NULL) { + uint32_t tiling_mode = I915_TILING_Y; + unsigned long pitch; + + obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, + "vaapi surface", + obj_surface->width, + obj_surface->height + obj_surface->height / 2, + 1, + &tiling_mode, + &pitch, + 0); + assert(obj_surface->bo); + assert(tiling_mode == I915_TILING_Y); + assert(pitch == obj_surface->width); + } + + dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo); + gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo); + gen6_mfd_context->pre_deblocking_output.valid = 1; + + dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + 11520, /* 1.5 * 120 * 64 */ + 0x1000); + assert(bo); + gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + gen6_mfd_context->post_deblocking_output.valid = 0; + gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0; + gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0; + gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0; + gen6_mfd_context->bitplane_read_buffer.valid = 0; +} + +static void +gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAPictureParameterBufferMPEG2 *pic_param; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + + BEGIN_BCS_BATCH(ctx, 4); + OUT_BCS_BATCH(ctx, MFX_MPEG2_PIC_STATE | (4 - 2)); + OUT_BCS_BATCH(ctx, + 
(pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */ + ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */ + ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */ + ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */ + pic_param->picture_coding_extension.bits.intra_dc_precision << 14 | + pic_param->picture_coding_extension.bits.picture_structure << 12 | + pic_param->picture_coding_extension.bits.top_field_first << 11 | + pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 | + pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 | + pic_param->picture_coding_extension.bits.q_scale_type << 8 | + pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | + pic_param->picture_coding_extension.bits.alternate_scan << 6); + OUT_BCS_BATCH(ctx, + pic_param->picture_coding_type << 9); + OUT_BCS_BATCH(ctx, + (ALIGN(pic_param->vertical_size, 16) / 16) << 16 | + (ALIGN(pic_param->horizontal_size, 16) / 16)); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAIQMatrixBufferMPEG2 *iq_matrix; + int i; + + if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer) + return; + + iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer; + + for (i = 0; i < 2; i++) { + int k, m; + unsigned char *qm = NULL; + unsigned char qmx[64]; + + if (i == 0) { + if (iq_matrix->load_intra_quantiser_matrix) + qm = iq_matrix->intra_quantiser_matrix; + } else { + if (iq_matrix->load_non_intra_quantiser_matrix) + qm = iq_matrix->non_intra_quantiser_matrix; + } + + if (!qm) + continue; + + /* Upload quantisation matrix in raster order. The mplayer vaapi + * patch passes quantisation matrix in zig-zag order to va library. 
+ */ + for (k = 0; k < 64; k++) { + m = zigzag_direct[k]; + qmx[m] = qm[k]; + } + + BEGIN_BCS_BATCH(ctx, 18); + OUT_BCS_BATCH(ctx, MFX_MPEG2_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(ctx, i); + intel_batchbuffer_data_bcs(ctx, qmx, 64); + ADVANCE_BCS_BATCH(ctx); + } +} + +static void +gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + VASliceParameterBufferMPEG2 *next_slice_param) +{ + unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + unsigned int height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16; + int mb_count; + + if (next_slice_param == NULL) + mb_count = width_in_mbs * height_in_mbs - + (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position); + else + mb_count = (next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) - + (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position); + + BEGIN_BCS_BATCH(ctx, 5); + OUT_BCS_BATCH(ctx, MFD_MPEG2_BSD_OBJECT | (5 - 2)); + OUT_BCS_BATCH(ctx, + slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); + OUT_BCS_BATCH(ctx, + slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3)); + OUT_BCS_BATCH(ctx, + slice_param->slice_horizontal_position << 24 | + slice_param->slice_vertical_position << 16 | + mb_count << 8 | + (next_slice_param == NULL) << 5 | + (next_slice_param == NULL) << 3 | + (slice_param->macroblock_offset & 0x7)); + OUT_BCS_BATCH(ctx, + slice_param->quantiser_scale_code << 24); + ADVANCE_BCS_BATCH(ctx); +} + +static void +gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, struct decode_state *decode_state) +{ + VAPictureParameterBufferMPEG2 *pic_param; + VASliceParameterBufferMPEG2 *slice_param, *next_slice_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = 
(VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; + + gen6_mfd_mpeg2_decode_init(ctx, decode_state); + intel_batchbuffer_start_atomic_bcs(ctx, 0x1000); + intel_batchbuffer_emit_mi_flush_bcs(ctx); + gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2); + gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2); + gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2); + gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2); + gen6_mfd_mpeg2_pic_state(ctx, decode_state); + gen6_mfd_mpeg2_qm_state(ctx, decode_state); + + assert(decode_state->num_slice_params == 1); + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2); + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = NULL; + + gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param); + slice_param++; + } + } + + intel_batchbuffer_end_atomic_bcs(ctx); + intel_batchbuffer_flush_bcs(ctx); +} + +static void +gen6_mfd_vc1_decode_picture(VADriverContextP ctx, struct decode_state *decode_state) +{ + +} + +void +gen6_mfd_decode_picture(VADriverContextP ctx, + VAProfile profile, + struct decode_state *decode_state) +{ + switch (profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + gen6_mfd_mpeg2_decode_picture(ctx, decode_state); + break; + + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + gen6_mfd_avc_decode_picture(ctx, decode_state); + break; + + case VAProfileVC1Simple: + case VAProfileVC1Main: + case 
VAProfileVC1Advanced: + gen6_mfd_vc1_decode_picture(ctx, decode_state); + break; + + default: + assert(0); + break; + } +} + +Bool +gen6_mfd_init(VADriverContextP ctx) +{ + return True; +} + +Bool +gen6_mfd_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_media_state *media_state = &i965->media_state; + struct gen6_mfd_context *gen6_mfd_context = media_state->private_context; + + if (gen6_mfd_context) { + dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); + gen6_mfd_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo); + gen6_mfd_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo); + gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo); + gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo); + gen6_mfd_context->bitplane_read_buffer.bo = NULL; + + free(gen6_mfd_context); + } + + media_state->private_context = NULL; + return True; +} + diff --git a/i965_drv_video/gen6_mfd.h b/i965_drv_video/gen6_mfd.h new file mode 100644 index 0000000..dba1d07 --- /dev/null +++ b/i965_drv_video/gen6_mfd.h @@ -0,0 +1,95 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, 
publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * + */ + +#ifndef _GEN6_MFD_H_ +#define _GEN6_MFD_H_ + +#include <xf86drm.h> +#include <drm.h> +#include <i915_drm.h> +#include <intel_bufmgr.h> + +struct gen6_mfd_surface +{ + dri_bo *dmv_top; + dri_bo *dmv_bottom; + int dmv_bottom_flag; +}; + +#define MAX_MFX_REFERENCE_SURFACES 16 +struct gen6_mfd_context +{ + struct { + VASurfaceID surface_id; + int frame_store_id; + } reference_surface[MAX_MFX_REFERENCE_SURFACES]; + + struct { + dri_bo *bo; + int valid; + } post_deblocking_output; + + struct { + dri_bo *bo; + int valid; + } pre_deblocking_output; + + struct { + dri_bo *bo; + int valid; + } intra_row_store_scratch_buffer; + + struct { + dri_bo *bo; + int valid; + } deblocking_filter_row_store_scratch_buffer; + + struct { + dri_bo *bo; + int valid; + } bsd_mpc_row_store_scratch_buffer; + + struct { + dri_bo *bo; + int valid; + } mpr_row_store_scratch_buffer; + + struct { + dri_bo *bo; + int valid; + } bitplane_read_buffer; +}; + +struct decode_state; + +Bool gen6_mfd_init(VADriverContextP ctx); +Bool gen6_mfd_terminate(VADriverContextP ctx); +void 
gen6_mfd_decode_picture(VADriverContextP ctx, + VAProfile profile, + struct decode_state *decode_state); +#endif /* _GEN6_MFD_H_ */ diff --git a/i965_drv_video/i965_avc_ildb.c b/i965_drv_video/i965_avc_ildb.c index a053062..a6be42b 100644 --- a/i965_drv_video/i965_avc_ildb.c +++ b/i965_drv_video/i965_avc_ildb.c @@ -360,7 +360,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx, struct decode_state *decode if (IS_IRONLAKE(i965->intel.device_id)) { root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */ } else { - root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 = 2 */ + root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */ } if (pic_param->pic_fields.bits.field_pic_flag) diff --git a/i965_drv_video/i965_defines.h b/i965_drv_video/i965_defines.h index 839712e..6c202c7 100644 --- a/i965_drv_video/i965_defines.h +++ b/i965_drv_video/i965_defines.h @@ -29,6 +29,10 @@ #define CMD_PIPELINED_POINTERS CMD(3, 0, 0) #define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + #define CMD_VERTEX_BUFFERS CMD(3, 0, 8) #define CMD_VERTEX_ELEMENTS CMD(3, 0, 9) #define CMD_DRAWING_RECTANGLE CMD(3, 1, 0) @@ -36,6 +40,125 @@ #define CMD_3DPRIMITIVE CMD(3, 3, 0) #define CMD_DEPTH_BUFFER CMD(3, 1, 5) +# define CMD_DEPTH_BUFFER_TYPE_SHIFT 29 +# define CMD_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define CMD_CLEAR_PARAMS CMD(3, 1, 0x10) +/* DW1 */ +# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS CMD(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB CMD(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 
*/ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS CMD(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS CMD(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS CMD(3, 0, 0x10) + +#define GEN6_3DSTATE_GS CMD(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP CMD(3, 0, 0x12) + +#define GEN6_3DSTATE_SF CMD(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM CMD(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define 
GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS CMD(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK CMD(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE CMD(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define MFX(pipeline, op, sub_opa, sub_opb) \ + (3 << 29 | \ + (pipeline) << 27 | \ + (op) << 24 | \ + (sub_opa) << 21 | \ + (sub_opb) << 16) + +#define MFX_PIPE_MODE_SELECT MFX(2, 0, 0, 0) +#define MFX_SURFACE_STATE MFX(2, 0, 0, 1) +#define MFX_PIPE_BUF_ADDR_STATE MFX(2, 0, 0, 2) +#define MFX_IND_OBJ_BASE_ADDR_STATE MFX(2, 0, 0, 3) +#define MFX_BSP_BUF_BASE_ADDR_STATE MFX(2, 0, 0, 4) +#define MFX_AES_STATE MFX(2, 0, 0, 5) +#define MFX_STATE_POINTER MFX(2, 0, 0, 6) + +#define MFX_WAIT MFX(1, 0, 0, 0) + +#define MFX_AVC_IMG_STATE MFX(2, 1, 0, 0) +#define MFX_AVC_QM_STATE MFX(2, 1, 0, 1) +#define MFX_AVC_DIRECTMODE_STATE MFX(2, 1, 0, 2) +#define MFX_AVC_SLICE_STATE MFX(2, 1, 0, 3) +#define MFX_AVC_REF_IDX_STATE MFX(2, 1, 0, 4) +#define MFX_AVC_WEIGHTOFFSET_STATE MFX(2, 1, 0, 5) + +#define MFD_AVC_BSD_OBJECT MFX(2, 1, 1, 8) + +#define MFX_MPEG2_PIC_STATE MFX(2, 3, 0, 0) +#define MFX_MPEG2_QM_STATE MFX(2, 3, 0, 1) + +#define MFD_MPEG2_BSD_OBJECT MFX(2, 3, 1, 8) + #define I965_DEPTHFORMAT_D32_FLOAT 1 #define BASE_ADDRESS_MODIFY (1 << 0) @@ -288,7 +411,9 @@ #define I965_VFCOMPONENT_STORE_PID 7 #define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ #define VE0_VALID (1 << 26) +#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */ #define 
VE0_FORMAT_SHIFT 16 #define VE0_OFFSET_SHIFT 0 #define VE1_VFCOMPONENT_0_SHIFT 28 @@ -298,8 +423,11 @@ #define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 #define VB0_BUFFER_INDEX_SHIFT 27 +#define GEN6_VB0_BUFFER_INDEX_SHIFT 26 #define VB0_VERTEXDATA (0 << 26) #define VB0_INSTANCEDATA (1 << 26) +#define GEN6_VB0_VERTEXDATA (0 << 20) +#define GEN6_VB0_INSTANCEDATA (1 << 20) #define VB0_BUFFER_PITCH_SHIFT 0 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) @@ -380,6 +508,21 @@ #define IEF_FILTER_SIZE_3X3 0 #define IEF_FILTER_SIZE_5X5 1 -#define URB_SIZE(intel) (IS_IRONLAKE(intel->device_id) ? 1024 : \ +#define MFX_FORMAT_MPEG2 0 +#define MFX_FORMAT_VC1 1 +#define MFX_FORMAT_AVC 2 + +#define MFX_CODEC_DECODE 0 +#define MFX_CODEC_ENCODE 1 + +#define MFD_MODE_VLD 0 +#define MFD_MODE_IT 1 + +#define MFX_SURFACE_PLANAR_420_8 4 +#define MFX_SURFACE_MONOCHROME 12 + +#define URB_SIZE(intel) (IS_GEN6(intel->device_id) ? 1024 : \ + IS_IRONLAKE(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 384 : 256) + #endif /* _I965_DEFINES_H_ */ diff --git a/i965_drv_video/i965_drv_video.c b/i965_drv_video/i965_drv_video.c index ec5412d..f0466ad 100644 --- a/i965_drv_video/i965_drv_video.c +++ b/i965_drv_video/i965_drv_video.c @@ -119,6 +119,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, /* out */ int *num_profiles) /* out */ { + struct i965_driver_data *i965 = i965_driver_data(ctx); int i = 0; profile_list[i++] = VAProfileMPEG2Simple; @@ -127,6 +128,12 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileH264Main; profile_list[i++] = VAProfileH264High; + if (IS_GEN6(i965->intel.device_id)) { + profile_list[i++] = VAProfileVC1Simple; + profile_list[i++] = VAProfileVC1Main; + profile_list[i++] = VAProfileVC1Advanced; + } + /* If the assert fails then I965_MAX_PROFILES needs to be bigger */ assert(i <= I965_MAX_PROFILES); *num_profiles = i; @@ -156,6 +163,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, entrypoint_list[0] = 
VAEntrypointVLD; break; + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + *num_entrypoints = 1; + entrypoint_list[0] = VAEntrypointVLD; + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; *num_entrypoints = 0; @@ -262,6 +276,17 @@ i965_CreateConfig(VADriverContextP ctx, break; + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + if (VAEntrypointVLD == entrypoint) { + vaStatus = VA_STATUS_SUCCESS; + } else { + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; break; @@ -392,8 +417,15 @@ i965_CreateSurfaces(VADriverContextP ctx, obj_surface->subpic = VA_INVALID_ID; obj_surface->orig_width = width; obj_surface->orig_height = height; - obj_surface->width = ALIGN(obj_surface->orig_width, 16); - obj_surface->height = ALIGN(obj_surface->orig_height, 16); + + if (IS_GEN6(i965->intel.device_id)) { + obj_surface->width = ALIGN(obj_surface->orig_width, 128); + obj_surface->height = ALIGN(obj_surface->orig_height, 32); + } else { + obj_surface->width = ALIGN(obj_surface->orig_width, 16); + obj_surface->height = ALIGN(obj_surface->orig_height, 16); + } + obj_surface->size = SIZE_YUV420(obj_surface->width, obj_surface->height); obj_surface->flags = SURFACE_REFERENCED; obj_surface->bo = NULL; @@ -724,14 +756,18 @@ i965_CreateContext(VADriverContextP ctx, return vaStatus; } - switch (obj_config->profile) { - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: + if (IS_GEN6(i965->intel.device_id)) render_state->interleaved_uv = 1; - break; - default: - render_state->interleaved_uv = 0; + else { + switch (obj_config->profile) { + case VAProfileH264Baseline: + case VAProfileH264Main: + case VAProfileH264High: + render_state->interleaved_uv = 1; + break; + default: + render_state->interleaved_uv = 0; + } } obj_context->context_id = contextID; @@ -978,6 +1014,12 @@ i965_BeginPicture(VADriverContextP ctx, 
vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + vaStatus = VA_STATUS_SUCCESS; + break; + default: assert(0); vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; @@ -1252,7 +1294,8 @@ i965_Init(VADriverContextP ctx) return VA_STATUS_ERROR_UNKNOWN; if (!IS_G4X(i965->intel.device_id) && - !IS_IRONLAKE(i965->intel.device_id)) + !IS_IRONLAKE(i965->intel.device_id) && + !IS_GEN6(i965->intel.device_id)) return VA_STATUS_ERROR_UNKNOWN; if (i965_media_init(ctx) == False) @@ -1726,15 +1769,15 @@ i965_PutSurface(VADriverContextP ctx, if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD)) pp_flag |= I965_PP_FLAG_DEINTERLACING; - i965_render_put_surface(ctx, surface, + intel_render_put_surface(ctx, surface, srcx, srcy, srcw, srch, destx, desty, destw, desth, pp_flag); if(obj_surface->subpic != VA_INVALID_ID) { - i965_render_put_subpic(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth); + intel_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); } dri_swap_buffer(ctx, dri_drawable); diff --git a/i965_drv_video/i965_media.c b/i965_drv_video/i965_media.c index 8945444..172dde5 100644 --- a/i965_drv_video/i965_media.c +++ b/i965_drv_video/i965_media.c @@ -39,6 +39,7 @@ #include "i965_defines.h" #include "i965_media_mpeg2.h" #include "i965_media_h264.h" +#include "gen6_mfd.h" #include "i965_media.h" #include "i965_drv_video.h" @@ -177,7 +178,7 @@ i965_media_depth_buffer(VADriverContextP ctx) OUT_BATCH(ctx, 0); OUT_BATCH(ctx, 0); OUT_BATCH(ctx, 0); - ADVANCE_BATCH(); + ADVANCE_BATCH(ctx); } static void @@ -273,6 +274,11 @@ i965_media_decode_picture(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_media_state *media_state = &i965->media_state; + if (IS_GEN6(i965->intel.device_id)) { + gen6_mfd_decode_picture(ctx, profile, decode_state); + return; + } + i965_media_decode_init(ctx, profile, decode_state); 
assert(media_state->media_states_setup); media_state->media_states_setup(ctx, decode_state); @@ -282,6 +288,11 @@ i965_media_decode_picture(VADriverContextP ctx, Bool i965_media_init(VADriverContextP ctx) { + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_GEN6(i965->intel.device_id)) + return gen6_mfd_init(ctx); + return True; } @@ -292,6 +303,9 @@ i965_media_terminate(VADriverContextP ctx) struct i965_media_state *media_state = &i965->media_state; int i; + if (IS_GEN6(i965->intel.device_id)) + return gen6_mfd_terminate(ctx); + if (media_state->free_private_context) media_state->free_private_context(&media_state->private_context); diff --git a/i965_drv_video/i965_render.c b/i965_drv_video/i965_render.c index ceef319..8789ca8 100644 --- a/i965_drv_video/i965_render.c +++ b/i965_drv_video/i965_render.c @@ -97,6 +97,28 @@ static const unsigned int ps_subpic_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_write.g4b.gen5" }; +/* programs for Sandybridge */ +static const unsigned int sf_kernel_static_gen6[][4] = +{ +}; + +static const uint32_t ps_kernel_static_gen6[][4] = { +#include "shaders/render/exa_wm_src_affine.g6b" +#include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_rgb.g6b" +#include "shaders/render/exa_wm_write.g6b" +}; + +static const uint32_t ps_subpic_kernel_static_gen6[][4] = { +#include "shaders/render/exa_wm_src_affine.g6b" +#include "shaders/render/exa_wm_src_sample_argb.g6b" +#include "shaders/render/exa_wm_write.g6b" +}; + +#define SURFACE_STATE_PADDED_SIZE ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) + static uint32_t float_to_uint (float f) { union { @@ -167,6 +189,28 @@ static struct render_kernel render_kernels_gen5[] = { } }; +static struct render_kernel render_kernels_gen6[] = { + { + "SF", + sf_kernel_static_gen6, + 
sizeof(sf_kernel_static_gen6), + NULL + }, + { + "PS", + ps_kernel_static_gen6, + sizeof(ps_kernel_static_gen6), + NULL + }, + + { + "PS_SUBPIC", + ps_subpic_kernel_static_gen6, + sizeof(ps_subpic_kernel_static_gen6), + NULL + } +}; + static struct render_kernel *render_kernels = NULL; #define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0])) @@ -532,6 +576,25 @@ i965_render_cc_unit(VADriverContextP ctx) } static void +i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss3.tiled_surface = 0; + ss->ss3.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss3.tiled_surface = 1; + ss->ss3.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss3.tiled_surface = 1; + ss->ss3.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void i965_render_src_surface_state(VADriverContextP ctx, int index, dri_bo *region, @@ -542,70 +605,15 @@ i965_render_src_surface_state(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct i965_surface_state *ss; - dri_bo *ss_bo; - - ss_bo = dri_bo_alloc(i965->intel.bufmgr, - "surface state", - sizeof(struct i965_surface_state), 32); - assert(ss_bo); - dri_bo_map(ss_bo, 1); - assert(ss_bo->virtual); - ss = ss_bo->virtual; - memset(ss, 0, sizeof(*ss)); - ss->ss0.surface_type = I965_SURFACE_2D; - ss->ss0.surface_format = format; - ss->ss0.writedisable_alpha = 0; - ss->ss0.writedisable_red = 0; - ss->ss0.writedisable_green = 0; - ss->ss0.writedisable_blue = 0; - ss->ss0.color_blend = 1; - ss->ss0.vert_line_stride = 0; - ss->ss0.vert_line_stride_ofs = 0; - ss->ss0.mipmap_layout_mode = 0; - ss->ss0.render_cache_read_mode = 0; - - ss->ss1.base_addr = region->offset + offset; - - ss->ss2.width = w - 1; - ss->ss2.height = h - 1; - ss->ss2.mip_count = 0; - ss->ss2.render_target_rotation = 0; - - ss->ss3.pitch = pitch - 1; - - 
dri_bo_emit_reloc(ss_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - offset, - offsetof(struct i965_surface_state, ss1), - region); - - dri_bo_unmap(ss_bo); + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + unsigned int tiling; + unsigned int swizzle; assert(index < MAX_RENDER_SURFACES); - assert(render_state->wm.surface[index] == NULL); - render_state->wm.surface[index] = ss_bo; - render_state->wm.sampler_count++; -} -static void -i965_subpic_render_src_surface_state(VADriverContextP ctx, - int index, - dri_bo *region, - unsigned long offset, - int w, int h, int p, int format) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct i965_surface_state *ss; - dri_bo *ss_bo; - - ss_bo = dri_bo_alloc(i965->intel.bufmgr, - "surface state", - sizeof(struct i965_surface_state), 32); - assert(ss_bo); dri_bo_map(ss_bo, 1); assert(ss_bo->virtual); - ss = ss_bo->virtual; + ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); memset(ss, 0, sizeof(*ss)); ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = format; @@ -626,19 +634,19 @@ i965_subpic_render_src_surface_state(VADriverContextP ctx, ss->ss2.mip_count = 0; ss->ss2.render_target_rotation = 0; - ss->ss3.pitch = p - 1; + ss->ss3.pitch = pitch - 1; + + dri_bo_get_tiling(region, &tiling, &swizzle); + i965_render_set_surface_tiling(ss, tiling); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, offset, - offsetof(struct i965_surface_state, ss1), + SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), region); + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); dri_bo_unmap(ss_bo); - - assert(index < MAX_RENDER_SURFACES); - assert(render_state->wm.surface[index] == NULL); - render_state->wm.surface[index] = ss_bo; render_state->wm.sampler_count++; } @@ -702,27 +710,8 @@ 
i965_subpic_render_src_surfaces_state(VADriverContextP ctx, region = obj_surface->bo; subpic_region = obj_image->bo; /*subpicture surface*/ - i965_subpic_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); - i965_subpic_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); -} - -static void -i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - ss->ss3.tiled_surface = 0; - ss->ss3.tile_walk = 0; - break; - case I915_TILING_X: - ss->ss3.tiled_surface = 1; - ss->ss3.tile_walk = I965_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: - ss->ss3.tiled_surface = 1; - ss->ss3.tile_walk = I965_TILEWALK_YMAJOR; - break; - } + i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); + i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); } static void @@ -732,15 +721,13 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; struct i965_surface_state *ss; - dri_bo *ss_bo; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + + assert(index < MAX_RENDER_SURFACES); - ss_bo = dri_bo_alloc(i965->intel.bufmgr, - "surface state", - sizeof(struct i965_surface_state), 32); - assert(ss_bo); dri_bo_map(ss_bo, 1); assert(ss_bo->virtual); - ss = ss_bo->virtual; + ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); memset(ss, 0, sizeof(*ss)); ss->ss0.surface_type = I965_SURFACE_2D; @@ -774,41 +761,11 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, 
I915_GEM_DOMAIN_RENDER, 0, - offsetof(struct i965_surface_state, ss1), + SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1), dest_region->bo); + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); dri_bo_unmap(ss_bo); - - assert(index < MAX_RENDER_SURFACES); - assert(render_state->wm.surface[index] == NULL); - render_state->wm.surface[index] = ss_bo; -} - -static void -i965_render_binding_table(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - int i; - unsigned int *binding_table; - - dri_bo_map(render_state->wm.binding_table, 1); - assert(render_state->wm.binding_table->virtual); - binding_table = render_state->wm.binding_table->virtual; - memset(binding_table, 0, render_state->wm.binding_table->size); - - for (i = 0; i < MAX_RENDER_SURFACES; i++) { - if (render_state->wm.surface[i]) { - binding_table[i] = render_state->wm.surface[i]->offset; - dri_bo_emit_reloc(render_state->wm.binding_table, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(*binding_table), - render_state->wm.surface[i]); - } - } - - dri_bo_unmap(render_state->wm.binding_table); } static void @@ -964,7 +921,6 @@ i965_surface_render_state_setup(VADriverContextP ctx, i965_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); - i965_render_binding_table(ctx); i965_render_upload_vertex(ctx, surface, srcx, srcy, srcw, srch, destx, desty, destw, desth); @@ -990,7 +946,6 @@ i965_subpic_render_state_setup(VADriverContextP ctx, i965_subpic_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_subpic_render_cc_unit(ctx); - i965_render_binding_table(ctx); VARectangle output_rect; output_rect.x = destx; @@ -1022,12 +977,13 @@ static void i965_render_state_base_address(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; if 
(IS_IRONLAKE(i965->intel.device_id)) { BEGIN_BATCH(ctx, 8); OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); @@ -1038,7 +994,7 @@ i965_render_state_base_address(VADriverContextP ctx) BEGIN_BATCH(ctx, 6); OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); @@ -1049,16 +1005,13 @@ i965_render_state_base_address(VADriverContextP ctx) static void i965_render_binding_table_pointers(VADriverContextP ctx) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(ctx, 6); OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4); OUT_BATCH(ctx, 0); /* vs */ OUT_BATCH(ctx, 0); /* gs */ OUT_BATCH(ctx, 0); /* clip */ OUT_BATCH(ctx, 0); /* sf */ - OUT_RELOC(ctx, render_state->wm.binding_table, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */ + OUT_BATCH(ctx, BINDING_TABLE_OFFSET); ADVANCE_BATCH(ctx); } @@ -1312,7 +1265,10 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; - BEGIN_BATCH(ctx, 6); + if (IS_GEN6(i965->intel.device_id)) + BEGIN_BLT_BATCH(ctx, 6); + else + BEGIN_BATCH(ctx, 6); OUT_BATCH(ctx, blt_cmd); OUT_BATCH(ctx, br13); OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x)); @@ -1328,9 +1284,9 @@ i965_clear_dest_region(VADriverContextP ctx) static void i965_surface_render_pipeline_setup(VADriverContextP ctx) { + i965_clear_dest_region(ctx); 
intel_batchbuffer_start_atomic(ctx, 0x1000); intel_batchbuffer_emit_mi_flush(ctx); - i965_clear_dest_region(ctx); i965_render_pipeline_select(ctx); i965_render_state_sip(ctx); i965_render_state_base_address(ctx); @@ -1371,7 +1327,6 @@ i965_render_initialize(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - int i; dri_bo *bo; /* VERTEX BUFFER */ @@ -1404,18 +1359,13 @@ i965_render_initialize(VADriverContextP ctx) render_state->sf.state = bo; /* WM */ - for (i = 0; i < MAX_RENDER_SURFACES; i++) { - dri_bo_unreference(render_state->wm.surface[i]); - render_state->wm.surface[i] = NULL; - } - - dri_bo_unreference(render_state->wm.binding_table); + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); bo = dri_bo_alloc(i965->intel.bufmgr, - "binding table", - MAX_RENDER_SURFACES * sizeof(unsigned int), - 64); + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); assert(bo); - render_state->wm.binding_table = bo; + render_state->wm.surface_state_binding_table_bo = bo; dri_bo_unreference(render_state->wm.sampler); bo = dri_bo_alloc(i965->intel.bufmgr, @@ -1452,7 +1402,7 @@ i965_render_initialize(VADriverContextP ctx) render_state->cc.viewport = bo; } -void +static void i965_render_put_surface(VADriverContextP ctx, VASurfaceID surface, short srcx, @@ -1478,8 +1428,480 @@ i965_render_put_surface(VADriverContextP ctx, intel_batchbuffer_flush(ctx); } -void -i965_render_put_subpic(VADriverContextP ctx, +static void +i965_render_put_subpicture(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface = SURFACE(surface); + struct object_subpic *obj_subpic = 
SUBPIC(obj_surface->subpic); + assert(obj_subpic); + + i965_render_initialize(ctx); + i965_subpic_render_state_setup(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + i965_subpic_render_pipeline_setup(ctx); + i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + intel_batchbuffer_flush(ctx); +} + +/* + * for GEN6+ + */ +static void +gen6_render_initialize(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + dri_bo *bo; + + /* VERTEX BUFFER */ + dri_bo_unreference(render_state->vb.vertex_buffer); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vertex buffer", + 4096, + 4096); + assert(bo); + render_state->vb.vertex_buffer = bo; + + /* WM */ + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); + assert(bo); + render_state->wm.surface_state_binding_table_bo = bo; + + dri_bo_unreference(render_state->wm.sampler); + bo = dri_bo_alloc(i965->intel.bufmgr, + "sampler state", + MAX_SAMPLERS * sizeof(struct i965_sampler_state), + 4096); + assert(bo); + render_state->wm.sampler = bo; + render_state->wm.sampler_count = 0; + + /* COLOR CALCULATOR */ + dri_bo_unreference(render_state->cc.state); + bo = dri_bo_alloc(i965->intel.bufmgr, + "color calc state", + sizeof(struct gen6_color_calc_state), + 4096); + assert(bo); + render_state->cc.state = bo; + + /* CC VIEWPORT */ + dri_bo_unreference(render_state->cc.viewport); + bo = dri_bo_alloc(i965->intel.bufmgr, + "cc viewport", + sizeof(struct i965_cc_viewport), + 4096); + assert(bo); + render_state->cc.viewport = bo; + + /* BLEND STATE */ + dri_bo_unreference(render_state->cc.blend); + bo = dri_bo_alloc(i965->intel.bufmgr, + "blend state", + sizeof(struct gen6_blend_state), + 4096); + assert(bo); + render_state->cc.blend = bo; + + /* 
DEPTH & STENCIL STATE */ + dri_bo_unreference(render_state->cc.depth_stencil); + bo = dri_bo_alloc(i965->intel.bufmgr, + "depth & stencil state", + sizeof(struct gen6_depth_stencil_state), + 4096); + assert(bo); + render_state->cc.depth_stencil = bo; +} + +static void +gen6_render_color_calc_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_color_calc_state *color_calc_state; + + dri_bo_map(render_state->cc.state, 1); + assert(render_state->cc.state->virtual); + color_calc_state = render_state->cc.state->virtual; + memset(color_calc_state, 0, sizeof(*color_calc_state)); + color_calc_state->constant_r = 1.0; + color_calc_state->constant_g = 0.0; + color_calc_state->constant_b = 1.0; + color_calc_state->constant_a = 1.0; + dri_bo_unmap(render_state->cc.state); +} + +static void +gen6_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_blend_state *blend_state; + + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + blend_state = render_state->cc.blend->virtual; + memset(blend_state, 0, sizeof(*blend_state)); + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + dri_bo_unmap(render_state->cc.blend); +} + +static void +gen6_render_depth_stencil_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_depth_stencil_state *depth_stencil_state; + + dri_bo_map(render_state->cc.depth_stencil, 1); + assert(render_state->cc.depth_stencil->virtual); + depth_stencil_state = render_state->cc.depth_stencil->virtual; + memset(depth_stencil_state, 0, sizeof(*depth_stencil_state)); + dri_bo_unmap(render_state->cc.depth_stencil); +} + +static void 
+gen6_render_setup_states(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + i965_render_dest_surface_state(ctx, 0); + i965_render_src_surfaces_state(ctx, surface); + i965_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen6_render_color_calc_state(ctx); + gen6_render_blend_state(ctx); + gen6_render_depth_stencil_state(ctx); + i965_render_upload_vertex(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); +} + +static void +gen6_emit_invarient_states(VADriverContextP ctx) +{ + OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); + OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(ctx, 1); + + /* Set system instruction pointer */ + OUT_BATCH(ctx, CMD_STATE_SIP | 0); + OUT_BATCH(ctx, 0); +} + +static void +gen6_emit_state_base_address(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound 
*/ + OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ +} + +static void +gen6_emit_viewport_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | + (4 - 2)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +static void +gen6_emit_urb(VADriverContextP ctx) +{ + OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ +} + +static void +gen6_emit_cc_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); +} + +static void +gen6_emit_sampler_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(ctx, 0); /* VS */ + OUT_BATCH(ctx, 0); /* GS */ + OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +static void +gen6_emit_binding_table(VADriverContextP ctx) +{ + /* Binding table pointers */ + OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | + GEN6_BINDING_TABLE_MODIFY_PS | + (4 - 
2)); + OUT_BATCH(ctx, 0); /* vs */ + OUT_BATCH(ctx, 0); /* gs */ + /* Only the PS uses the binding table */ + OUT_BATCH(ctx, BINDING_TABLE_OFFSET); +} + +static void +gen6_emit_depth_buffer_state(VADriverContextP ctx) +{ + OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) | + (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(ctx, 0); +} + +static void +gen6_emit_drawing_rectangle(VADriverContextP ctx) +{ + i965_render_drawing_rectangle(ctx); +} + +static void +gen6_emit_vs_state(VADriverContextP ctx) +{ + /* disable VS constant buffer */ + OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(ctx, 0); /* without VS kernel */ + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* pass-through */ +} + +static void +gen6_emit_gs_state(VADriverContextP ctx) +{ + /* disable GS constant buffer */ + OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(ctx, 0); /* without GS kernel */ + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* pass-through */ +} + +static void +gen6_emit_clip_state(VADriverContextP ctx) +{ + OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* pass-through */ + OUT_BATCH(ctx, 0); +} + +static void +gen6_emit_sf_state(VADriverContextP ctx) +{ + OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) | + (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) | + (0 << 
GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* DW9 */ + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* DW14 */ + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); /* DW19 */ +} + +static void +gen6_emit_wm_state(VADriverContextP ctx, int kernel) +{ + /* disable WM constant buffer */ + OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2)); + OUT_RELOC(ctx, render_kernels[kernel].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ + OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE | + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); +} + +static void +gen6_emit_vertex_element_state(VADriverContextP ctx) +{ + /* Set up our vertex elements, sourced from the single vertex buffer. 
*/ + OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2)); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen6_emit_vertices(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(ctx, 11); + OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3); + OUT_BATCH(ctx, + (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); + OUT_BATCH(ctx, 0); + + OUT_BATCH(ctx, + CMD_3DPRIMITIVE | + _3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH(ctx, 3); /* vertex count per instance */ + OUT_BATCH(ctx, 0); /* start vertex offset */ + OUT_BATCH(ctx, 1); /* single instance */ + OUT_BATCH(ctx, 0); /* start instance location */ + OUT_BATCH(ctx, 0); /* index buffer offset, ignored */ + ADVANCE_BATCH(ctx); +} + +static void 
+gen6_render_emit_states(VADriverContextP ctx, int kernel) +{ + intel_batchbuffer_start_atomic(ctx, 0x1000); + intel_batchbuffer_emit_mi_flush(ctx); + gen6_emit_invarient_states(ctx); + gen6_emit_state_base_address(ctx); + gen6_emit_viewport_state_pointers(ctx); + gen6_emit_urb(ctx); + gen6_emit_cc_state_pointers(ctx); + gen6_emit_sampler_state_pointers(ctx); + gen6_emit_vs_state(ctx); + gen6_emit_gs_state(ctx); + gen6_emit_clip_state(ctx); + gen6_emit_sf_state(ctx); + gen6_emit_wm_state(ctx, kernel); + gen6_emit_binding_table(ctx); + gen6_emit_depth_buffer_state(ctx); + gen6_emit_drawing_rectangle(ctx); + gen6_emit_vertex_element_state(ctx); + gen6_emit_vertices(ctx); + intel_batchbuffer_end_atomic(ctx); +} + +static void +gen6_render_put_surface(VADriverContextP ctx, VASurfaceID surface, short srcx, short srcy, @@ -1488,22 +1910,147 @@ i965_render_put_subpic(VADriverContextP ctx, short destx, short desty, unsigned short destw, - unsigned short desth) + unsigned short desth, + unsigned int flag) +{ + gen6_render_initialize(ctx); + gen6_render_setup_states(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + i965_clear_dest_region(ctx); + gen6_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(ctx); +} + +static void +gen6_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_blend_state *blend_state; + + dri_bo_unmap(render_state->cc.state); + dri_bo_map(render_state->cc.blend, 1); + assert(render_state->cc.blend->virtual); + blend_state = render_state->cc.blend->virtual; + memset(blend_state, 0, sizeof(*blend_state)); + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.blend_enable = 1; + 
blend_state->blend1.post_blend_clamp_enable = 1; + blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + dri_bo_unmap(render_state->cc.blend); +} + +static void +gen6_subpicture_render_setup_states(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + VARectangle output_rect; + + output_rect.x = destx; + output_rect.y = desty; + output_rect.width = destw; + output_rect.height = desth; + + i965_render_dest_surface_state(ctx, 0); + i965_subpic_render_src_surfaces_state(ctx, surface); + i965_render_sampler(ctx); + i965_render_cc_viewport(ctx); + gen6_render_color_calc_state(ctx); + gen6_subpicture_render_blend_state(ctx); + gen6_render_depth_stencil_state(ctx); + i965_subpic_render_upload_vertex(ctx, surface, &output_rect); +} + +static void +gen6_render_put_subpicture(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(surface); struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); - assert(obj_subpic); - i965_render_initialize(ctx); - i965_subpic_render_state_setup(ctx, surface, - srcx, srcy, srcw, srch, - destx, desty, destw, desth); - i965_subpic_render_pipeline_setup(ctx); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + assert(obj_subpic); + gen6_render_initialize(ctx); + gen6_subpicture_render_setup_states(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL); intel_batchbuffer_flush(ctx); } +/* + * global functions + */ +void +intel_render_put_surface(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short 
srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth, + unsigned int flag) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_GEN6(i965->intel.device_id)) + gen6_render_put_surface(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); + else + i965_render_put_surface(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth, + flag); +} + +void +intel_render_put_subpicture(VADriverContextP ctx, + VASurfaceID surface, + short srcx, + short srcy, + unsigned short srcw, + unsigned short srch, + short destx, + short desty, + unsigned short destw, + unsigned short desth) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + if (IS_GEN6(i965->intel.device_id)) + gen6_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); + else + i965_render_put_subpicture(ctx, surface, + srcx, srcy, srcw, srch, + destx, desty, destw, desth); +} Bool i965_render_init(VADriverContextP ctx) @@ -1515,14 +2062,22 @@ i965_render_init(VADriverContextP ctx) /* kernel */ assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / sizeof(render_kernels_gen5[0]))); + assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / + sizeof(render_kernels_gen6[0]))); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_GEN6(i965->intel.device_id)) + render_kernels = render_kernels_gen6; + else if (IS_IRONLAKE(i965->intel.device_id)) render_kernels = render_kernels_gen5; else render_kernels = render_kernels_gen4; for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct render_kernel *kernel = &render_kernels[i]; + + if (!kernel->size) + continue; + kernel->bo = dri_bo_alloc(i965->intel.bufmgr, kernel->name, kernel->size, 0x1000); @@ -1567,22 +2122,19 @@ i965_render_terminate(VADriverContextP ctx) render_state->vs.state = NULL; dri_bo_unreference(render_state->sf.state); render_state->sf.state = NULL; - 
dri_bo_unreference(render_state->wm.binding_table); - render_state->wm.binding_table = NULL; dri_bo_unreference(render_state->wm.sampler); render_state->wm.sampler = NULL; dri_bo_unreference(render_state->wm.state); render_state->wm.state = NULL; - - for (i = 0; i < MAX_RENDER_SURFACES; i++) { - dri_bo_unreference(render_state->wm.surface[i]); - render_state->wm.surface[i] = NULL; - } - + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); dri_bo_unreference(render_state->cc.viewport); render_state->cc.viewport = NULL; dri_bo_unreference(render_state->cc.state); render_state->cc.state = NULL; + dri_bo_unreference(render_state->cc.blend); + render_state->cc.blend = NULL; + dri_bo_unreference(render_state->cc.depth_stencil); + render_state->cc.depth_stencil = NULL; if (render_state->draw_region) { dri_bo_unreference(render_state->draw_region->bo); diff --git a/i965_drv_video/i965_render.h b/i965_drv_video/i965_render.h index 84b50f2..8ff4fe2 100644 --- a/i965_drv_video/i965_render.h +++ b/i965_drv_video/i965_render.h @@ -28,8 +28,8 @@ #ifndef _I965_RENDER_H_ #define _I965_RENDER_H_ -#define MAX_RENDER_SURFACES 16 #define MAX_SAMPLERS 16 +#define MAX_RENDER_SURFACES (MAX_SAMPLERS + 1) #include "i965_post_processing.h" @@ -50,14 +50,15 @@ struct i965_render_state struct { int sampler_count; dri_bo *sampler; - dri_bo *surface[MAX_RENDER_SURFACES]; - dri_bo *binding_table; dri_bo *state; + dri_bo *surface_state_binding_table_bo; } wm; struct { dri_bo *state; dri_bo *viewport; + dri_bo *blend; + dri_bo *depth_stencil; } cc; struct { @@ -74,7 +75,7 @@ struct i965_render_state Bool i965_render_init(VADriverContextP ctx); Bool i965_render_terminate(VADriverContextP ctx); -void i965_render_put_surface(VADriverContextP ctx, +void intel_render_put_surface(VADriverContextP ctx, VASurfaceID surface, short srcx, short srcy, @@ -88,7 +89,7 @@ void i965_render_put_surface(VADriverContextP ctx, void -i965_render_put_subpic(VADriverContextP ctx, 
+intel_render_put_subpicture(VADriverContextP ctx, VASurfaceID surface, short srcx, short srcy, diff --git a/i965_drv_video/i965_structs.h b/i965_drv_video/i965_structs.h index f8be616..5f85269 100644 --- a/i965_drv_video/i965_structs.h +++ b/i965_drv_video/i965_structs.h @@ -964,4 +964,105 @@ struct i965_sampler_dndi } dw7; }; + +struct gen6_blend_state +{ + struct { + unsigned int dest_blend_factor:5; + unsigned int source_blend_factor:5; + unsigned int pad3:1; + unsigned int blend_func:3; + unsigned int pad2:1; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_source_blend_factor:5; + unsigned int pad1:1; + unsigned int ia_blend_func:3; + unsigned int pad0:1; + unsigned int ia_blend_enable:1; + unsigned int blend_enable:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pad0:4; + unsigned int x_dither_offset:2; + unsigned int y_dither_offset:2; + unsigned int dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int pad1:1; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + unsigned int pad2:1; + unsigned int write_disable_b:1; + unsigned int write_disable_g:1; + unsigned int write_disable_r:1; + unsigned int write_disable_a:1; + unsigned int pad3:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned int alpha_test_format:1; + unsigned int pad0:14; + unsigned int round_disable:1; + unsigned int bf_stencil_ref:8; + unsigned int stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned int ui:8; + unsigned int pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned int pad0:3; + unsigned int 
bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } ds0; + + struct { + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + } ds1; + + struct { + unsigned int pad0:26; + unsigned int depth_write_enable:1; + unsigned int depth_test_func:3; + unsigned int pad1:1; + unsigned int depth_test_enable:1; + } ds2; +}; + #endif /* _I965_STRUCTS_H_ */ diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c index 15c3201..4988e9c 100644 --- a/i965_drv_video/intel_batchbuffer.c +++ b/i965_drv_video/intel_batchbuffer.c @@ -40,6 +40,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) int batch_size = BATCH_SIZE; assert(batch->flag == I915_EXEC_RENDER || + batch->flag == I915_EXEC_BLT || batch->flag == I915_EXEC_BSD); dri_bo_unreference(batch->buffer); @@ -281,21 +282,23 @@ intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size) intel_batchbuffer_data_helper(ctx, intel->batch_bcs, data, size); } -static void -intel_batchbuffer_emit_mi_flush_helper(VADriverContextP ctx, - struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space_helper(ctx, batch, 4); - intel_batchbuffer_emit_dword_helper(batch, - MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE); -} - void intel_batchbuffer_emit_mi_flush(VADriverContextP ctx) { struct intel_driver_data *intel = intel_driver_data(ctx); - intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch); + if (intel->batch->flag == I915_EXEC_BLT) { + BEGIN_BLT_BATCH(ctx, 4); + OUT_BATCH(ctx, MI_FLUSH_DW); 
+ OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + OUT_BATCH(ctx, 0); + ADVANCE_BATCH(ctx); + } else if (intel->batch->flag == I915_EXEC_RENDER) { + BEGIN_BATCH(ctx, 1); + OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); + ADVANCE_BATCH(ctx); + } } void @@ -303,7 +306,18 @@ intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx) { struct intel_driver_data *intel = intel_driver_data(ctx); - intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch_bcs); + if (IS_GEN6(intel->device_id)) { + BEGIN_BCS_BATCH(ctx, 4); + OUT_BCS_BATCH(ctx, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(ctx, 0); + ADVANCE_BCS_BATCH(ctx); + } else { + BEGIN_BCS_BATCH(ctx, 1); + OUT_BCS_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); + ADVANCE_BCS_BATCH(ctx); + } } void @@ -320,7 +334,7 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size) { struct intel_driver_data *intel = intel_driver_data(ctx); - + intel_batchbuffer_check_batchbuffer_flag(ctx, I915_EXEC_RENDER); intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size); } @@ -354,3 +368,64 @@ intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx) intel_batchbuffer_end_atomic_helper(intel->batch_bcs); } +static void +intel_batchbuffer_begin_batch_helper(struct intel_batchbuffer *batch, int total) +{ + batch->emit_total = total * 4; + batch->emit_start = batch->ptr; +} + +void +intel_batchbuffer_begin_batch(VADriverContextP ctx, int total) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + + intel_batchbuffer_begin_batch_helper(intel->batch, total); +} + +void +intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + + intel_batchbuffer_begin_batch_helper(intel->batch_bcs, total); +} + +static void +intel_batchbuffer_advance_batch_helper(struct intel_batchbuffer *batch) +{ + assert(batch->emit_total == 
(batch->ptr - batch->emit_start)); +} + +void +intel_batchbuffer_advance_batch(VADriverContextP ctx) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + + intel_batchbuffer_advance_batch_helper(intel->batch); +} + +void +intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + + intel_batchbuffer_advance_batch_helper(intel->batch_bcs); +} + +void +intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + + if (flag != I915_EXEC_RENDER && + flag != I915_EXEC_BLT && + flag != I915_EXEC_BSD) + return; + + if (intel->batch->flag == flag) + return; + + intel_batchbuffer_flush_helper(ctx, intel->batch); + intel->batch->flag = flag; +} diff --git a/i965_drv_video/intel_batchbuffer.h b/i965_drv_video/intel_batchbuffer.h index 99ab08d..25652e1 100644 --- a/i965_drv_video/intel_batchbuffer.h +++ b/i965_drv_video/intel_batchbuffer.h @@ -18,6 +18,9 @@ struct intel_batchbuffer int atomic; int flag; + int emit_total; + unsigned char *emit_start; + int (*run)(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4, int ring_flag); @@ -37,6 +40,9 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size); void intel_batchbuffer_end_atomic(VADriverContextP ctx); Bool intel_batchbuffer_flush(VADriverContextP ctx); +void intel_batchbuffer_begin_batch(VADriverContextP ctx, int total); +void intel_batchbuffer_advance_batch(VADriverContextP ctx); + void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x); void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo, uint32_t read_domains, uint32_t write_domains, @@ -48,9 +54,19 @@ void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size) void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx); Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx); -#define BEGIN_BATCH(ctx, n) 
do { \ - intel_batchbuffer_require_space(ctx, (n) * 4); \ -} while (0) +void intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total); +void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx); + +void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag); + +#define __BEGIN_BATCH(ctx, n, flag) do { \ + intel_batchbuffer_check_batchbuffer_flag(ctx, flag); \ + intel_batchbuffer_require_space(ctx, (n) * 4); \ + intel_batchbuffer_begin_batch(ctx, (n)); \ + } while (0) + +#define BEGIN_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_RENDER) +#define BEGIN_BLT_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_BLT) #define OUT_BATCH(ctx, d) do { \ intel_batchbuffer_emit_dword(ctx, d); \ @@ -63,10 +79,12 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx); } while (0) #define ADVANCE_BATCH(ctx) do { \ + intel_batchbuffer_advance_batch(ctx); \ } while (0) #define BEGIN_BCS_BATCH(ctx, n) do { \ intel_batchbuffer_require_space_bcs(ctx, (n) * 4); \ + intel_batchbuffer_begin_batch_bcs(ctx, (n)); \ } while (0) #define OUT_BCS_BATCH(ctx, d) do { \ @@ -80,6 +98,7 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx); } while (0) #define ADVANCE_BCS_BATCH(ctx) do { \ + intel_batchbuffer_advance_batch_bcs(ctx); \ } while (0) #endif /* _INTEL_BATCHBUFFER_H_ */ diff --git a/i965_drv_video/intel_batchbuffer_dump.c b/i965_drv_video/intel_batchbuffer_dump.c index 0732f0f..99c2c1c 100644 --- a/i965_drv_video/intel_batchbuffer_dump.c +++ b/i965_drv_video/intel_batchbuffer_dump.c @@ -44,6 +44,7 @@ dump_mi(unsigned int *data, unsigned int offset, int count, unsigned int device, { 0x00, 0, 1, 1, "MI_NOOP" }, { 0x04, 0, 1, 1, "MI_FLUSH" }, { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x26, 0x3f, 4, 5, "MI_FLUSH_DW" }, }; opcode = ((data[0] & MASK_MI_OPCODE) >> SHIFT_MI_OPCODE); @@ -350,6 +351,363 @@ dump_gfxpipe_bsd(unsigned int *data, unsigned int offset, int count, unsigned in return length; } +static void +dump_mfx_mode_select(unsigned int *data, 
unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, + "decoder mode: %d(%s)," + "post deblocking output enable %d," + "pre deblocking output enable %d," + "codec select: %d(%s)," + "standard select: %d(%s)" + "\n", + (data[1] >> 16) & 0x1, ((data[1] >> 16) & 0x1) ? "IT" : "VLD", + (data[1] >> 9) & 0x1, + (data[1] >> 8) & 0x1, + (data[1] >> 4) & 0x1, ((data[1] >> 4) & 0x1) ? "Encode" : "Decode", + (data[1] >> 0) & 0x3, ((data[1] >> 0) & 0x3) == 0 ? "MPEG2" : + ((data[1] >> 0) & 0x3) == 1 ? "VC1" : + ((data[1] >> 0) & 0x3) == 2 ? "AVC" : "Reserved"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); +} + +static void +dump_mfx_surface_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); +} + +static void +dump_mfx_pipe_buf_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 6, "dword 06\n"); + instr_out(data, offset, 7, "dword 07\n"); + instr_out(data, offset, 8, "dword 08\n"); + instr_out(data, offset, 9, "dword 09\n"); + instr_out(data, offset, 10, "dword 10\n"); + instr_out(data, offset, 11, "dword 11\n"); + instr_out(data, offset, 12, "dword 12\n"); + instr_out(data, offset, 13, "dword 13\n"); + instr_out(data, offset, 14, "dword 14\n"); + instr_out(data, offset, 15, "dword 15\n"); + instr_out(data, offset, 16, "dword 16\n"); + instr_out(data, offset, 17, "dword 17\n"); + instr_out(data, offset, 18, "dword 18\n"); + instr_out(data, offset, 19, "dword 19\n"); + 
instr_out(data, offset, 20, "dword 20\n"); + instr_out(data, offset, 21, "dword 21\n"); + instr_out(data, offset, 22, "dword 22\n"); + instr_out(data, offset, 24, "dword 23\n"); +} + +static void +dump_mfx_ind_obj_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 6, "dword 06\n"); + instr_out(data, offset, 7, "dword 07\n"); + instr_out(data, offset, 8, "dword 08\n"); + instr_out(data, offset, 9, "dword 09\n"); + instr_out(data, offset, 10, "dword 10\n"); +} + +static void +dump_mfx_bsp_buf_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); +} + +static void +dump_mfx_aes_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 6, "dword 06\n"); +} + +static void +dump_mfx_state_pointer(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); +} + +static int +dump_mfx_common(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures) +{ + unsigned int subopcode; + int length, i; + + struct { + unsigned int subopcode; + int min_len; + int max_len; + char *name; + void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures); + } mfx_common_commands[] = { + { SUBOPCODE_MFX(0, 0), 0x04, 0x04, "MFX_PIPE_MODE_SELECT", 
dump_mfx_mode_select }, + { SUBOPCODE_MFX(0, 1), 0x06, 0x06, "MFX_SURFACE_STATE", dump_mfx_surface_state }, + { SUBOPCODE_MFX(0, 2), 0x18, 0x18, "MFX_PIPE_BUF_ADDR_STATE", dump_mfx_pipe_buf_addr_state }, + { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_IND_OBJ_BASE_ADDR_STATE", dump_mfx_ind_obj_base_addr_state }, + { SUBOPCODE_MFX(0, 4), 0x04, 0x04, "MFX_BSP_BUF_BASE_ADDR_STATE", dump_mfx_bsp_buf_base_addr_state }, + { SUBOPCODE_MFX(0, 5), 0x07, 0x07, "MFX_AES_STATE", dump_mfx_aes_state }, + { SUBOPCODE_MFX(0, 6), 0x00, 0x00, "MFX_STATE_POINTER", dump_mfx_state_pointer }, + }; + + subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE); + + for (i = 0; i < ARRAY_ELEMS(mfx_common_commands); i++) { + if (subopcode == mfx_common_commands[i].subopcode) { + unsigned int index; + + length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH; + length += 2; + instr_out(data, offset, 0, "%s\n", mfx_common_commands[i].name); + + if (length < mfx_common_commands[i].min_len || + length > mfx_common_commands[i].max_len) { + fprintf(gout, "Bad length(%d) in %s [%d, %d]\n", + length, mfx_common_commands[i].name, + mfx_common_commands[i].min_len, + mfx_common_commands[i].max_len); + } + + if (length - 1 >= count) + BUFFER_FAIL(count, length, mfx_common_commands[i].name); + + if (mfx_common_commands[i].detail) + mfx_common_commands[i].detail(data, offset, device, failures); + else { + for (index = 1; index < length; index++) + instr_out(data, offset, index, "dword %d\n", index); + } + + return length; + } + } + + instr_out(data, offset, 0, "UNKNOWN MFX COMMON COMMAND\n"); + (*failures)++; + return 1; +} + +static void +dump_mfx_avc_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 
6, "dword 06\n"); + instr_out(data, offset, 7, "dword 07\n"); + instr_out(data, offset, 8, "dword 08\n"); + instr_out(data, offset, 9, "dword 09\n"); + instr_out(data, offset, 10, "dword 10\n"); + instr_out(data, offset, 11, "dword 11\n"); + instr_out(data, offset, 12, "dword 12\n"); +} + +static void +dump_mfx_avc_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + unsigned int length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2; + int i; + + instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n", + (data[1] >> 8) & 0xff, data[1] & 0xff); + + for (i = 2; i < length; i++) { + instr_out(data, offset, i, "dword %d\n", i); + } +} + +static void +dump_mfx_avc_directmode_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + int i; + + for (i = 1; i < 33; i++) { + instr_out(data, offset, i, "Direct MV Buffer Base Address for Picture %d\n", i - 1); + } + + for (i = 33; i < 35; i++) { + instr_out(data, offset, i, "Direct MV Buffer Base Address for Current Decoding Frame/Field\n"); + } + + for (i = 35; i < 69; i++) { + instr_out(data, offset, i, "POC List\n"); + } +} + +static void +dump_mfx_avc_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 6, "dword 06\n"); + instr_out(data, offset, 7, "dword 07\n"); + instr_out(data, offset, 8, "dword 08\n"); + instr_out(data, offset, 9, "dword 09\n"); +} + +static void +dump_mfx_avc_ref_idx_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + instr_out(data, offset, 1, "dword 01\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 
04\n"); + instr_out(data, offset, 5, "dword 05\n"); + instr_out(data, offset, 6, "dword 06\n"); + instr_out(data, offset, 7, "dword 07\n"); + instr_out(data, offset, 8, "dword 08\n"); + instr_out(data, offset, 9, "dword 09\n"); +} + +static void +dump_mfx_avc_weightoffset_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + int i; + + instr_out(data, offset, 1, + "Weight and Offset L%d table\n", + (data[1] >> 0) & 0x1); + + for (i = 2; i < 31; i++) { + instr_out(data, offset, i, "dword %d\n", i); + } +} + +static void +dump_mfd_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures) +{ + int is_phantom_slice = ((data[1] & 0x3fffff) == 0); + + if (is_phantom_slice) { + instr_out(data, offset, 1, "phantom slice\n"); + instr_out(data, offset, 2, "dword 02\n"); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, "dword 04\n"); + instr_out(data, offset, 5, "dword 05\n"); + } else { + instr_out(data, offset, 1, "Indirect BSD Data Length: %d\n", data[1] & 0x3fffff); + instr_out(data, offset, 2, "Indirect BSD Data Start Address: 0x%08x\n", data[2] & 0x1fffffff); + instr_out(data, offset, 3, "dword 03\n"); + instr_out(data, offset, 4, + "First_MB_Byte_Offset of Slice Data from Slice Header: 0x%08x," + "slice header skip mode: %d" + "\n", + (data[4] >> 16), + (data[4] >> 6) & 0x1); + instr_out(data, offset, 5, "dword 05\n"); + } +} + +static int +dump_mfx_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures) +{ + unsigned int subopcode; + int length, i; + + struct { + unsigned int subopcode; + int min_len; + int max_len; + char *name; + void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures); + } mfx_avc_commands[] = { + { SUBOPCODE_MFX(0, 0), 0x0d, 0x0d, "MFX_AVC_IMG_STATE", dump_mfx_avc_img_state }, + { SUBOPCODE_MFX(0, 1), 0x02, 0x3a, "MFX_AVC_QM_STATE", dump_mfx_avc_qm_state }, + { SUBOPCODE_MFX(0, 2), 0x45, 
0x45, "MFX_AVC_DIRECTMODE_STATE", dump_mfx_avc_directmode_state }, + { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_AVC_SLICE_STATE", dump_mfx_avc_slice_state }, + { SUBOPCODE_MFX(0, 4), 0x0a, 0x0a, "MFX_AVC_REF_IDX_STATE", dump_mfx_avc_ref_idx_state }, + { SUBOPCODE_MFX(0, 5), 0x32, 0x32, "MFX_AVC_WEIGHTOFFSET_STATE", dump_mfx_avc_weightoffset_state }, + { SUBOPCODE_MFX(1, 8), 0x06, 0x06, "MFD_AVC_BSD_OBJECT", dump_mfd_bsd_object }, + }; + + subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE); + + for (i = 0; i < ARRAY_ELEMS(mfx_avc_commands); i++) { + if (subopcode == mfx_avc_commands[i].subopcode) { + unsigned int index; + + length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH; + length += 2; + instr_out(data, offset, 0, "%s\n", mfx_avc_commands[i].name); + + if (length < mfx_avc_commands[i].min_len || + length > mfx_avc_commands[i].max_len) { + fprintf(gout, "Bad length(%d) in %s [%d, %d]\n", + length, mfx_avc_commands[i].name, + mfx_avc_commands[i].min_len, + mfx_avc_commands[i].max_len); + } + + if (length - 1 >= count) + BUFFER_FAIL(count, length, mfx_avc_commands[i].name); + + if (mfx_avc_commands[i].detail) + mfx_avc_commands[i].detail(data, offset, device, failures); + else { + for (index = 1; index < length; index++) + instr_out(data, offset, index, "dword %d\n", index); + } + + return length; + } + } + + instr_out(data, offset, 0, "UNKNOWN MFX AVC COMMAND\n"); + (*failures)++; + return 1; +} + +static int +dump_gfxpipe_mfx(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures) +{ + int length; + + switch ((data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE) { + case OPCODE_MFX_COMMON: + length = dump_mfx_common(data, offset, count, device, failures); + break; + + case OPCODE_MFX_AVC: + length = dump_mfx_avc(data, offset, count, device, failures); + break; + + default: + length = 1; + (*failures)++; + instr_out(data, offset, 0, "UNKNOWN MFX OPCODE\n"); + break; + } + + return length; +} + 
static int dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures) { @@ -361,7 +719,11 @@ dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int de break; case GFXPIPE_BSD: - length = dump_gfxpipe_bsd(data, offset, count, device, failures); + if (IS_GEN6(device)) + length = dump_gfxpipe_mfx(data, offset, count, device, failures); + else + length = dump_gfxpipe_bsd(data, offset, count, device, failures); + break; default: diff --git a/i965_drv_video/intel_batchbuffer_dump.h b/i965_drv_video/intel_batchbuffer_dump.h index ad096a9..e76b4f7 100644 --- a/i965_drv_video/intel_batchbuffer_dump.h +++ b/i965_drv_video/intel_batchbuffer_dump.h @@ -36,6 +36,12 @@ #define SUBOPCODE_BSD_IND_OBJ 4 #define SUBOPCODE_BSD_OBJECT 8 +/* MFX */ +#define OPCODE_MFX_COMMON 0 +#define OPCODE_MFX_AVC 1 + +#define SUBOPCODE_MFX(A, B) ((A) << 5 | (B)) + /* MI */ #define MASK_MI_OPCODE 0x1F800000 diff --git a/i965_drv_video/intel_driver.h b/i965_drv_video/intel_driver.h index 1e2adfa..436cccf 100644 --- a/i965_drv_video/intel_driver.h +++ b/i965_drv_video/intel_driver.h @@ -29,7 +29,10 @@ #define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23)) #define MI_FLUSH (CMD_MI | (0x4 << 23)) -#define STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0) +#define MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0) + +#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 0x2) +#define MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE (0x1 << 7) #define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04) #define XY_COLOR_BLT_WRITE_ALPHA (1 << 21) @@ -124,9 +127,20 @@ struct intel_region #define PCI_CHIP_IRONLAKE_D_G 0x0042 #define PCI_CHIP_IRONLAKE_M_G 0x0046 -#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ - devid == PCI_CHIP_Q45_G || \ - devid == PCI_CHIP_G45_G || \ +#ifndef PCI_CHIP_SANDYBRIDGE_GT1 +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define 
PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A /* Server */ +#endif + + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) @@ -135,4 +149,12 @@ struct intel_region #define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G) #define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid)) +#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||\ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_S_GT) + #endif /* _INTEL_DRIVER_H_ */ diff --git a/i965_drv_video/shaders/render/Makefile.am b/i965_drv_video/shaders/render/Makefile.am index fb9b11e..ba019af 100644 --- a/i965_drv_video/shaders/render/Makefile.am +++ b/i965_drv_video/shaders/render/Makefile.am @@ -35,17 +35,36 @@ EXTRA_DIST = $(INTEL_G4I) \ $(INTEL_G4B) \ $(INTEL_G4B_GEN5) +INTEL_G6A = \ + exa_wm_src_affine.g6a \ + exa_wm_src_sample_argb.g6a \ + exa_wm_src_sample_planar.g6a \ + exa_wm_write.g6a \ + exa_wm_yuv_rgb.g6a + +INTEL_G6B = \ + exa_wm_src_affine.g6b \ + exa_wm_src_sample_argb.g6b \ + exa_wm_src_sample_planar.g6b \ + exa_wm_write.g6b \ + exa_wm_yuv_rgb.g6b + if HAVE_GEN4ASM -SUFFIXES = .g4a .g4b +SUFFIXES = .g4a .g4b .g6a .g6b .g4a.g4b: m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m +.g6a.g6b: + m4 -I$(srcdir) -s $< > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m + $(INTEL_G4B): $(INTEL_G4I) +$(INTEL_G6B): $(INTEL_G4I) -BUILT_SOURCES= $(INTEL_G4B) +BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G6B) 
clean-local: -rm -f $(INTEL_G4B) -rm -f $(INTEL_G4B_GEN5) + -rm -f $(INTEL_G6B) endif diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g6a b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a new file mode 100644 index 0000000..08195a4 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a @@ -0,0 +1,47 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +/* + * Fragment to compute src u/v values + */ +include(`exa_wm.g4i') + +define(`ul', `src_u') +define(`uh', `m3') +define(`vl', `src_v') +define(`vh', `m5') + +define(`bl', `g2.0<8,8,1>F') +define(`bh', `g4.0<8,8,1>F') + +define(`a0_a_x',`g6.0<0,1,0>F') +define(`a0_a_y',`g6.16<0,1,0>F') + +/* U */ +pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ +pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */ + +/* V */ +pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */ +pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */ diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g6b b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b new file mode 100644 index 0000000..7035e6a --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 }, + { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 }, diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a new file mode 100644 index 0000000..67bb888 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a @@ -0,0 +1,48 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind /* msg reg index */ + src_sample_base<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b new file mode 100644 index 0000000..2846491 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 }, diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a new file mode 100644 index 
0000000..1f78629 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a @@ -0,0 +1,58 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface in planar format */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ +/* 'payload' loading, assuming tex coord start from g4 */ + +mov (1) g0.8<1>UD 0x0000c000UD { align1 mask_disable }; +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample UV (CrCb) */ +send (16) src_msg_ind /* msg reg index */ + src_sample_g<1>UW /* readback */ + null + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 4 { align1 }; /* required message len 5, readback len 8 */ + +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample Y */ +send (16) src_msg_ind /* msg reg index */ + src_sample_r<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b new file mode 100644 index 0000000..ef45022 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b @@ -0,0 +1,6 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 }, + { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22001cc9, 0x00000020, 0x0a4a0203 }, + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0001 }, diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6a b/i965_drv_video/shaders/render/exa_wm_write.g6a new file mode 100644 index 0000000..c0f3cc1 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_write.g6a @@ -0,0 +1,77 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +include(`exa_wm.g4i') + +/* + * Prepare data in m2-m3 for Red channel, m4-m5 for Green channel, + * m6-m7 for Blue and m8-m9 for Alpha channel + */ +define(`slot_r_00', `m2') +define(`slot_r_01', `m3') +define(`slot_g_00', `m4') +define(`slot_g_01', `m5') +define(`slot_b_00', `m6') +define(`slot_b_01', `m7') +define(`slot_a_00', `m8') +define(`slot_a_01', `m9') +define(`data_port_msg_2_ind', `2') + +mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 }; +mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 }; + +mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 }; +mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 }; + +mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 }; +mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 }; + +mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 }; +mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 }; + +/* write */ +send (16) + data_port_msg_2_ind + acc0<1>UW + null + write ( + 0, /* binding_table */ + 16, /* pixel 
scordboard clear, msg type simd16 single source */ + 12, /* render target write */ + 0, /* no write commit message */ + 0 /* headerless render target write */ + ) + mlen 8 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; + diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6b b/i965_drv_video/shaders/render/exa_wm_write.g6b new file mode 100644 index 0000000..3cb6bff --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_write.g6b @@ -0,0 +1,17 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a new file mode 100644 index 0000000..5b9e625 --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a @@ -0,0 +1,98 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * + */ + +include(`exa_wm.g4i') + +define(`YCbCr_base', `src_sample_base') + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `mask_sample_g') +define(`Crn_01', `mask_sample_g_01') +define(`Crn_23', `mask_sample_g_23') + +define(`Yn', `mask_sample_r') +define(`Yn_01', `mask_sample_r_01') +define(`Yn_23', `mask_sample_r_23') + +define(`Cbn', `mask_sample_b') +define(`Cbn_01', `mask_sample_b_01') +define(`Cbn_23', `mask_sample_b_23') + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + */ + + /* Normalize Y, Cb and Cr: + * + * Yn = (Y - 16/255) * 1.164 + * Crn = Cr - 128 / 255 + * Cbn = Cb - 128 / 255 + */ +add (16) 
Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; + +add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; + + /* + * R = Y + Cr * 1.596 + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; + + /* + * G = Crn * -0.813 + Cbn * -0.392 + Y + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; +mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; + + /* + * B = Cbn * 2.017 + Y + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; + + /* + * A = 1.0 + */ +//mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b new file mode 100644 index 0000000..21fa6fb --- /dev/null +++ b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b @@ -0,0 +1,11 @@ + { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, + { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 }, + { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 }, + { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, + { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, + { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, + { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, + { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, + { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, diff --git a/va/Android.mk b/va/Android.mk index 494f7c7..74ac5d2 100644 --- a/va/Android.mk +++ b/va/Android.mk @@ -10,6 +10,23 @@ include $(CLEAR_VARS) #LIBVA_MINOR_VERSION := 31 #LIBVA_MAJOR_VERSION := 0 + +LOCAL_MODULE := libva + +LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils + +include $(BUILD_SHARED_LIBRARY) + +intermediates := 
$(local-intermediates-dir) +GEN := $(intermediates)/va_version.h +$(GEN): PRIVATE_GEN_VERSION := $(LOCAL_PATH)/../build/gen_version.sh +$(GEN): PRIVATE_INPUT_FILE := $(LOCAL_PATH)/va_version.h.in +$(GEN): PRIVATE_CUSTOM_TOOL = sh $(PRIVATE_GEN_VERSION) $(LOCAL_PATH)/.. $(PRIVATE_INPUT_FILE) > $@ +$(GEN): $(LOCAL_PATH)/va_version.h + $(transform-generated-source) + +LOCAL_GENERATED_SOURCES += $(GEN) + LOCAL_SRC_FILES := \ va.c \ va_trace.c \ @@ -19,6 +36,8 @@ LOCAL_CFLAGS += \ -DANDROID \ -DVA_DRIVERS_PATH="\"$(LIBVA_DRIVERS_PATH)\"" +LOCAL_COPY_HEADERS_TO := libva/va + LOCAL_C_INCLUDES += \ $(TARGET_OUT_HEADERS)/libva \ $(LOCAL_PATH)/x11 \ @@ -31,13 +50,6 @@ LOCAL_COPY_HEADERS := \ va_version.h.in \ x11/va_dricommon.h -LOCAL_COPY_HEADERS_TO := libva/va - -LOCAL_MODULE := libva - -LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils - -include $(BUILD_SHARED_LIBRARY) # For libva-android diff --git a/va/glx/va_glx_impl.c b/va/glx/va_glx_impl.c index f5bbe91..9d38930 100644 --- a/va/glx/va_glx_impl.c +++ b/va/glx/va_glx_impl.c @@ -36,7 +36,7 @@ static void va_glx_error_message(const char *format, ...) { va_list args; va_start(args, format); - fprintf(stderr, "[%s] ", PACKAGE_NAME); + fprintf(stderr, "libva-glx error: "); vfprintf(stderr, format, args); va_end(args); } diff --git a/va/glx/va_glx_private.h b/va/glx/va_glx_private.h index eb1185c..e86efb6 100644 --- a/va/glx/va_glx_private.h +++ b/va/glx/va_glx_private.h @@ -25,7 +25,7 @@ #ifndef VA_GLX_PRIVATE_H #define VA_GLX_PRIVATE_H -#include "config.h" +#include "sysdeps.h" #include "va.h" #include "va_backend.h" #include "va_x11.h" diff --git a/va/sysdeps.h b/va/sysdeps.h new file mode 100644 index 0000000..0752b17 --- /dev/null +++ b/va/sysdeps.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2007-2009 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL INTEL AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SYSDEPS_H +#define SYSDEPS_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <assert.h> + +#ifdef ANDROID +# define Bool int +# define True 1 +# define False 0 +#endif + +#endif /* SYSDEPS_H */ @@ -23,11 +23,11 @@ */ #define _GNU_SOURCE 1 +#include "sysdeps.h" #include "va.h" #include "va_backend.h" #include "va_trace.h" #include "va_fool.h" -#include "config.h" #include <assert.h> #include <stdarg.h> @@ -37,12 +37,6 @@ #include <dlfcn.h> #include <unistd.h> -#ifdef ANDROID -#define Bool int -#define True 1 -#define False 0 -#endif - #define DRIVER_INIT_FUNC "__vaDriverInit_0_31" #define DRIVER_EXTENSION "_drv_video.so" @@ -612,6 +606,7 @@ VAStatus vaCreateSurfaces ( { VADriverContextP ctx; VAStatus vaStatus; + CHECK_DISPLAY(dpy); ctx = CTX(dpy); diff --git a/va/va_tpi.c b/va/va_tpi.c index 1f01ef0..4472913 100644 --- a/va/va_tpi.c +++ b/va/va_tpi.c @@ -23,10 +23,10 @@ */ #define _GNU_SOURCE 1 +#include "sysdeps.h" #include "va.h" #include "va_backend.h" #include "va_backend_tpi.h" -#include "config.h" #include <assert.h> #include <stdarg.h> diff --git a/va/x11/va_x11.c b/va/x11/va_x11.c index 70cea30..93eb243 100644 --- a/va/x11/va_x11.c +++ b/va/x11/va_x11.c @@ -23,7 +23,7 @@ */ #define _GNU_SOURCE 1 -#include "config.h" +#include "sysdeps.h" #include "va.h" #include "va_backend.h" #include "va_x11.h" |