summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAustin Yuan <shengquan.yuan@gmail.com>2010-12-22 16:59:19 +0800
committerAustin Yuan <shengquan.yuan@gmail.com>2010-12-22 16:59:19 +0800
commit1b5b86f4d846906bf408993a61f3b2b6856f0997 (patch)
tree963d16ec8044a28adf4431bd6887c2515ce44bb7
parent78d9fb9c0d95dc5abd2272d60009cd88ffae33e7 (diff)
parent8f056b1c0bd0dac3c57193d30506730218740c70 (diff)
downloadlibva-1b5b86f4d846906bf408993a61f3b2b6856f0997.tar.gz
Merge branch 'fdo-master'
Conflicts: Android.mk libva.spec va/Android.mk va/va.c va/va.h va/va_trace.c Signed-off-by: Austin Yuan <shengquan.yuan@gmail.com>
-rw-r--r--.gitignore20
-rw-r--r--Android.mk2
-rw-r--r--build/gen_version.sh32
-rw-r--r--configure.ac31
-rw-r--r--i965_drv_video/Makefile.am6
-rw-r--r--i965_drv_video/gen6_mfd.c1484
-rw-r--r--i965_drv_video/gen6_mfd.h95
-rw-r--r--i965_drv_video/i965_avc_ildb.c2
-rw-r--r--i965_drv_video/i965_defines.h145
-rw-r--r--i965_drv_video/i965_drv_video.c71
-rw-r--r--i965_drv_video/i965_media.c16
-rw-r--r--i965_drv_video/i965_render.c882
-rw-r--r--i965_drv_video/i965_render.h11
-rw-r--r--i965_drv_video/i965_structs.h101
-rw-r--r--i965_drv_video/intel_batchbuffer.c99
-rw-r--r--i965_drv_video/intel_batchbuffer.h25
-rw-r--r--i965_drv_video/intel_batchbuffer_dump.c364
-rw-r--r--i965_drv_video/intel_batchbuffer_dump.h6
-rw-r--r--i965_drv_video/intel_driver.h30
-rw-r--r--i965_drv_video/shaders/render/Makefile.am23
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_affine.g6a47
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_affine.g6b4
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a48
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b3
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a58
-rw-r--r--i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b6
-rw-r--r--i965_drv_video/shaders/render/exa_wm_write.g6a77
-rw-r--r--i965_drv_video/shaders/render/exa_wm_write.g6b17
-rw-r--r--i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a98
-rw-r--r--i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b11
-rw-r--r--va/Android.mk26
-rw-r--r--va/glx/va_glx_impl.c2
-rw-r--r--va/glx/va_glx_private.h2
-rw-r--r--va/sysdeps.h44
-rw-r--r--va/va.c9
-rw-r--r--va/va_tpi.c2
-rw-r--r--va/x11/va_x11.c2
37 files changed, 3665 insertions, 236 deletions
diff --git a/.gitignore b/.gitignore
index fc9b033..a8ff985 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
*.rej
*.loT
*.bin
+*.pc
.deps
.libs
install-sh
@@ -25,5 +26,20 @@ config.status
config.sub
configure
depcomp
-fw/msvdx_bin
-fw/msvdx_fw.bin
+TAGS
+/va/va_version.h
+/test/basic/test_01
+/test/basic/test_02
+/test/basic/test_03
+/test/basic/test_04
+/test/basic/test_05
+/test/basic/test_06
+/test/basic/test_07
+/test/basic/test_08
+/test/basic/test_09
+/test/basic/test_10
+/test/basic/test_11
+/test/decode/mpeg2vldemo
+/test/encode/h264encode
+/test/putsurface/putsurface
+/test/vainfo
diff --git a/Android.mk b/Android.mk
index f3f122c..5cbb9d8 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,4 +1,4 @@
# Recursive call sub-folder Android.mk
#
- include $(call all-subdir-makefiles)
+ include $(call all-subdir-makefiles)
diff --git a/build/gen_version.sh b/build/gen_version.sh
new file mode 100644
index 0000000..dd01d95
--- /dev/null
+++ b/build/gen_version.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+libva_topdir="$1"
+version_h_in="$2"
+
+parse_configure_ac() {
+ sed -n "/^m4_define.*${1}.*\[\([0-9]*\)\].*/s//\1/p" ${libva_topdir}/configure.ac
+}
+
+parse_configure() {
+ sed -n "/^${1}=\([0-9]*\)/s//\1/p" ${libva_topdir}/configure
+}
+
+if test -f "${libva_topdir}/configure.ac"; then
+ libva_major_version=`parse_configure_ac libva_major_version`
+ libva_minor_version=`parse_configure_ac libva_minor_version`
+ libva_micro_version=`parse_configure_ac libva_micro_version`
+elif test -f "${libva_topdir}/configure"; then
+ libva_major_version=`parse_configure LIBVA_MAJOR_VERSION`
+ libva_minor_version=`parse_configure LIBVA_MINOR_VERSION`
+ libva_micro_version=`parse_configure LIBVA_MICRO_VERSION`
+else
+ echo "ERROR: configure or configure.ac file not found in $libva_topdir/"
+ exit 1
+fi
+libva_version="$libva_major_version.$libva_minor_version.$libva_micro_version"
+
+sed -e "s/@LIBVA_MAJOR_VERSION@/${libva_major_version}/" \
+ -e "s/@LIBVA_MINOR_VERSION@/${libva_minor_version}/" \
+ -e "s/@LIBVA_MICRO_VERSION@/${libva_micro_version}/" \
+ -e "s/@LIBVA_VERSION@/${libva_version}/" \
+ $version_h_in
diff --git a/configure.ac b/configure.ac
index 6debe95..7ae538d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,6 +42,7 @@ AC_CONFIG_SRCDIR([Makefile.am])
AM_INIT_AUTOMAKE([dist-bzip2])
AM_CONFIG_HEADER([config.h])
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
LIBVA_MAJOR_VERSION=libva_major_version
LIBVA_MINOR_VERSION=libva_minor_version
@@ -100,12 +101,12 @@ PKG_CHECK_MODULES([XEXT],[xext])
PKG_CHECK_MODULES([XFIXES], [xfixes])
PKG_CHECK_MODULES([DRM], [libdrm])
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.0], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.1], [gen4asm=yes], [gen4asm=no])
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
-# Check for libdrm >= 2.4.21 (needed for i965_drv_video.so)
-if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.21 libdrm; then
- AC_MSG_WARN([libdrm < 2.4.21 found, disabling build of i965 video driver])
+# Check for libdrm >= 2.4.23 (needed for i965_drv_video.so)
+if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.23 libdrm; then
+ AC_MSG_WARN([libdrm < 2.4.23 found, disabling build of i965 video driver])
enable_i965_driver=no
fi
AM_CONDITIONAL(BUILD_I965_DRIVER, test x$enable_i965_driver = xyes)
@@ -175,3 +176,25 @@ AC_OUTPUT([
libva-tpi.pc
])
+# Print a small summary
+
+echo ""
+echo "libva - ${LIBVA_VERSION}"
+echo ""
+
+echo " • Global :"
+echo " Prefix: ${prefix}"
+echo ""
+
+AS_IF([test x$enable_i965_driver = xyes], [DRIVERS="i965 $DRIVERS"])
+AS_IF([test x$enable_dummy_driver = xyes], [DRIVERS="dummy $DRIVERS"])
+
+echo " • Drivers: ${DRIVERS}"
+
+AS_IF([test x$USE_GLX = xyes], [BACKENDS="glx $BACKENDS"])
+BACKENDS="x11 $BACKENDS"
+AS_IF([test x$enable_dummy_backend = xyes], [BACKENDS="dummy
+$BACKENDS"])
+
+echo " • Winsys : ${BACKENDS}"
+
diff --git a/i965_drv_video/Makefile.am b/i965_drv_video/Makefile.am
index f32d579..8dd13bd 100644
--- a/i965_drv_video/Makefile.am
+++ b/i965_drv_video/Makefile.am
@@ -43,7 +43,8 @@ i965_drv_video_la_SOURCES = \
i965_avc_bsd.c \
i965_avc_hw_scoreboard.c\
i965_avc_ildb.c \
- i965_post_processing.c
+ i965_post_processing.c \
+ gen6_mfd.c
noinst_HEADERS = \
object_heap.h \
@@ -61,4 +62,5 @@ noinst_HEADERS = \
i965_avc_bsd.h \
i965_avc_hw_scoreboard.h\
i965_avc_ildb.h \
- i965_post_processing.h
+ i965_post_processing.h \
+ gen6_mfd.h
diff --git a/i965_drv_video/gen6_mfd.c b/i965_drv_video/gen6_mfd.c
new file mode 100644
index 0000000..0fe7860
--- /dev/null
+++ b/i965_drv_video/gen6_mfd.c
@@ -0,0 +1,1484 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <va/va_backend.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+
+#include "gen6_mfd.h"
+
+#define DMV_SIZE 0x88000 /* 557056 bytes for a frame */
+
+static const uint32_t zigzag_direct[64] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static void
+gen6_mfd_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)media_state->private_context;
+ int i, j;
+
+ assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ int found = 0;
+
+ if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
+ continue;
+
+ for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+ VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
+ if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+ continue;
+
+ if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+ obj_surface->flags &= ~SURFACE_REFERENCED;
+
+ if (obj_surface->flags & SURFACE_DISPLAYED) {
+ dri_bo_unreference(obj_surface->bo);
+ obj_surface->bo = NULL;
+ obj_surface->flags = 0;
+ }
+
+ if (obj_surface->free_private_data)
+ obj_surface->free_private_data(&obj_surface->private_data);
+
+ gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+ gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+ }
+ }
+
+ for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
+ VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
+ int found = 0;
+
+ if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+ continue;
+
+ for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+ if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+ continue;
+
+ if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ int frame_idx;
+ struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
+
+ if (obj_surface->bo == NULL) {
+ uint32_t tiling_mode = I915_TILING_Y;
+ unsigned long pitch;
+
+ obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr,
+ "vaapi surface",
+ obj_surface->width,
+ obj_surface->height + obj_surface->height / 2,
+ 1,
+ &tiling_mode,
+ &pitch,
+ 0);
+ assert(obj_surface->bo);
+ assert(tiling_mode == I915_TILING_Y);
+ assert(pitch == obj_surface->width);
+ }
+
+ for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
+ for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+ if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+ continue;
+
+ if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
+ break;
+ }
+
+ if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
+ break;
+ }
+
+ assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
+
+ for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+ if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
+ gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
+ gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+ break;
+ }
+ }
+ }
+ }
+
+ /* sort */
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
+ if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
+ gen6_mfd_context->reference_surface[i].frame_store_id == i)
+ continue;
+
+ for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+ if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
+ gen6_mfd_context->reference_surface[j].frame_store_id == i) {
+ VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
+ int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
+
+ gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
+ gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
+ gen6_mfd_context->reference_surface[j].surface_id = id;
+ gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+ break;
+ }
+ }
+ }
+}
+
+static void
+gen6_mfd_free_avc_surface(void **data)
+{
+ struct gen6_mfd_surface *gen6_mfd_surface = *data;
+
+ if (!gen6_mfd_surface)
+ return;
+
+ dri_bo_unreference(gen6_mfd_surface->dmv_top);
+ gen6_mfd_surface->dmv_top = NULL;
+ dri_bo_unreference(gen6_mfd_surface->dmv_bottom);
+ gen6_mfd_surface->dmv_bottom = NULL;
+
+ free(gen6_mfd_surface);
+ *data = NULL;
+}
+
+static void
+gen6_mfd_init_avc_surface(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ struct object_surface *obj_surface)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfd_surface *gen6_mfd_surface = obj_surface->private_data;
+
+ obj_surface->free_private_data = gen6_mfd_free_avc_surface;
+
+ if (!gen6_mfd_surface) {
+ gen6_mfd_surface = calloc(sizeof(struct gen6_mfd_surface), 1);
+ assert((obj_surface->size & 0x3f) == 0);
+ obj_surface->private_data = gen6_mfd_surface;
+ }
+
+ gen6_mfd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
+ !pic_param->seq_fields.bits.direct_8x8_inference_flag);
+
+ if (gen6_mfd_surface->dmv_top == NULL) {
+ gen6_mfd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+ "direct mv w/r buffer",
+ DMV_SIZE,
+ 0x1000);
+ }
+
+ if (gen6_mfd_surface->dmv_bottom_flag &&
+ gen6_mfd_surface->dmv_bottom == NULL) {
+ gen6_mfd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+ "direct mv w/r buffer",
+ DMV_SIZE,
+ 0x1000);
+ }
+}
+
+static void
+gen6_mfd_pipe_mode_select(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+ assert(standard_select == MFX_FORMAT_MPEG2 ||
+ standard_select == MFX_FORMAT_AVC ||
+ standard_select == MFX_FORMAT_VC1);
+
+ BEGIN_BCS_BATCH(ctx, 4);
+ OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
+ OUT_BCS_BATCH(ctx,
+ (MFD_MODE_VLD << 16) | /* VLD mode */
+ (0 << 10) | /* disable Stream-Out */
+ (gen6_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
+ (gen6_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
+ (0 << 7) | /* disable TLB prefectch */
+ (0 << 5) | /* not in stitch mode */
+ (MFX_CODEC_DECODE << 4) | /* decoding mode */
+ (standard_select << 0));
+ OUT_BCS_BATCH(ctx,
+ (0 << 20) | /* round flag in PB slice */
+ (0 << 19) | /* round flag in Intra8x8 */
+ (0 << 7) | /* expand NOA bus flag */
+ (1 << 6) | /* must be 1 */
+ (0 << 5) | /* disable clock gating for NOA */
+ (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+ (0 << 3) | /* terminate if AVC mbdata error occurs */
+ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+ (0 << 1) | /* AVC long field motion vector */
+ (1 << 0)); /* always calculate AVC ILDB boundary strength */
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_surface_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
+ assert(obj_surface);
+
+ BEGIN_BCS_BATCH(ctx, 6);
+ OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx,
+ ((obj_surface->height - 1) << 19) |
+ ((obj_surface->width - 1) << 6));
+ OUT_BCS_BATCH(ctx,
+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+ (0 << 22) | /* surface object control state, FIXME??? */
+ ((obj_surface->width - 1) << 3) | /* pitch */
+ (0 << 2) | /* must be 0 for interleave U/V */
+ (1 << 1) | /* must be y-tiled */
+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, FIXME: must be 1 ??? */
+ OUT_BCS_BATCH(ctx,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (obj_surface->height)); /* y offset for U(cb) */
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+ int i;
+
+ BEGIN_BCS_BATCH(ctx, 24);
+ OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+ if (gen6_mfd_context->pre_deblocking_output.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->pre_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ if (gen6_mfd_context->post_deblocking_output.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
+ OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
+
+ if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ /* DW 7..22 */
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ struct object_surface *obj_surface;
+
+ if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+ obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+ assert(obj_surface && obj_surface->bo);
+
+ OUT_BCS_RELOC(ctx, obj_surface->bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ } else {
+ OUT_BCS_BATCH(ctx, 0);
+ }
+ }
+
+ OUT_BCS_BATCH(ctx, 0); /* ignore DW23 for decoding */
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
+ dri_bo *slice_data_bo,
+ int standard_select)
+{
+ BEGIN_BCS_BATCH(ctx, 11);
+ OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+ OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+ BEGIN_BCS_BATCH(ctx, 4);
+ OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+ if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ if (gen6_mfd_context->bitplane_read_buffer.valid)
+ OUT_BCS_RELOC(ctx, gen6_mfd_context->bitplane_read_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ else
+ OUT_BCS_BATCH(ctx, 0);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_aes_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ /* FIXME */
+}
+
+static void
+gen6_mfd_wait(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ int standard_select)
+{
+ BEGIN_BCS_BATCH(ctx, 1);
+ OUT_BCS_BATCH(ctx, MFX_WAIT | (1 << 8));
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ int qm_present_flag;
+ int img_struct;
+ int mbaff_frame_flag;
+ unsigned int width_in_mbs, height_in_mbs;
+ VAPictureParameterBufferH264 *pic_param;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+ assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+ if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
+ qm_present_flag = 1;
+ else
+ qm_present_flag = 0; /* built-in QM matrices */
+
+ if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
+ img_struct = 1;
+ else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
+ img_struct = 3;
+ else
+ img_struct = 0;
+
+ if ((img_struct & 0x1) == 0x1) {
+ assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+ } else {
+ assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+ }
+
+ if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
+ assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+ assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+ } else {
+ assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+ }
+
+ mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+ !pic_param->pic_fields.bits.field_pic_flag);
+
+ width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+ height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+ assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
+
+ /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
+ assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+ pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
+ assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+ BEGIN_BCS_BATCH(ctx, 13);
+ OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
+ OUT_BCS_BATCH(ctx,
+ ((width_in_mbs * height_in_mbs) & 0x7fff));
+ OUT_BCS_BATCH(ctx,
+ (height_in_mbs << 16) |
+ (width_in_mbs << 0));
+ OUT_BCS_BATCH(ctx,
+ ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
+ ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+ (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
+ (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
+ (1 << 12) | /* always 1, hardware requirement */
+ (qm_present_flag << 10) |
+ (img_struct << 8) |
+ (16 << 0));
+ OUT_BCS_BATCH(ctx,
+ (pic_param->seq_fields.bits.chroma_format_idc << 10) |
+ (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+ ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+ (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+ (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+ (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+ (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+ (mbaff_frame_flag << 1) |
+ (pic_param->pic_fields.bits.field_pic_flag << 0));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ int cmd_len;
+ VAIQMatrixBufferH264 *iq_matrix;
+ VAPictureParameterBufferH264 *pic_param;
+
+ if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+ return;
+
+ iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+ cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
+
+ if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+ cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
+
+ BEGIN_BCS_BATCH(ctx, cmd_len);
+ OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | (cmd_len - 2));
+
+ if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+ OUT_BCS_BATCH(ctx,
+ (0x0 << 8) | /* don't use default built-in matrices */
+ (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
+ else
+ OUT_BCS_BATCH(ctx,
+ (0x0 << 8) | /* don't use default built-in matrices */
+ (0x3f << 0)); /* six 4x4 scaling matrices */
+
+ intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
+
+ if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+ intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_directmode_state(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *slice_param)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+ struct object_surface *obj_surface;
+ struct gen6_mfd_surface *gen6_mfd_surface;
+ VAPictureH264 *va_pic;
+ int i, j;
+
+ BEGIN_BCS_BATCH(ctx, 69);
+ OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+
+ /* reference surfaces 0..15 */
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+ obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+ assert(obj_surface);
+ gen6_mfd_surface = obj_surface->private_data;
+
+ if (gen6_mfd_surface == NULL) {
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ } else {
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+
+ if (gen6_mfd_surface->dmv_bottom_flag == 1)
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ else
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ }
+ } else {
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ }
+ }
+
+ /* the current decoding frame/field */
+ va_pic = &pic_param->CurrPic;
+ assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+ obj_surface = SURFACE(va_pic->picture_id);
+ assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+ gen6_mfd_surface = obj_surface->private_data;
+
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+
+ if (gen6_mfd_surface->dmv_bottom_flag == 1)
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+
+ /* POC List */
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+ int found = 0;
+ for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+ va_pic = &pic_param->ReferenceFrames[j];
+
+ if (va_pic->flags & VA_PICTURE_H264_INVALID)
+ continue;
+
+ if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ assert(found == 1);
+ assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+
+ OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+ OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+ } else {
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ }
+ }
+
+ va_pic = &pic_param->CurrPic;
+ OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+ OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_slice_state(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *slice_param,
+ VASliceParameterBufferH264 *next_slice_param)
+{
+ int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+ int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+ int num_ref_idx_l0, num_ref_idx_l1;
+ int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
+ pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+ int weighted_pred_idc = 0;
+ int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
+ int slice_type;
+
+ if (slice_param->slice_type == SLICE_TYPE_I ||
+ slice_param->slice_type == SLICE_TYPE_SI) {
+ slice_type = SLICE_TYPE_I;
+ } else if (slice_param->slice_type == SLICE_TYPE_P ||
+ slice_param->slice_type == SLICE_TYPE_SP) {
+ slice_type = SLICE_TYPE_P;
+ } else {
+ assert(slice_param->slice_type == SLICE_TYPE_B);
+ slice_type = SLICE_TYPE_B;
+ }
+
+ if (slice_type == SLICE_TYPE_I) {
+ assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+ assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+ num_ref_idx_l0 = 0;
+ num_ref_idx_l1 = 0;
+ } else if (slice_type == SLICE_TYPE_P) {
+ assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+ num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_idx_l1 = 0;
+ weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
+ } else {
+ num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+ num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+ weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
+ }
+
+ first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
+ slice_hor_pos = first_mb_in_slice % width_in_mbs;
+ slice_ver_pos = first_mb_in_slice / width_in_mbs;
+
+ if (next_slice_param) {
+ first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+ next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
+ next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+ } else {
+ next_slice_hor_pos = 0;
+ next_slice_ver_pos = height_in_mbs;
+ }
+
+ BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
+ OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
+ OUT_BCS_BATCH(ctx, slice_type);
+ OUT_BCS_BATCH(ctx,
+ (num_ref_idx_l1 << 24) |
+ (num_ref_idx_l0 << 16) |
+ (slice_param->chroma_log2_weight_denom << 8) |
+ (slice_param->luma_log2_weight_denom << 0));
+ OUT_BCS_BATCH(ctx,
+ (weighted_pred_idc << 30) |
+ (slice_param->direct_spatial_mv_pred_flag << 29) |
+ (slice_param->disable_deblocking_filter_idc << 27) |
+ (slice_param->cabac_init_idc << 24) |
+ ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
+ ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+ ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+ OUT_BCS_BATCH(ctx,
+ (slice_ver_pos << 24) |
+ (slice_hor_pos << 16) |
+ (first_mb_in_slice << 0));
+ OUT_BCS_BATCH(ctx,
+ (next_slice_ver_pos << 16) |
+ (next_slice_hor_pos << 0));
+ OUT_BCS_BATCH(ctx,
+ (next_slice_param == NULL) << 19); /* last slice flag */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit a "phantom" (terminating) MFX_AVC_SLICE_STATE with all per-slice
+ * fields zeroed.  Only DW4 is populated: it carries the frame height in MBs
+ * and the total macroblock count of the picture (halved for field pictures),
+ * which tells the BSD unit where the real slices end.  Always paired with
+ * gen6_mfd_avc_phantom_slice_bsd_object() via gen6_mfd_avc_phantom_slice().
+ */
+static void
+gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+ int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+ BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
+ OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx,
+ height_in_mbs << 24 |
+ width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); /* per-picture MB count; halved for a field picture */
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit one MFX_AVC_REF_IDX_STATE command per active reference list
+ * (list 0 for P/SP slices, lists 0 and 1 for B slices; nothing for I/SI).
+ * Each command carries 32 one-byte reference entries built from the VA-API
+ * RefPicList; the bitfield layout of struct refs mirrors the hardware's
+ * per-entry byte format (4 entries per DWORD, 8 DWORDs of payload).
+ */
+static void
+gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *slice_param)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+ int i, j, num_ref_list;
+ struct {
+ unsigned char bottom_idc:1;
+ unsigned char frame_store_index:4;
+ unsigned char field_picture:1;
+ unsigned char long_term:1;
+ unsigned char non_exist:1;
+ } refs[32];
+
+ /* Intra slices have no reference lists. */
+ if (slice_param->slice_type == SLICE_TYPE_I ||
+ slice_param->slice_type == SLICE_TYPE_SI)
+ return;
+
+ if (slice_param->slice_type == SLICE_TYPE_P ||
+ slice_param->slice_type == SLICE_TYPE_SP) {
+ num_ref_list = 1;
+ } else {
+ num_ref_list = 2;
+ }
+
+ for (i = 0; i < num_ref_list; i++) {
+ VAPictureH264 *va_pic;
+
+ if (i == 0) {
+ va_pic = slice_param->RefPicList0;
+ } else {
+ va_pic = slice_param->RefPicList1;
+ }
+
+ BEGIN_BCS_BATCH(ctx, 10);
+ OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | (10 - 2));
+ OUT_BCS_BATCH(ctx, i); /* which reference list this command describes */
+
+ for (j = 0; j < 32; j++) {
+ if (va_pic->flags & VA_PICTURE_H264_INVALID) {
+ /* unused entry: mark non-existent with an all-ones pattern */
+ refs[j].non_exist = 1;
+ refs[j].long_term = 1;
+ refs[j].field_picture = 1;
+ refs[j].frame_store_index = 0xf;
+ refs[j].bottom_idc = 1;
+ } else {
+ int frame_idx;
+
+ /* Map the VA surface id to its hardware frame-store slot,
+ * filled in earlier by gen6_mfd_avc_frame_store_index(). */
+ for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
+ if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
+ va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
+ assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
+ break;
+ }
+ }
+
+ assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
+
+ refs[j].non_exist = 0;
+ refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
+ refs[j].field_picture = !!(va_pic->flags &
+ (VA_PICTURE_H264_TOP_FIELD |
+ VA_PICTURE_H264_BOTTOM_FIELD));
+ refs[j].frame_store_index = frame_idx;
+ refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
+ }
+
+ va_pic++;
+ }
+
+ intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs)); /* 32 bytes = 8 DWORDs of payload */
+ ADVANCE_BCS_BATCH(ctx);
+ }
+}
+
+/*
+ * Emit MFX_AVC_WEIGHTOFFSET_STATE commands when explicit weighted
+ * prediction is in use: one table (list 0) for weighted P/SP slices, two
+ * tables (lists 0 and 1) for B slices with weighted_bipred_idc == 1.
+ * Each table packs 32 entries of 6 shorts: luma weight/offset followed by
+ * Cb and Cr weight/offset pairs (96 DWORDs of payload).
+ */
+static void
+gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *slice_param)
+{
+ int i, j, num_weight_offset_table = 0;
+ short weightoffsets[32 * 6];
+
+ if ((slice_param->slice_type == SLICE_TYPE_P ||
+ slice_param->slice_type == SLICE_TYPE_SP) &&
+ (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
+ num_weight_offset_table = 1;
+ }
+
+ if ((slice_param->slice_type == SLICE_TYPE_B) &&
+ (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
+ num_weight_offset_table = 2; /* explicit biprediction: both lists */
+ }
+
+ for (i = 0; i < num_weight_offset_table; i++) {
+ BEGIN_BCS_BATCH(ctx, 98);
+ OUT_BCS_BATCH(ctx, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+ OUT_BCS_BATCH(ctx, i); /* which reference list the table applies to */
+
+ if (i == 0) {
+ for (j = 0; j < 32; j++) {
+ weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
+ weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
+ weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
+ weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
+ weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
+ weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
+ }
+ } else {
+ for (j = 0; j < 32; j++) {
+ weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
+ weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
+ weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
+ weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
+ weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
+ weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
+ }
+ }
+
+ intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets));
+ ADVANCE_BCS_BATCH(ctx);
+ }
+}
+
+/*
+ * Translate a slice-data bit offset given for the RBSP (emulation-prevention
+ * bytes removed, as VA-API supplies it) into a bit offset into the raw
+ * bitstream, by counting the 0x00 0x00 0x03 escape sequences found while
+ * walking the slice header.  For CABAC slices the result is rounded up to a
+ * byte boundary, since CABAC slice data always starts byte-aligned.
+ *
+ * NOTE(review): the buf[j + 1] / buf[j + 2] lookahead can read up to two
+ * bytes past slice_header_size — assumed safe because buf points into the
+ * larger mapped slice-data buffer; confirm against callers.
+ */
+static int
+gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
+{
+ int out_slice_data_bit_offset;
+ int slice_header_size = in_slice_data_bit_offset / 8;
+ int i, j;
+
+ for (i = 0, j = 0; i < slice_header_size; i++, j++) {
+ if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
+ i++, j += 2; /* skip the 0x000003 escape: 2 header bytes consumed, 3 raw bytes */
+ }
+ }
+
+ out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
+
+ if (mode_flag == ENTROPY_CABAC)
+ out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
+
+ return out_slice_data_bit_offset;
+}
+
+static void
+gen6_mfd_avc_bsd_object(VADriverContextP ctx,
+ VAPictureParameterBufferH264 *pic_param,
+ VASliceParameterBufferH264 *slice_param,
+ dri_bo *slice_data_bo)
+{
+ int slice_data_bit_offset;
+ uint8_t *slice_data = NULL;
+
+ dri_bo_map(slice_data_bo, 0);
+ slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
+ slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
+ pic_param->pic_fields.bits.entropy_coding_mode_flag,
+ slice_param->slice_data_bit_offset);
+ dri_bo_unmap(slice_data_bo);
+
+ BEGIN_BCS_BATCH(ctx, 6);
+ OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
+ OUT_BCS_BATCH(ctx,
+ ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
+ OUT_BCS_BATCH(ctx, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+ OUT_BCS_BATCH(ctx,
+ (0 << 31) |
+ (0 << 14) |
+ (0 << 12) |
+ (0 << 10) |
+ (0 << 8));
+ OUT_BCS_BATCH(ctx,
+ (0 << 16) |
+ (0 << 6) |
+ ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit an all-zero MFD_AVC_BSD_OBJECT to accompany the phantom slice state:
+ * no slice data, no offsets — it just terminates decoding of the picture.
+ */
+static void
+gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+ BEGIN_BCS_BATCH(ctx, 6);
+ OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/* Emit the phantom (terminating) slice: state command followed by its BSD object. */
+static void
+gen6_mfd_avc_phantom_slice(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+ gen6_mfd_avc_phantom_slice_state(ctx, pic_param);
+ gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param);
+}
+
+/*
+ * Per-picture H.264 decode setup:
+ *  - scan all slices to learn whether in-loop deblocking is ever enabled,
+ *    which decides whether the decoder writes the post- or pre-deblocking
+ *    output buffer;
+ *  - lazily create the gen6_mfd_context and its frame-store table;
+ *  - bind (and allocate on first use) the render-target surface as a
+ *    Y-tiled NV12 buffer (luma height plus half for chroma);
+ *  - (re)allocate the fixed-size hardware row-store scratch buffers;
+ *  - refresh the frame-store index from the picture's reference list.
+ */
+static void
+gen6_mfd_avc_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAPictureParameterBufferH264 *pic_param;
+ VASliceParameterBufferH264 *slice_param;
+ VAPictureH264 *va_pic;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context;
+ struct object_surface *obj_surface;
+ dri_bo *bo;
+ int i, j, enable_avc_ildb = 0;
+
+ /* Deblocking is needed if any slice does not fully disable it (idc != 1). */
+ for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+
+ assert(decode_state->slice_params[j]->num_elements == 1);
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+ assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+ assert((slice_param->slice_type == SLICE_TYPE_I) ||
+ (slice_param->slice_type == SLICE_TYPE_SI) ||
+ (slice_param->slice_type == SLICE_TYPE_P) ||
+ (slice_param->slice_type == SLICE_TYPE_SP) ||
+ (slice_param->slice_type == SLICE_TYPE_B));
+
+ if (slice_param->disable_deblocking_filter_idc != 1) {
+ enable_avc_ildb = 1;
+ break;
+ }
+
+ slice_param++;
+ }
+ }
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+ gen6_mfd_context = media_state->private_context;
+
+ /* First use: allocate the context and clear the frame-store table. */
+ if (gen6_mfd_context == NULL) {
+ gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
+ media_state->private_context = gen6_mfd_context;
+
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+ gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+ }
+ }
+
+ /* Current decoded picture */
+ va_pic = &pic_param->CurrPic;
+ assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+ obj_surface = SURFACE(va_pic->picture_id);
+ assert(obj_surface);
+ obj_surface->flags = (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+ gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
+
+ /* Allocate the render target lazily: Y-tiled, luma + half-height chroma (NV12). */
+ if (obj_surface->bo == NULL) {
+ uint32_t tiling_mode = I915_TILING_Y;
+ unsigned long pitch;
+
+ obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr,
+ "vaapi surface",
+ obj_surface->width,
+ obj_surface->height + obj_surface->height / 2,
+ 1,
+ &tiling_mode,
+ &pitch,
+ 0);
+ assert(obj_surface->bo);
+ assert(tiling_mode == I915_TILING_Y);
+ assert(pitch == obj_surface->width);
+ }
+
+ /* Both outputs alias the surface bo; only one is marked valid, depending
+ * on whether the in-loop deblocking filter runs. */
+ dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
+ gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+ dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
+ gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
+
+ dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+ gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+ dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
+ gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
+
+ dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "intra row store",
+ 128 * 64,
+ 0x1000);
+ assert(bo);
+ gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+ gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+ dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "deblocking filter row store",
+ 30720, /* 4 * 120 * 64 */
+ 0x1000);
+ assert(bo);
+ gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+ gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+ dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "bsd mpc row store",
+ 11520, /* 1.5 * 120 * 64 */
+ 0x1000);
+ assert(bo);
+ gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+ gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+ dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "mpr row store",
+ 7680, /* 1.0 * 120 * 64 */
+ 0x1000);
+ assert(bo);
+ gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
+ gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+ gen6_mfd_context->bitplane_read_buffer.valid = 0; /* AVC has no bitplane data (VC-1 only) */
+ gen6_mfd_avc_frame_store_index(ctx, pic_param);
+}
+
+/*
+ * Decode one complete H.264 picture: run per-picture init, then build a
+ * single atomic BCS batch — pipeline/surface/buffer state, image and QM
+ * state, followed by per-slice directmode / slice / ref-idx / weight-offset
+ * state and a BSD object for each slice, a terminating phantom slice, and
+ * finally flush the batch to hardware.
+ */
+static void
+gen6_mfd_avc_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAPictureParameterBufferH264 *pic_param;
+ VASliceParameterBufferH264 *slice_param, *next_slice_param;
+ dri_bo *slice_data_bo;
+ int i, j;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+ gen6_mfd_avc_decode_init(ctx, decode_state);
+ intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+ intel_batchbuffer_emit_mi_flush_bcs(ctx);
+ gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC);
+ gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC);
+ gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
+ gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
+ gen6_mfd_avc_img_state(ctx, decode_state);
+ gen6_mfd_avc_qm_state(ctx, decode_state);
+
+ for (j = 0; j < decode_state->num_slice_params; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+ slice_data_bo = decode_state->slice_datas[j]->bo;
+
+ /* The slice-state command needs the NEXT slice's position to compute
+ * this slice's extent; NULL marks the last slice of the picture. */
+ if (j == decode_state->num_slice_params - 1)
+ next_slice_param = NULL;
+ else
+ next_slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+
+ gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC);
+ assert(decode_state->slice_params[j]->num_elements == 1);
+
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+ assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+ assert((slice_param->slice_type == SLICE_TYPE_I) ||
+ (slice_param->slice_type == SLICE_TYPE_SI) ||
+ (slice_param->slice_type == SLICE_TYPE_P) ||
+ (slice_param->slice_type == SLICE_TYPE_SP) ||
+ (slice_param->slice_type == SLICE_TYPE_B));
+
+ if (i < decode_state->slice_params[j]->num_elements - 1)
+ next_slice_param = slice_param + 1;
+
+ gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param);
+ gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param);
+ gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param);
+ gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param);
+ gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo);
+ slice_param++;
+ }
+ }
+
+ gen6_mfd_avc_phantom_slice(ctx, pic_param);
+ intel_batchbuffer_end_atomic_bcs(ctx);
+ intel_batchbuffer_flush_bcs(ctx);
+}
+
+/*
+ * Per-picture MPEG-2 decode setup: lazily create the context, wire the
+ * forward/backward reference pictures into frame-store slots 0/1 (cloned
+ * into the remaining slots, which the hardware requires to be populated),
+ * lazily allocate the Y-tiled NV12 render target, and set up the single
+ * row-store scratch buffer MPEG-2 needs.  MPEG-2 has no in-loop deblocking,
+ * so only the pre-deblocking output is valid.
+ */
+static void
+gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAPictureParameterBufferMPEG2 *pic_param;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context;
+ struct object_surface *obj_surface;
+ int i;
+ dri_bo *bo;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+ gen6_mfd_context = media_state->private_context;
+
+ if (gen6_mfd_context == NULL) {
+ gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
+ media_state->private_context = gen6_mfd_context;
+
+ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+ gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+ gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+ }
+ }
+
+ /* reference picture */
+ obj_surface = SURFACE(pic_param->forward_reference_picture);
+
+ if (obj_surface && obj_surface->bo)
+ gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
+ else
+ gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
+
+ obj_surface = SURFACE(pic_param->backward_reference_picture);
+
+ if (obj_surface && obj_surface->bo)
+ gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
+ else
+ gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture; /* fall back to forward ref */
+
+ /* must do so !!! -- hardware expects every frame-store slot populated */
+ for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
+ gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
+
+ /* Current decoded picture */
+ obj_surface = SURFACE(decode_state->current_render_target);
+ assert(obj_surface);
+ if (obj_surface->bo == NULL) {
+ uint32_t tiling_mode = I915_TILING_Y;
+ unsigned long pitch;
+
+ obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr,
+ "vaapi surface",
+ obj_surface->width,
+ obj_surface->height + obj_surface->height / 2,
+ 1,
+ &tiling_mode,
+ &pitch,
+ 0);
+ assert(obj_surface->bo);
+ assert(tiling_mode == I915_TILING_Y);
+ assert(pitch == obj_surface->width);
+ }
+
+ dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+ gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+ dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
+ gen6_mfd_context->pre_deblocking_output.valid = 1;
+
+ dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "bsd mpc row store",
+ 11520, /* 1.5 * 120 * 64 */
+ 0x1000);
+ assert(bo);
+ gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+ gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+ /* Buffers unused by MPEG-2 decode. */
+ gen6_mfd_context->post_deblocking_output.valid = 0;
+ gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+ gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+ gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+ gen6_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Emit MFX_MPEG2_PIC_STATE: f_codes, picture-coding-extension flags,
+ * picture coding type, and the frame size in macroblocks.  The packed
+ * 16-bit f_code is split into its four 4-bit [r][s] fields.
+ */
+static void
+gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAPictureParameterBufferMPEG2 *pic_param;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+ BEGIN_BCS_BATCH(ctx, 4);
+ OUT_BCS_BATCH(ctx, MFX_MPEG2_PIC_STATE | (4 - 2));
+ OUT_BCS_BATCH(ctx,
+ (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
+ ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
+ ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
+ ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
+ pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
+ pic_param->picture_coding_extension.bits.picture_structure << 12 |
+ pic_param->picture_coding_extension.bits.top_field_first << 11 |
+ pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
+ pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
+ pic_param->picture_coding_extension.bits.q_scale_type << 8 |
+ pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
+ pic_param->picture_coding_extension.bits.alternate_scan << 6);
+ OUT_BCS_BATCH(ctx,
+ pic_param->picture_coding_type << 9);
+ OUT_BCS_BATCH(ctx,
+ (ALIGN(pic_param->vertical_size, 16) / 16) << 16 | /* height in MBs */
+ (ALIGN(pic_param->horizontal_size, 16) / 16)); /* width in MBs */
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit MFX_MPEG2_QM_STATE for the intra (i == 0) and non-intra (i == 1)
+ * quantiser matrices, if the application loaded them.  The matrices arrive
+ * in zig-zag order and are converted to raster order before upload
+ * (16 DWORDs = 64 bytes of payload each).
+ */
+static void
+gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAIQMatrixBufferMPEG2 *iq_matrix;
+ int i;
+
+ if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+ return;
+
+ iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
+
+ for (i = 0; i < 2; i++) {
+ int k, m;
+ unsigned char *qm = NULL;
+ unsigned char qmx[64];
+
+ if (i == 0) {
+ if (iq_matrix->load_intra_quantiser_matrix)
+ qm = iq_matrix->intra_quantiser_matrix;
+ } else {
+ if (iq_matrix->load_non_intra_quantiser_matrix)
+ qm = iq_matrix->non_intra_quantiser_matrix;
+ }
+
+ if (!qm)
+ continue;
+
+ /* Upload quantisation matrix in raster order. The mplayer vaapi
+ * patch passes quantisation matrix in zig-zag order to va library.
+ */
+ for (k = 0; k < 64; k++) {
+ m = zigzag_direct[k];
+ qmx[m] = qm[k];
+ }
+
+ BEGIN_BCS_BATCH(ctx, 18);
+ OUT_BCS_BATCH(ctx, MFX_MPEG2_QM_STATE | (18 - 2));
+ OUT_BCS_BATCH(ctx, i); /* 0 = intra matrix, 1 = non-intra matrix */
+ intel_batchbuffer_data_bcs(ctx, qmx, 64);
+ ADVANCE_BCS_BATCH(ctx);
+ }
+}
+
+/*
+ * Emit an MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock count is the
+ * distance from this slice's starting MB to the next slice's starting MB
+ * (or to the end of the picture for the last slice).  The bit offset of the
+ * first macroblock within the first data byte is passed in the low 3 bits.
+ */
+static void
+gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
+ VAPictureParameterBufferMPEG2 *pic_param,
+ VASliceParameterBufferMPEG2 *slice_param,
+ VASliceParameterBufferMPEG2 *next_slice_param)
+{
+ unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+ unsigned int height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16;
+ int mb_count;
+
+ if (next_slice_param == NULL)
+ mb_count = width_in_mbs * height_in_mbs -
+ (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
+ else
+ mb_count = (next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) -
+ (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
+
+ BEGIN_BCS_BATCH(ctx, 5);
+ OUT_BCS_BATCH(ctx, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+ OUT_BCS_BATCH(ctx,
+ slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); /* bytes after the slice header */
+ OUT_BCS_BATCH(ctx,
+ slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
+ OUT_BCS_BATCH(ctx,
+ slice_param->slice_horizontal_position << 24 |
+ slice_param->slice_vertical_position << 16 |
+ mb_count << 8 |
+ (next_slice_param == NULL) << 5 | /* last-slice flags */
+ (next_slice_param == NULL) << 3 |
+ (slice_param->macroblock_offset & 0x7)); /* first MB bit offset within the byte */
+ OUT_BCS_BATCH(ctx,
+ slice_param->quantiser_scale_code << 24);
+ ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Decode one MPEG-2 picture: run per-picture init, then emit an atomic BCS
+ * batch with pipeline/surface/buffer state, picture and QM state, and one
+ * BSD object per slice, then flush to hardware.
+ */
+static void
+gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+ VAPictureParameterBufferMPEG2 *pic_param;
+ VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
+ dri_bo *slice_data_bo;
+ int i, j;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+ gen6_mfd_mpeg2_decode_init(ctx, decode_state);
+ intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+ intel_batchbuffer_emit_mi_flush_bcs(ctx);
+ gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2);
+ gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+ gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+ gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+ gen6_mfd_mpeg2_pic_state(ctx, decode_state);
+ gen6_mfd_mpeg2_qm_state(ctx, decode_state);
+
+ assert(decode_state->num_slice_params == 1);
+ for (j = 0; j < decode_state->num_slice_params; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
+ slice_data_bo = decode_state->slice_datas[j]->bo;
+ gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2);
+
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+ assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+ /* NULL next_slice_param marks the last slice of the picture. */
+ if (i < decode_state->slice_params[j]->num_elements - 1)
+ next_slice_param = slice_param + 1;
+ else
+ next_slice_param = NULL;
+
+ gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param);
+ slice_param++;
+ }
+ }
+
+ intel_batchbuffer_end_atomic_bcs(ctx);
+ intel_batchbuffer_flush_bcs(ctx);
+}
+
+/* VC-1 decoding is not implemented yet; intentionally a no-op stub. */
+static void
+gen6_mfd_vc1_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+
+}
+
+/*
+ * Public entry point: dispatch a picture decode to the codec-specific
+ * implementation based on the VA profile.  Unsupported profiles assert.
+ */
+void
+gen6_mfd_decode_picture(VADriverContextP ctx,
+ VAProfile profile,
+ struct decode_state *decode_state)
+{
+ switch (profile) {
+ case VAProfileMPEG2Simple:
+ case VAProfileMPEG2Main:
+ gen6_mfd_mpeg2_decode_picture(ctx, decode_state);
+ break;
+
+ case VAProfileH264Baseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ gen6_mfd_avc_decode_picture(ctx, decode_state);
+ break;
+
+ case VAProfileVC1Simple:
+ case VAProfileVC1Main:
+ case VAProfileVC1Advanced:
+ gen6_mfd_vc1_decode_picture(ctx, decode_state); /* currently a no-op stub */
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+/* Driver-level init hook; all real allocation is deferred to first decode. */
+Bool
+gen6_mfd_init(VADriverContextP ctx)
+{
+ return True;
+}
+
+/*
+ * Driver-level teardown: drop every buffer-object reference held by the
+ * gen6_mfd_context, free the context, and clear the private pointer so a
+ * later init starts from a clean state.
+ */
+Bool
+gen6_mfd_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_media_state *media_state = &i965->media_state;
+ struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+ if (gen6_mfd_context) {
+ dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
+ gen6_mfd_context->post_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+ gen6_mfd_context->pre_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
+ gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+ gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+ gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
+ gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
+ gen6_mfd_context->bitplane_read_buffer.bo = NULL;
+
+ free(gen6_mfd_context);
+ }
+
+ media_state->private_context = NULL;
+ return True;
+}
+
diff --git a/i965_drv_video/gen6_mfd.h b/i965_drv_video/gen6_mfd.h
new file mode 100644
index 0000000..dba1d07
--- /dev/null
+++ b/i965_drv_video/gen6_mfd.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#ifndef _GEN6_MFD_H_
+#define _GEN6_MFD_H_
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+
+/* Per-surface H.264 decode data: direct-mode motion-vector buffers for the
+ * top and bottom fields. */
+struct gen6_mfd_surface
+{
+ dri_bo *dmv_top;
+ dri_bo *dmv_bottom;
+ int dmv_bottom_flag; /* nonzero when a separate bottom-field DMV buffer exists */
+};
+
+#define MAX_MFX_REFERENCE_SURFACES 16
+/*
+ * State shared by all MFX/BSD decode paths.  Each bo/valid pair describes a
+ * hardware buffer programmed into MFX_PIPE_BUF_ADDR_STATE /
+ * MFX_BSP_BUF_BASE_ADDR_STATE; 'valid' gates whether the codec path uses it.
+ */
+struct gen6_mfd_context
+{
+ /* Frame-store table mapping VA surfaces to hardware reference slots. */
+ struct {
+ VASurfaceID surface_id;
+ int frame_store_id;
+ } reference_surface[MAX_MFX_REFERENCE_SURFACES];
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } post_deblocking_output;
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } pre_deblocking_output;
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } intra_row_store_scratch_buffer;
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } deblocking_filter_row_store_scratch_buffer;
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } bsd_mpc_row_store_scratch_buffer;
+
+ struct {
+ dri_bo *bo;
+ int valid;
+ } mpr_row_store_scratch_buffer;
+
+ /* Used by VC-1 only; AVC/MPEG-2 paths mark it invalid. */
+ struct {
+ dri_bo *bo;
+ int valid;
+ } bitplane_read_buffer;
+};
+
+struct decode_state;
+
+Bool gen6_mfd_init(VADriverContextP ctx);
+Bool gen6_mfd_terminate(VADriverContextP ctx);
+void gen6_mfd_decode_picture(VADriverContextP ctx,
+ VAProfile profile,
+ struct decode_state *decode_state);
+#endif /* _GEN6_MFD_H_ */
diff --git a/i965_drv_video/i965_avc_ildb.c b/i965_drv_video/i965_avc_ildb.c
index a053062..a6be42b 100644
--- a/i965_drv_video/i965_avc_ildb.c
+++ b/i965_drv_video/i965_avc_ildb.c
@@ -360,7 +360,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx, struct decode_state *decode
if (IS_IRONLAKE(i965->intel.device_id)) {
root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */
} else {
- root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 = 2 */
+ root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */
}
if (pic_param->pic_fields.bits.field_pic_flag)
diff --git a/i965_drv_video/i965_defines.h b/i965_drv_video/i965_defines.h
index 839712e..6c202c7 100644
--- a/i965_drv_video/i965_defines.h
+++ b/i965_drv_video/i965_defines.h
@@ -29,6 +29,10 @@
#define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
#define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
#define CMD_DRAWING_RECTANGLE CMD(3, 1, 0)
@@ -36,6 +40,125 @@
#define CMD_3DPRIMITIVE CMD(3, 3, 0)
#define CMD_DEPTH_BUFFER CMD(3, 1, 5)
+# define CMD_DEPTH_BUFFER_TYPE_SHIFT 29
+# define CMD_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define CMD_CLEAR_PARAMS CMD(3, 1, 0x10)
+/* DW1 */
+# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS CMD(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB CMD(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS CMD(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS CMD(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS CMD(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS CMD(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP CMD(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF CMD(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+#define GEN6_3DSTATE_WM CMD(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS CMD(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK CMD(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE CMD(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
+#define MFX(pipeline, op, sub_opa, sub_opb) \
+ (3 << 29 | \
+ (pipeline) << 27 | \
+ (op) << 24 | \
+ (sub_opa) << 21 | \
+ (sub_opb) << 16)
+
+#define MFX_PIPE_MODE_SELECT MFX(2, 0, 0, 0)
+#define MFX_SURFACE_STATE MFX(2, 0, 0, 1)
+#define MFX_PIPE_BUF_ADDR_STATE MFX(2, 0, 0, 2)
+#define MFX_IND_OBJ_BASE_ADDR_STATE MFX(2, 0, 0, 3)
+#define MFX_BSP_BUF_BASE_ADDR_STATE MFX(2, 0, 0, 4)
+#define MFX_AES_STATE MFX(2, 0, 0, 5)
+#define MFX_STATE_POINTER MFX(2, 0, 0, 6)
+
+#define MFX_WAIT MFX(1, 0, 0, 0)
+
+#define MFX_AVC_IMG_STATE MFX(2, 1, 0, 0)
+#define MFX_AVC_QM_STATE MFX(2, 1, 0, 1)
+#define MFX_AVC_DIRECTMODE_STATE MFX(2, 1, 0, 2)
+#define MFX_AVC_SLICE_STATE MFX(2, 1, 0, 3)
+#define MFX_AVC_REF_IDX_STATE MFX(2, 1, 0, 4)
+#define MFX_AVC_WEIGHTOFFSET_STATE MFX(2, 1, 0, 5)
+
+#define MFD_AVC_BSD_OBJECT MFX(2, 1, 1, 8)
+
+#define MFX_MPEG2_PIC_STATE MFX(2, 3, 0, 0)
+#define MFX_MPEG2_QM_STATE MFX(2, 3, 0, 1)
+
+#define MFD_MPEG2_BSD_OBJECT MFX(2, 3, 1, 8)
+
#define I965_DEPTHFORMAT_D32_FLOAT 1
#define BASE_ADDRESS_MODIFY (1 << 0)
@@ -288,7 +411,9 @@
#define I965_VFCOMPONENT_STORE_PID 7
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
+#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
@@ -298,8 +423,11 @@
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
#define VB0_BUFFER_INDEX_SHIFT 27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
+#define GEN6_VB0_VERTEXDATA (0 << 20)
+#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
#define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
@@ -380,6 +508,21 @@
#define IEF_FILTER_SIZE_3X3 0
#define IEF_FILTER_SIZE_5X5 1
-#define URB_SIZE(intel) (IS_IRONLAKE(intel->device_id) ? 1024 : \
+#define MFX_FORMAT_MPEG2 0
+#define MFX_FORMAT_VC1 1
+#define MFX_FORMAT_AVC 2
+
+#define MFX_CODEC_DECODE 0
+#define MFX_CODEC_ENCODE 1
+
+#define MFD_MODE_VLD 0
+#define MFD_MODE_IT 1
+
+#define MFX_SURFACE_PLANAR_420_8 4
+#define MFX_SURFACE_MONOCHROME 12
+
+#define URB_SIZE(intel) (IS_GEN6(intel->device_id) ? 1024 : \
+ IS_IRONLAKE(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256)
+
#endif /* _I965_DEFINES_H_ */
diff --git a/i965_drv_video/i965_drv_video.c b/i965_drv_video/i965_drv_video.c
index ec5412d..f0466ad 100644
--- a/i965_drv_video/i965_drv_video.c
+++ b/i965_drv_video/i965_drv_video.c
@@ -119,6 +119,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
VAProfile *profile_list, /* out */
int *num_profiles) /* out */
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
int i = 0;
profile_list[i++] = VAProfileMPEG2Simple;
@@ -127,6 +128,12 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
profile_list[i++] = VAProfileH264Main;
profile_list[i++] = VAProfileH264High;
+ if (IS_GEN6(i965->intel.device_id)) {
+ profile_list[i++] = VAProfileVC1Simple;
+ profile_list[i++] = VAProfileVC1Main;
+ profile_list[i++] = VAProfileVC1Advanced;
+ }
+
/* If the assert fails then I965_MAX_PROFILES needs to be bigger */
assert(i <= I965_MAX_PROFILES);
*num_profiles = i;
@@ -156,6 +163,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
entrypoint_list[0] = VAEntrypointVLD;
break;
+ case VAProfileVC1Simple:
+ case VAProfileVC1Main:
+ case VAProfileVC1Advanced:
+ *num_entrypoints = 1;
+ entrypoint_list[0] = VAEntrypointVLD;
+ break;
+
default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
*num_entrypoints = 0;
@@ -262,6 +276,17 @@ i965_CreateConfig(VADriverContextP ctx,
break;
+ case VAProfileVC1Simple:
+ case VAProfileVC1Main:
+ case VAProfileVC1Advanced:
+ if (VAEntrypointVLD == entrypoint) {
+ vaStatus = VA_STATUS_SUCCESS;
+ } else {
+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+ }
+
+ break;
+
default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
break;
@@ -392,8 +417,15 @@ i965_CreateSurfaces(VADriverContextP ctx,
obj_surface->subpic = VA_INVALID_ID;
obj_surface->orig_width = width;
obj_surface->orig_height = height;
- obj_surface->width = ALIGN(obj_surface->orig_width, 16);
- obj_surface->height = ALIGN(obj_surface->orig_height, 16);
+
+ if (IS_GEN6(i965->intel.device_id)) {
+ obj_surface->width = ALIGN(obj_surface->orig_width, 128);
+ obj_surface->height = ALIGN(obj_surface->orig_height, 32);
+ } else {
+ obj_surface->width = ALIGN(obj_surface->orig_width, 16);
+ obj_surface->height = ALIGN(obj_surface->orig_height, 16);
+ }
+
obj_surface->size = SIZE_YUV420(obj_surface->width, obj_surface->height);
obj_surface->flags = SURFACE_REFERENCED;
obj_surface->bo = NULL;
@@ -724,14 +756,18 @@ i965_CreateContext(VADriverContextP ctx,
return vaStatus;
}
- switch (obj_config->profile) {
- case VAProfileH264Baseline:
- case VAProfileH264Main:
- case VAProfileH264High:
+ if (IS_GEN6(i965->intel.device_id))
render_state->interleaved_uv = 1;
- break;
- default:
- render_state->interleaved_uv = 0;
+ else {
+ switch (obj_config->profile) {
+ case VAProfileH264Baseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ render_state->interleaved_uv = 1;
+ break;
+ default:
+ render_state->interleaved_uv = 0;
+ }
}
obj_context->context_id = contextID;
@@ -978,6 +1014,12 @@ i965_BeginPicture(VADriverContextP ctx,
vaStatus = VA_STATUS_SUCCESS;
break;
+ case VAProfileVC1Simple:
+ case VAProfileVC1Main:
+ case VAProfileVC1Advanced:
+ vaStatus = VA_STATUS_SUCCESS;
+ break;
+
default:
assert(0);
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
@@ -1252,7 +1294,8 @@ i965_Init(VADriverContextP ctx)
return VA_STATUS_ERROR_UNKNOWN;
if (!IS_G4X(i965->intel.device_id) &&
- !IS_IRONLAKE(i965->intel.device_id))
+ !IS_IRONLAKE(i965->intel.device_id) &&
+ !IS_GEN6(i965->intel.device_id))
return VA_STATUS_ERROR_UNKNOWN;
if (i965_media_init(ctx) == False)
@@ -1726,15 +1769,15 @@ i965_PutSurface(VADriverContextP ctx,
if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD))
pp_flag |= I965_PP_FLAG_DEINTERLACING;
- i965_render_put_surface(ctx, surface,
+ intel_render_put_surface(ctx, surface,
srcx, srcy, srcw, srch,
destx, desty, destw, desth,
pp_flag);
if(obj_surface->subpic != VA_INVALID_ID) {
- i965_render_put_subpic(ctx, surface,
- srcx, srcy, srcw, srch,
- destx, desty, destw, desth);
+ intel_render_put_subpicture(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
}
dri_swap_buffer(ctx, dri_drawable);
diff --git a/i965_drv_video/i965_media.c b/i965_drv_video/i965_media.c
index 8945444..172dde5 100644
--- a/i965_drv_video/i965_media.c
+++ b/i965_drv_video/i965_media.c
@@ -39,6 +39,7 @@
#include "i965_defines.h"
#include "i965_media_mpeg2.h"
#include "i965_media_h264.h"
+#include "gen6_mfd.h"
#include "i965_media.h"
#include "i965_drv_video.h"
@@ -177,7 +178,7 @@ i965_media_depth_buffer(VADriverContextP ctx)
OUT_BATCH(ctx, 0);
OUT_BATCH(ctx, 0);
OUT_BATCH(ctx, 0);
- ADVANCE_BATCH();
+ ADVANCE_BATCH(ctx);
}
static void
@@ -273,6 +274,11 @@ i965_media_decode_picture(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_media_state *media_state = &i965->media_state;
+ if (IS_GEN6(i965->intel.device_id)) {
+ gen6_mfd_decode_picture(ctx, profile, decode_state);
+ return;
+ }
+
i965_media_decode_init(ctx, profile, decode_state);
assert(media_state->media_states_setup);
media_state->media_states_setup(ctx, decode_state);
@@ -282,6 +288,11 @@ i965_media_decode_picture(VADriverContextP ctx,
Bool
i965_media_init(VADriverContextP ctx)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+ if (IS_GEN6(i965->intel.device_id))
+ return gen6_mfd_init(ctx);
+
return True;
}
@@ -292,6 +303,9 @@ i965_media_terminate(VADriverContextP ctx)
struct i965_media_state *media_state = &i965->media_state;
int i;
+ if (IS_GEN6(i965->intel.device_id))
+ return gen6_mfd_terminate(ctx);
+
if (media_state->free_private_context)
media_state->free_private_context(&media_state->private_context);
diff --git a/i965_drv_video/i965_render.c b/i965_drv_video/i965_render.c
index ceef319..8789ca8 100644
--- a/i965_drv_video/i965_render.c
+++ b/i965_drv_video/i965_render.c
@@ -97,6 +97,28 @@ static const unsigned int ps_subpic_kernel_static_gen5[][4] =
#include "shaders/render/exa_wm_write.g4b.gen5"
};
+/* programs for Sandybridge */
+static const unsigned int sf_kernel_static_gen6[][4] =
+{
+};
+
+static const uint32_t ps_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_planar.g6b"
+#include "shaders/render/exa_wm_yuv_rgb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+
+static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_argb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+
+#define SURFACE_STATE_PADDED_SIZE ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
+#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
+
static uint32_t float_to_uint (float f)
{
union {
@@ -167,6 +189,28 @@ static struct render_kernel render_kernels_gen5[] = {
}
};
+static struct render_kernel render_kernels_gen6[] = {
+ {
+ "SF",
+ sf_kernel_static_gen6,
+ sizeof(sf_kernel_static_gen6),
+ NULL
+ },
+ {
+ "PS",
+ ps_kernel_static_gen6,
+ sizeof(ps_kernel_static_gen6),
+ NULL
+ },
+
+ {
+ "PS_SUBPIC",
+ ps_subpic_kernel_static_gen6,
+ sizeof(ps_subpic_kernel_static_gen6),
+ NULL
+ }
+};
+
static struct render_kernel *render_kernels = NULL;
#define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))
@@ -532,6 +576,25 @@ i965_render_cc_unit(VADriverContextP ctx)
}
static void
+i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ ss->ss3.tiled_surface = 0;
+ ss->ss3.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ ss->ss3.tiled_surface = 1;
+ ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ ss->ss3.tiled_surface = 1;
+ ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+static void
i965_render_src_surface_state(VADriverContextP ctx,
int index,
dri_bo *region,
@@ -542,70 +605,15 @@ i965_render_src_surface_state(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
struct i965_surface_state *ss;
- dri_bo *ss_bo;
-
- ss_bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state), 32);
- assert(ss_bo);
- dri_bo_map(ss_bo, 1);
- assert(ss_bo->virtual);
- ss = ss_bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = format;
- ss->ss0.writedisable_alpha = 0;
- ss->ss0.writedisable_red = 0;
- ss->ss0.writedisable_green = 0;
- ss->ss0.writedisable_blue = 0;
- ss->ss0.color_blend = 1;
- ss->ss0.vert_line_stride = 0;
- ss->ss0.vert_line_stride_ofs = 0;
- ss->ss0.mipmap_layout_mode = 0;
- ss->ss0.render_cache_read_mode = 0;
-
- ss->ss1.base_addr = region->offset + offset;
-
- ss->ss2.width = w - 1;
- ss->ss2.height = h - 1;
- ss->ss2.mip_count = 0;
- ss->ss2.render_target_rotation = 0;
-
- ss->ss3.pitch = pitch - 1;
-
- dri_bo_emit_reloc(ss_bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- offset,
- offsetof(struct i965_surface_state, ss1),
- region);
-
- dri_bo_unmap(ss_bo);
+ dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+ unsigned int tiling;
+ unsigned int swizzle;
assert(index < MAX_RENDER_SURFACES);
- assert(render_state->wm.surface[index] == NULL);
- render_state->wm.surface[index] = ss_bo;
- render_state->wm.sampler_count++;
-}
-static void
-i965_subpic_render_src_surface_state(VADriverContextP ctx,
- int index,
- dri_bo *region,
- unsigned long offset,
- int w, int h, int p, int format)
-{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_render_state *render_state = &i965->render_state;
- struct i965_surface_state *ss;
- dri_bo *ss_bo;
-
- ss_bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state), 32);
- assert(ss_bo);
dri_bo_map(ss_bo, 1);
assert(ss_bo->virtual);
- ss = ss_bo->virtual;
+ ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
memset(ss, 0, sizeof(*ss));
ss->ss0.surface_type = I965_SURFACE_2D;
ss->ss0.surface_format = format;
@@ -626,19 +634,19 @@ i965_subpic_render_src_surface_state(VADriverContextP ctx,
ss->ss2.mip_count = 0;
ss->ss2.render_target_rotation = 0;
- ss->ss3.pitch = p - 1;
+ ss->ss3.pitch = pitch - 1;
+
+ dri_bo_get_tiling(region, &tiling, &swizzle);
+ i965_render_set_surface_tiling(ss, tiling);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_SAMPLER, 0,
offset,
- offsetof(struct i965_surface_state, ss1),
+ SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
region);
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
dri_bo_unmap(ss_bo);
-
- assert(index < MAX_RENDER_SURFACES);
- assert(render_state->wm.surface[index] == NULL);
- render_state->wm.surface[index] = ss_bo;
render_state->wm.sampler_count++;
}
@@ -702,27 +710,8 @@ i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
region = obj_surface->bo;
subpic_region = obj_image->bo;
/*subpicture surface*/
- i965_subpic_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
- i965_subpic_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
-}
-
-static void
-i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
-{
- switch (tiling) {
- case I915_TILING_NONE:
- ss->ss3.tiled_surface = 0;
- ss->ss3.tile_walk = 0;
- break;
- case I915_TILING_X:
- ss->ss3.tiled_surface = 1;
- ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
- break;
- case I915_TILING_Y:
- ss->ss3.tiled_surface = 1;
- ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
- break;
- }
+ i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
+ i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
}
static void
@@ -732,15 +721,13 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
struct i965_render_state *render_state = &i965->render_state;
struct intel_region *dest_region = render_state->draw_region;
struct i965_surface_state *ss;
- dri_bo *ss_bo;
+ dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+
+ assert(index < MAX_RENDER_SURFACES);
- ss_bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state), 32);
- assert(ss_bo);
dri_bo_map(ss_bo, 1);
assert(ss_bo->virtual);
- ss = ss_bo->virtual;
+ ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
memset(ss, 0, sizeof(*ss));
ss->ss0.surface_type = I965_SURFACE_2D;
@@ -774,41 +761,11 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0,
- offsetof(struct i965_surface_state, ss1),
+ SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
dest_region->bo);
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
dri_bo_unmap(ss_bo);
-
- assert(index < MAX_RENDER_SURFACES);
- assert(render_state->wm.surface[index] == NULL);
- render_state->wm.surface[index] = ss_bo;
-}
-
-static void
-i965_render_binding_table(VADriverContextP ctx)
-{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_render_state *render_state = &i965->render_state;
- int i;
- unsigned int *binding_table;
-
- dri_bo_map(render_state->wm.binding_table, 1);
- assert(render_state->wm.binding_table->virtual);
- binding_table = render_state->wm.binding_table->virtual;
- memset(binding_table, 0, render_state->wm.binding_table->size);
-
- for (i = 0; i < MAX_RENDER_SURFACES; i++) {
- if (render_state->wm.surface[i]) {
- binding_table[i] = render_state->wm.surface[i]->offset;
- dri_bo_emit_reloc(render_state->wm.binding_table,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- i * sizeof(*binding_table),
- render_state->wm.surface[i]);
- }
- }
-
- dri_bo_unmap(render_state->wm.binding_table);
}
static void
@@ -964,7 +921,6 @@ i965_surface_render_state_setup(VADriverContextP ctx,
i965_render_wm_unit(ctx);
i965_render_cc_viewport(ctx);
i965_render_cc_unit(ctx);
- i965_render_binding_table(ctx);
i965_render_upload_vertex(ctx, surface,
srcx, srcy, srcw, srch,
destx, desty, destw, desth);
@@ -990,7 +946,6 @@ i965_subpic_render_state_setup(VADriverContextP ctx,
i965_subpic_render_wm_unit(ctx);
i965_render_cc_viewport(ctx);
i965_subpic_render_cc_unit(ctx);
- i965_render_binding_table(ctx);
VARectangle output_rect;
output_rect.x = destx;
@@ -1022,12 +977,13 @@ static void
i965_render_state_base_address(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
if (IS_IRONLAKE(i965->intel.device_id)) {
BEGIN_BATCH(ctx, 8);
OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+ OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -1038,7 +994,7 @@ i965_render_state_base_address(VADriverContextP ctx)
BEGIN_BATCH(ctx, 6);
OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+ OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -1049,16 +1005,13 @@ i965_render_state_base_address(VADriverContextP ctx)
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_render_state *render_state = &i965->render_state;
-
BEGIN_BATCH(ctx, 6);
OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(ctx, 0); /* vs */
OUT_BATCH(ctx, 0); /* gs */
OUT_BATCH(ctx, 0); /* clip */
OUT_BATCH(ctx, 0); /* sf */
- OUT_RELOC(ctx, render_state->wm.binding_table, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */
+ OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
ADVANCE_BATCH(ctx);
}
@@ -1312,7 +1265,10 @@ i965_clear_dest_region(VADriverContextP ctx)
br13 |= pitch;
- BEGIN_BATCH(ctx, 6);
+ if (IS_GEN6(i965->intel.device_id))
+ BEGIN_BLT_BATCH(ctx, 6);
+ else
+ BEGIN_BATCH(ctx, 6);
OUT_BATCH(ctx, blt_cmd);
OUT_BATCH(ctx, br13);
OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
@@ -1328,9 +1284,9 @@ i965_clear_dest_region(VADriverContextP ctx)
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
+ i965_clear_dest_region(ctx);
intel_batchbuffer_start_atomic(ctx, 0x1000);
intel_batchbuffer_emit_mi_flush(ctx);
- i965_clear_dest_region(ctx);
i965_render_pipeline_select(ctx);
i965_render_state_sip(ctx);
i965_render_state_base_address(ctx);
@@ -1371,7 +1327,6 @@ i965_render_initialize(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
- int i;
dri_bo *bo;
/* VERTEX BUFFER */
@@ -1404,18 +1359,13 @@ i965_render_initialize(VADriverContextP ctx)
render_state->sf.state = bo;
/* WM */
- for (i = 0; i < MAX_RENDER_SURFACES; i++) {
- dri_bo_unreference(render_state->wm.surface[i]);
- render_state->wm.surface[i] = NULL;
- }
-
- dri_bo_unreference(render_state->wm.binding_table);
+ dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
- "binding table",
- MAX_RENDER_SURFACES * sizeof(unsigned int),
- 64);
+ "surface state & binding table",
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
+ 4096);
assert(bo);
- render_state->wm.binding_table = bo;
+ render_state->wm.surface_state_binding_table_bo = bo;
dri_bo_unreference(render_state->wm.sampler);
bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1452,7 +1402,7 @@ i965_render_initialize(VADriverContextP ctx)
render_state->cc.viewport = bo;
}
-void
+static void
i965_render_put_surface(VADriverContextP ctx,
VASurfaceID surface,
short srcx,
@@ -1478,8 +1428,480 @@ i965_render_put_surface(VADriverContextP ctx,
intel_batchbuffer_flush(ctx);
}
-void
-i965_render_put_subpic(VADriverContextP ctx,
+static void
+i965_render_put_subpicture(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface = SURFACE(surface);
+ struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
+ assert(obj_subpic);
+
+ i965_render_initialize(ctx);
+ i965_subpic_render_state_setup(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+ i965_subpic_render_pipeline_setup(ctx);
+ i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
+ intel_batchbuffer_flush(ctx);
+}
+
+/*
+ * for GEN6+
+ */
+static void
+gen6_render_initialize(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ dri_bo *bo;
+
+ /* VERTEX BUFFER */
+ dri_bo_unreference(render_state->vb.vertex_buffer);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "vertex buffer",
+ 4096,
+ 4096);
+ assert(bo);
+ render_state->vb.vertex_buffer = bo;
+
+ /* WM */
+ dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "surface state & binding table",
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
+ 4096);
+ assert(bo);
+ render_state->wm.surface_state_binding_table_bo = bo;
+
+ dri_bo_unreference(render_state->wm.sampler);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "sampler state",
+ MAX_SAMPLERS * sizeof(struct i965_sampler_state),
+ 4096);
+ assert(bo);
+ render_state->wm.sampler = bo;
+ render_state->wm.sampler_count = 0;
+
+ /* COLOR CALCULATOR */
+ dri_bo_unreference(render_state->cc.state);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "color calc state",
+ sizeof(struct gen6_color_calc_state),
+ 4096);
+ assert(bo);
+ render_state->cc.state = bo;
+
+ /* CC VIEWPORT */
+ dri_bo_unreference(render_state->cc.viewport);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "cc viewport",
+ sizeof(struct i965_cc_viewport),
+ 4096);
+ assert(bo);
+ render_state->cc.viewport = bo;
+
+ /* BLEND STATE */
+ dri_bo_unreference(render_state->cc.blend);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "blend state",
+ sizeof(struct gen6_blend_state),
+ 4096);
+ assert(bo);
+ render_state->cc.blend = bo;
+
+ /* DEPTH & STENCIL STATE */
+ dri_bo_unreference(render_state->cc.depth_stencil);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "depth & stencil state",
+ sizeof(struct gen6_depth_stencil_state),
+ 4096);
+ assert(bo);
+ render_state->cc.depth_stencil = bo;
+}
+
+static void
+gen6_render_color_calc_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct gen6_color_calc_state *color_calc_state;
+
+ dri_bo_map(render_state->cc.state, 1);
+ assert(render_state->cc.state->virtual);
+ color_calc_state = render_state->cc.state->virtual;
+ memset(color_calc_state, 0, sizeof(*color_calc_state));
+ color_calc_state->constant_r = 1.0;
+ color_calc_state->constant_g = 0.0;
+ color_calc_state->constant_b = 1.0;
+ color_calc_state->constant_a = 1.0;
+ dri_bo_unmap(render_state->cc.state);
+}
+
+static void
+gen6_render_blend_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct gen6_blend_state *blend_state;
+
+ dri_bo_map(render_state->cc.blend, 1);
+ assert(render_state->cc.blend->virtual);
+ blend_state = render_state->cc.blend->virtual;
+ memset(blend_state, 0, sizeof(*blend_state));
+ blend_state->blend1.logic_op_enable = 1;
+ blend_state->blend1.logic_op_func = 0xc;
+ dri_bo_unmap(render_state->cc.blend);
+}
+
+static void
+gen6_render_depth_stencil_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct gen6_depth_stencil_state *depth_stencil_state;
+
+ dri_bo_map(render_state->cc.depth_stencil, 1);
+ assert(render_state->cc.depth_stencil->virtual);
+ depth_stencil_state = render_state->cc.depth_stencil->virtual;
+ memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
+ dri_bo_unmap(render_state->cc.depth_stencil);
+}
+
+static void
+gen6_render_setup_states(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth)
+{
+ i965_render_dest_surface_state(ctx, 0);
+ i965_render_src_surfaces_state(ctx, surface);
+ i965_render_sampler(ctx);
+ i965_render_cc_viewport(ctx);
+ gen6_render_color_calc_state(ctx);
+ gen6_render_blend_state(ctx);
+ gen6_render_depth_stencil_state(ctx);
+ i965_render_upload_vertex(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+}
+
+static void
+gen6_emit_invarient_states(VADriverContextP ctx)
+{
+ OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+ OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+ GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+ OUT_BATCH(ctx, 1);
+
+ /* Set system instruction pointer */
+ OUT_BATCH(ctx, CMD_STATE_SIP | 0);
+ OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_state_base_address(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
+ OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+ OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+
+static void
+gen6_emit_viewport_state_pointers(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+ GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+ (4 - 2));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void
+gen6_emit_urb(VADriverContextP ctx)
+{
+ OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
+ OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+ (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+ OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+ (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_emit_cc_state_pointers(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+ OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+}
+
+static void
+gen6_emit_sampler_state_pointers(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+ GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(ctx, 0); /* VS */
+ OUT_BATCH(ctx, 0); /* GS */
+ OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void
+gen6_emit_binding_table(VADriverContextP ctx)
+{
+ /* Binding table pointers */
+ OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
+ GEN6_BINDING_TABLE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(ctx, 0); /* vs */
+ OUT_BATCH(ctx, 0); /* gs */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
+}
+
+static void
+gen6_emit_depth_buffer_state(VADriverContextP ctx)
+{
+ OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
+ OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
+ (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
+ OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_drawing_rectangle(VADriverContextP ctx)
+{
+ i965_render_drawing_rectangle(ctx);
+}
+
+static void
+gen6_emit_vs_state(VADriverContextP ctx)
+{
+ /* disable VS constant buffer */
+ OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
+ OUT_BATCH(ctx, 0); /* without VS kernel */
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* pass-through */
+}
+
+static void
+gen6_emit_gs_state(VADriverContextP ctx)
+{
+ /* disable GS constant buffer */
+ OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
+ OUT_BATCH(ctx, 0); /* without GS kernel */
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* pass-through */
+}
+
+static void
+gen6_emit_clip_state(VADriverContextP ctx)
+{
+ OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* pass-through */
+ OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_sf_state(VADriverContextP ctx)
+{
+ OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
+ OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+ (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+ (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
+ OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* DW9 */
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* DW14 */
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0); /* DW19 */
+}
+
+static void
+gen6_emit_wm_state(VADriverContextP ctx, int kernel)
+{
+ /* disable WM constant buffer */
+ OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
+ OUT_RELOC(ctx, render_kernels[kernel].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+ (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+ OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+ GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+ GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+ OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+ GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_vertex_element_state(VADriverContextP ctx)
+{
+ /* Set up our vertex elements, sourced from the single vertex buffer. */
+ OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
+ /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+ OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+ OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+}
+
+static void
+gen6_emit_vertices(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+
+ BEGIN_BATCH(ctx, 11);
+ OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
+ OUT_BATCH(ctx,
+ (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+ GEN6_VB0_VERTEXDATA |
+ ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+ OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
+ OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
+ OUT_BATCH(ctx, 0);
+
+ OUT_BATCH(ctx,
+ CMD_3DPRIMITIVE |
+ _3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ OUT_BATCH(ctx, 3); /* vertex count per instance */
+ OUT_BATCH(ctx, 0); /* start vertex offset */
+ OUT_BATCH(ctx, 1); /* single instance */
+ OUT_BATCH(ctx, 0); /* start instance location */
+ OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
+ ADVANCE_BATCH(ctx);
+}
+
+static void
+gen6_render_emit_states(VADriverContextP ctx, int kernel)
+{
+ intel_batchbuffer_start_atomic(ctx, 0x1000);
+ intel_batchbuffer_emit_mi_flush(ctx);
+ gen6_emit_invarient_states(ctx);
+ gen6_emit_state_base_address(ctx);
+ gen6_emit_viewport_state_pointers(ctx);
+ gen6_emit_urb(ctx);
+ gen6_emit_cc_state_pointers(ctx);
+ gen6_emit_sampler_state_pointers(ctx);
+ gen6_emit_vs_state(ctx);
+ gen6_emit_gs_state(ctx);
+ gen6_emit_clip_state(ctx);
+ gen6_emit_sf_state(ctx);
+ gen6_emit_wm_state(ctx, kernel);
+ gen6_emit_binding_table(ctx);
+ gen6_emit_depth_buffer_state(ctx);
+ gen6_emit_drawing_rectangle(ctx);
+ gen6_emit_vertex_element_state(ctx);
+ gen6_emit_vertices(ctx);
+ intel_batchbuffer_end_atomic(ctx);
+}
+
+static void
+gen6_render_put_surface(VADriverContextP ctx,
VASurfaceID surface,
short srcx,
short srcy,
@@ -1488,22 +1910,147 @@ i965_render_put_subpic(VADriverContextP ctx,
short destx,
short desty,
unsigned short destw,
- unsigned short desth)
+ unsigned short desth,
+ unsigned int flag)
+{
+ gen6_render_initialize(ctx);
+ gen6_render_setup_states(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+ i965_clear_dest_region(ctx);
+ gen6_render_emit_states(ctx, PS_KERNEL);
+ intel_batchbuffer_flush(ctx);
+}
+
+static void
+gen6_subpicture_render_blend_state(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_render_state *render_state = &i965->render_state;
+ struct gen6_blend_state *blend_state;
+
+ dri_bo_unmap(render_state->cc.state);
+ dri_bo_map(render_state->cc.blend, 1);
+ assert(render_state->cc.blend->virtual);
+ blend_state = render_state->cc.blend->virtual;
+ memset(blend_state, 0, sizeof(*blend_state));
+ blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
+ blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
+ blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
+ blend_state->blend0.blend_enable = 1;
+ blend_state->blend1.post_blend_clamp_enable = 1;
+ blend_state->blend1.pre_blend_clamp_enable = 1;
+ blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
+ dri_bo_unmap(render_state->cc.blend);
+}
+
+static void
+gen6_subpicture_render_setup_states(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth)
+{
+ VARectangle output_rect;
+
+ output_rect.x = destx;
+ output_rect.y = desty;
+ output_rect.width = destw;
+ output_rect.height = desth;
+
+ i965_render_dest_surface_state(ctx, 0);
+ i965_subpic_render_src_surfaces_state(ctx, surface);
+ i965_render_sampler(ctx);
+ i965_render_cc_viewport(ctx);
+ gen6_render_color_calc_state(ctx);
+ gen6_subpicture_render_blend_state(ctx);
+ gen6_render_depth_stencil_state(ctx);
+ i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
+}
+
+static void
+gen6_render_put_subpicture(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_surface *obj_surface = SURFACE(surface);
struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
- assert(obj_subpic);
- i965_render_initialize(ctx);
- i965_subpic_render_state_setup(ctx, surface,
- srcx, srcy, srcw, srch,
- destx, desty, destw, desth);
- i965_subpic_render_pipeline_setup(ctx);
- i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
+ assert(obj_subpic);
+ gen6_render_initialize(ctx);
+ gen6_subpicture_render_setup_states(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+ gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
intel_batchbuffer_flush(ctx);
}
+/*
+ * global functions
+ */
+void
+intel_render_put_surface(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth,
+ unsigned int flag)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+ if (IS_GEN6(i965->intel.device_id))
+ gen6_render_put_surface(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth,
+ flag);
+ else
+ i965_render_put_surface(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth,
+ flag);
+}
+
+void
+intel_render_put_subpicture(VADriverContextP ctx,
+ VASurfaceID surface,
+ short srcx,
+ short srcy,
+ unsigned short srcw,
+ unsigned short srch,
+ short destx,
+ short desty,
+ unsigned short destw,
+ unsigned short desth)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+ if (IS_GEN6(i965->intel.device_id))
+ gen6_render_put_subpicture(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+ else
+ i965_render_put_subpicture(ctx, surface,
+ srcx, srcy, srcw, srch,
+ destx, desty, destw, desth);
+}
Bool
i965_render_init(VADriverContextP ctx)
@@ -1515,14 +2062,22 @@ i965_render_init(VADriverContextP ctx)
/* kernel */
assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
sizeof(render_kernels_gen5[0])));
+ assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
+ sizeof(render_kernels_gen6[0])));
- if (IS_IRONLAKE(i965->intel.device_id))
+ if (IS_GEN6(i965->intel.device_id))
+ render_kernels = render_kernels_gen6;
+ else if (IS_IRONLAKE(i965->intel.device_id))
render_kernels = render_kernels_gen5;
else
render_kernels = render_kernels_gen4;
for (i = 0; i < NUM_RENDER_KERNEL; i++) {
struct render_kernel *kernel = &render_kernels[i];
+
+ if (!kernel->size)
+ continue;
+
kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
kernel->name,
kernel->size, 0x1000);
@@ -1567,22 +2122,19 @@ i965_render_terminate(VADriverContextP ctx)
render_state->vs.state = NULL;
dri_bo_unreference(render_state->sf.state);
render_state->sf.state = NULL;
- dri_bo_unreference(render_state->wm.binding_table);
- render_state->wm.binding_table = NULL;
dri_bo_unreference(render_state->wm.sampler);
render_state->wm.sampler = NULL;
dri_bo_unreference(render_state->wm.state);
render_state->wm.state = NULL;
-
- for (i = 0; i < MAX_RENDER_SURFACES; i++) {
- dri_bo_unreference(render_state->wm.surface[i]);
- render_state->wm.surface[i] = NULL;
- }
-
+ dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
dri_bo_unreference(render_state->cc.viewport);
render_state->cc.viewport = NULL;
dri_bo_unreference(render_state->cc.state);
render_state->cc.state = NULL;
+ dri_bo_unreference(render_state->cc.blend);
+ render_state->cc.blend = NULL;
+ dri_bo_unreference(render_state->cc.depth_stencil);
+ render_state->cc.depth_stencil = NULL;
if (render_state->draw_region) {
dri_bo_unreference(render_state->draw_region->bo);
diff --git a/i965_drv_video/i965_render.h b/i965_drv_video/i965_render.h
index 84b50f2..8ff4fe2 100644
--- a/i965_drv_video/i965_render.h
+++ b/i965_drv_video/i965_render.h
@@ -28,8 +28,8 @@
#ifndef _I965_RENDER_H_
#define _I965_RENDER_H_
-#define MAX_RENDER_SURFACES 16
#define MAX_SAMPLERS 16
+#define MAX_RENDER_SURFACES (MAX_SAMPLERS + 1)
#include "i965_post_processing.h"
@@ -50,14 +50,15 @@ struct i965_render_state
struct {
int sampler_count;
dri_bo *sampler;
- dri_bo *surface[MAX_RENDER_SURFACES];
- dri_bo *binding_table;
dri_bo *state;
+ dri_bo *surface_state_binding_table_bo;
} wm;
struct {
dri_bo *state;
dri_bo *viewport;
+ dri_bo *blend;
+ dri_bo *depth_stencil;
} cc;
struct {
@@ -74,7 +75,7 @@ struct i965_render_state
Bool i965_render_init(VADriverContextP ctx);
Bool i965_render_terminate(VADriverContextP ctx);
-void i965_render_put_surface(VADriverContextP ctx,
+void intel_render_put_surface(VADriverContextP ctx,
VASurfaceID surface,
short srcx,
short srcy,
@@ -88,7 +89,7 @@ void i965_render_put_surface(VADriverContextP ctx,
void
-i965_render_put_subpic(VADriverContextP ctx,
+intel_render_put_subpicture(VADriverContextP ctx,
VASurfaceID surface,
short srcx,
short srcy,
diff --git a/i965_drv_video/i965_structs.h b/i965_drv_video/i965_structs.h
index f8be616..5f85269 100644
--- a/i965_drv_video/i965_structs.h
+++ b/i965_drv_video/i965_structs.h
@@ -964,4 +964,105 @@ struct i965_sampler_dndi
} dw7;
};
+
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
#endif /* _I965_STRUCTS_H_ */
diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c
index 15c3201..4988e9c 100644
--- a/i965_drv_video/intel_batchbuffer.c
+++ b/i965_drv_video/intel_batchbuffer.c
@@ -40,6 +40,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
int batch_size = BATCH_SIZE;
assert(batch->flag == I915_EXEC_RENDER ||
+ batch->flag == I915_EXEC_BLT ||
batch->flag == I915_EXEC_BSD);
dri_bo_unreference(batch->buffer);
@@ -281,21 +282,23 @@ intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size)
intel_batchbuffer_data_helper(ctx, intel->batch_bcs, data, size);
}
-static void
-intel_batchbuffer_emit_mi_flush_helper(VADriverContextP ctx,
- struct intel_batchbuffer *batch)
-{
- intel_batchbuffer_require_space_helper(ctx, batch, 4);
- intel_batchbuffer_emit_dword_helper(batch,
- MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE);
-}
-
void
intel_batchbuffer_emit_mi_flush(VADriverContextP ctx)
{
struct intel_driver_data *intel = intel_driver_data(ctx);
- intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch);
+ if (intel->batch->flag == I915_EXEC_BLT) {
+ BEGIN_BLT_BATCH(ctx, 4);
+ OUT_BATCH(ctx, MI_FLUSH_DW);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ OUT_BATCH(ctx, 0);
+ ADVANCE_BATCH(ctx);
+ } else if (intel->batch->flag == I915_EXEC_RENDER) {
+ BEGIN_BATCH(ctx, 1);
+ OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
+ ADVANCE_BATCH(ctx);
+ }
}
void
@@ -303,7 +306,18 @@ intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx)
{
struct intel_driver_data *intel = intel_driver_data(ctx);
- intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch_bcs);
+ if (IS_GEN6(intel->device_id)) {
+ BEGIN_BCS_BATCH(ctx, 4);
+ OUT_BCS_BATCH(ctx, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ OUT_BCS_BATCH(ctx, 0);
+ ADVANCE_BCS_BATCH(ctx);
+ } else {
+ BEGIN_BCS_BATCH(ctx, 1);
+ OUT_BCS_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
+ ADVANCE_BCS_BATCH(ctx);
+ }
}
void
@@ -320,7 +334,7 @@ void
intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size)
{
struct intel_driver_data *intel = intel_driver_data(ctx);
-
+ intel_batchbuffer_check_batchbuffer_flag(ctx, I915_EXEC_RENDER);
intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size);
}
@@ -354,3 +368,64 @@ intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx)
intel_batchbuffer_end_atomic_helper(intel->batch_bcs);
}
+static void
+intel_batchbuffer_begin_batch_helper(struct intel_batchbuffer *batch, int total)
+{
+ batch->emit_total = total * 4;
+ batch->emit_start = batch->ptr;
+}
+
+void
+intel_batchbuffer_begin_batch(VADriverContextP ctx, int total)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ intel_batchbuffer_begin_batch_helper(intel->batch, total);
+}
+
+void
+intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ intel_batchbuffer_begin_batch_helper(intel->batch_bcs, total);
+}
+
+static void
+intel_batchbuffer_advance_batch_helper(struct intel_batchbuffer *batch)
+{
+ assert(batch->emit_total == (batch->ptr - batch->emit_start));
+}
+
+void
+intel_batchbuffer_advance_batch(VADriverContextP ctx)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ intel_batchbuffer_advance_batch_helper(intel->batch);
+}
+
+void
+intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ intel_batchbuffer_advance_batch_helper(intel->batch_bcs);
+}
+
+void
+intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+
+ if (flag != I915_EXEC_RENDER &&
+ flag != I915_EXEC_BLT &&
+ flag != I915_EXEC_BSD)
+ return;
+
+ if (intel->batch->flag == flag)
+ return;
+
+ intel_batchbuffer_flush_helper(ctx, intel->batch);
+ intel->batch->flag = flag;
+}
diff --git a/i965_drv_video/intel_batchbuffer.h b/i965_drv_video/intel_batchbuffer.h
index 99ab08d..25652e1 100644
--- a/i965_drv_video/intel_batchbuffer.h
+++ b/i965_drv_video/intel_batchbuffer.h
@@ -18,6 +18,9 @@ struct intel_batchbuffer
int atomic;
int flag;
+ int emit_total;
+ unsigned char *emit_start;
+
int (*run)(drm_intel_bo *bo, int used,
drm_clip_rect_t *cliprects, int num_cliprects,
int DR4, int ring_flag);
@@ -37,6 +40,9 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size);
void intel_batchbuffer_end_atomic(VADriverContextP ctx);
Bool intel_batchbuffer_flush(VADriverContextP ctx);
+void intel_batchbuffer_begin_batch(VADriverContextP ctx, int total);
+void intel_batchbuffer_advance_batch(VADriverContextP ctx);
+
void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x);
void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo,
uint32_t read_domains, uint32_t write_domains,
@@ -48,9 +54,19 @@ void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size)
void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx);
Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
-#define BEGIN_BATCH(ctx, n) do { \
- intel_batchbuffer_require_space(ctx, (n) * 4); \
-} while (0)
+void intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total);
+void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx);
+
+void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag);
+
+#define __BEGIN_BATCH(ctx, n, flag) do { \
+ intel_batchbuffer_check_batchbuffer_flag(ctx, flag); \
+ intel_batchbuffer_require_space(ctx, (n) * 4); \
+ intel_batchbuffer_begin_batch(ctx, (n)); \
+ } while (0)
+
+#define BEGIN_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_RENDER)
+#define BEGIN_BLT_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_BLT)
#define OUT_BATCH(ctx, d) do { \
intel_batchbuffer_emit_dword(ctx, d); \
@@ -63,10 +79,12 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
} while (0)
#define ADVANCE_BATCH(ctx) do { \
+ intel_batchbuffer_advance_batch(ctx); \
} while (0)
#define BEGIN_BCS_BATCH(ctx, n) do { \
intel_batchbuffer_require_space_bcs(ctx, (n) * 4); \
+ intel_batchbuffer_begin_batch_bcs(ctx, (n)); \
} while (0)
#define OUT_BCS_BATCH(ctx, d) do { \
@@ -80,6 +98,7 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
} while (0)
#define ADVANCE_BCS_BATCH(ctx) do { \
+ intel_batchbuffer_advance_batch_bcs(ctx); \
} while (0)
#endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/i965_drv_video/intel_batchbuffer_dump.c b/i965_drv_video/intel_batchbuffer_dump.c
index 0732f0f..99c2c1c 100644
--- a/i965_drv_video/intel_batchbuffer_dump.c
+++ b/i965_drv_video/intel_batchbuffer_dump.c
@@ -44,6 +44,7 @@ dump_mi(unsigned int *data, unsigned int offset, int count, unsigned int device,
{ 0x00, 0, 1, 1, "MI_NOOP" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x26, 0x3f, 4, 5, "MI_FLUSH_DW" },
};
opcode = ((data[0] & MASK_MI_OPCODE) >> SHIFT_MI_OPCODE);
@@ -350,6 +351,363 @@ dump_gfxpipe_bsd(unsigned int *data, unsigned int offset, int count, unsigned in
return length;
}
+static void
+dump_mfx_mode_select(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1,
+ "decoder mode: %d(%s),"
+ "post deblocking output enable %d,"
+ "pre deblocking output enable %d,"
+ "codec select: %d(%s),"
+ "standard select: %d(%s)"
+ "\n",
+ (data[1] >> 16) & 0x1, ((data[1] >> 16) & 0x1) ? "IT" : "VLD",
+ (data[1] >> 9) & 0x1,
+ (data[1] >> 8) & 0x1,
+ (data[1] >> 4) & 0x1, ((data[1] >> 4) & 0x1) ? "Encode" : "Decode",
+ (data[1] >> 0) & 0x3, ((data[1] >> 0) & 0x3) == 0 ? "MPEG2" :
+ ((data[1] >> 0) & 0x3) == 1 ? "VC1" :
+ ((data[1] >> 0) & 0x3) == 2 ? "AVC" : "Reserved");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+}
+
+static void
+dump_mfx_surface_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+}
+
+static void
+dump_mfx_pipe_buf_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+ instr_out(data, offset, 7, "dword 07\n");
+ instr_out(data, offset, 8, "dword 08\n");
+ instr_out(data, offset, 9, "dword 09\n");
+ instr_out(data, offset, 10, "dword 10\n");
+ instr_out(data, offset, 11, "dword 11\n");
+ instr_out(data, offset, 12, "dword 12\n");
+ instr_out(data, offset, 13, "dword 13\n");
+ instr_out(data, offset, 14, "dword 14\n");
+ instr_out(data, offset, 15, "dword 15\n");
+ instr_out(data, offset, 16, "dword 16\n");
+ instr_out(data, offset, 17, "dword 17\n");
+ instr_out(data, offset, 18, "dword 18\n");
+ instr_out(data, offset, 19, "dword 19\n");
+ instr_out(data, offset, 20, "dword 20\n");
+ instr_out(data, offset, 21, "dword 21\n");
+ instr_out(data, offset, 22, "dword 22\n");
+ instr_out(data, offset, 24, "dword 23\n");
+}
+
+static void
+dump_mfx_ind_obj_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+ instr_out(data, offset, 7, "dword 07\n");
+ instr_out(data, offset, 8, "dword 08\n");
+ instr_out(data, offset, 9, "dword 09\n");
+ instr_out(data, offset, 10, "dword 10\n");
+}
+
+static void
+dump_mfx_bsp_buf_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+}
+
+static void
+dump_mfx_aes_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+}
+
+static void
+dump_mfx_state_pointer(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+}
+
+static int
+dump_mfx_common(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+ unsigned int subopcode;
+ int length, i;
+
+ struct {
+ unsigned int subopcode;
+ int min_len;
+ int max_len;
+ char *name;
+ void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures);
+ } mfx_common_commands[] = {
+ { SUBOPCODE_MFX(0, 0), 0x04, 0x04, "MFX_PIPE_MODE_SELECT", dump_mfx_mode_select },
+ { SUBOPCODE_MFX(0, 1), 0x06, 0x06, "MFX_SURFACE_STATE", dump_mfx_surface_state },
+ { SUBOPCODE_MFX(0, 2), 0x18, 0x18, "MFX_PIPE_BUF_ADDR_STATE", dump_mfx_pipe_buf_addr_state },
+ { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_IND_OBJ_BASE_ADDR_STATE", dump_mfx_ind_obj_base_addr_state },
+ { SUBOPCODE_MFX(0, 4), 0x04, 0x04, "MFX_BSP_BUF_BASE_ADDR_STATE", dump_mfx_bsp_buf_base_addr_state },
+ { SUBOPCODE_MFX(0, 5), 0x07, 0x07, "MFX_AES_STATE", dump_mfx_aes_state },
+ { SUBOPCODE_MFX(0, 6), 0x00, 0x00, "MFX_STATE_POINTER", dump_mfx_state_pointer },
+ };
+
+ subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE);
+
+ for (i = 0; i < ARRAY_ELEMS(mfx_common_commands); i++) {
+ if (subopcode == mfx_common_commands[i].subopcode) {
+ unsigned int index;
+
+ length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH;
+ length += 2;
+ instr_out(data, offset, 0, "%s\n", mfx_common_commands[i].name);
+
+ if (length < mfx_common_commands[i].min_len ||
+ length > mfx_common_commands[i].max_len) {
+ fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
+ length, mfx_common_commands[i].name,
+ mfx_common_commands[i].min_len,
+ mfx_common_commands[i].max_len);
+ }
+
+ if (length - 1 >= count)
+ BUFFER_FAIL(count, length, mfx_common_commands[i].name);
+
+ if (mfx_common_commands[i].detail)
+ mfx_common_commands[i].detail(data, offset, device, failures);
+ else {
+ for (index = 1; index < length; index++)
+ instr_out(data, offset, index, "dword %d\n", index);
+ }
+
+ return length;
+ }
+ }
+
+ instr_out(data, offset, 0, "UNKNOWN MFX COMMON COMMAND\n");
+ (*failures)++;
+ return 1;
+}
+
+static void
+dump_mfx_avc_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+ instr_out(data, offset, 7, "dword 07\n");
+ instr_out(data, offset, 8, "dword 08\n");
+ instr_out(data, offset, 9, "dword 09\n");
+ instr_out(data, offset, 10, "dword 10\n");
+ instr_out(data, offset, 11, "dword 11\n");
+ instr_out(data, offset, 12, "dword 12\n");
+}
+
+static void
+dump_mfx_avc_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ unsigned int length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
+ int i;
+
+ instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n",
+ (data[1] >> 8) & 0xff, data[1] & 0xff);
+
+ for (i = 2; i < length; i++) {
+ instr_out(data, offset, i, "dword %d\n", i);
+ }
+}
+
+static void
+dump_mfx_avc_directmode_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ int i;
+
+ for (i = 1; i < 33; i++) {
+ instr_out(data, offset, i, "Direct MV Buffer Base Address for Picture %d\n", i - 1);
+ }
+
+ for (i = 33; i < 35; i++) {
+ instr_out(data, offset, i, "Direct MV Buffer Base Address for Current Decoding Frame/Field\n");
+ }
+
+ for (i = 35; i < 69; i++) {
+ instr_out(data, offset, i, "POC List\n");
+ }
+}
+
+static void
+dump_mfx_avc_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+ instr_out(data, offset, 7, "dword 07\n");
+ instr_out(data, offset, 8, "dword 08\n");
+ instr_out(data, offset, 9, "dword 09\n");
+}
+
+static void
+dump_mfx_avc_ref_idx_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ instr_out(data, offset, 1, "dword 01\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ instr_out(data, offset, 6, "dword 06\n");
+ instr_out(data, offset, 7, "dword 07\n");
+ instr_out(data, offset, 8, "dword 08\n");
+ instr_out(data, offset, 9, "dword 09\n");
+}
+
+static void
+dump_mfx_avc_weightoffset_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ int i;
+
+ instr_out(data, offset, 1,
+ "Weight and Offset L%d table\n",
+ (data[1] >> 0) & 0x1);
+
+ for (i = 2; i < 31; i++) {
+ instr_out(data, offset, i, "dword %d\n", i);
+ }
+}
+
+static void
+dump_mfd_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+ int is_phantom_slice = ((data[1] & 0x3fffff) == 0);
+
+ if (is_phantom_slice) {
+ instr_out(data, offset, 1, "phantom slice\n");
+ instr_out(data, offset, 2, "dword 02\n");
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4, "dword 04\n");
+ instr_out(data, offset, 5, "dword 05\n");
+ } else {
+ instr_out(data, offset, 1, "Indirect BSD Data Length: %d\n", data[1] & 0x3fffff);
+ instr_out(data, offset, 2, "Indirect BSD Data Start Address: 0x%08x\n", data[2] & 0x1fffffff);
+ instr_out(data, offset, 3, "dword 03\n");
+ instr_out(data, offset, 4,
+ "First_MB_Byte_Offset of Slice Data from Slice Header: 0x%08x,"
+ "slice header skip mode: %d"
+ "\n",
+ (data[4] >> 16),
+ (data[4] >> 6) & 0x1);
+ instr_out(data, offset, 5, "dword 05\n");
+ }
+}
+
+static int
+dump_mfx_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+ unsigned int subopcode;
+ int length, i;
+
+ struct {
+ unsigned int subopcode;
+ int min_len;
+ int max_len;
+ char *name;
+ void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures);
+ } mfx_avc_commands[] = {
+ { SUBOPCODE_MFX(0, 0), 0x0d, 0x0d, "MFX_AVC_IMG_STATE", dump_mfx_avc_img_state },
+ { SUBOPCODE_MFX(0, 1), 0x02, 0x3a, "MFX_AVC_QM_STATE", dump_mfx_avc_qm_state },
+ { SUBOPCODE_MFX(0, 2), 0x45, 0x45, "MFX_AVC_DIRECTMODE_STATE", dump_mfx_avc_directmode_state },
+ { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_AVC_SLICE_STATE", dump_mfx_avc_slice_state },
+ { SUBOPCODE_MFX(0, 4), 0x0a, 0x0a, "MFX_AVC_REF_IDX_STATE", dump_mfx_avc_ref_idx_state },
+ { SUBOPCODE_MFX(0, 5), 0x32, 0x32, "MFX_AVC_WEIGHTOFFSET_STATE", dump_mfx_avc_weightoffset_state },
+ { SUBOPCODE_MFX(1, 8), 0x06, 0x06, "MFD_AVC_BSD_OBJECT", dump_mfd_bsd_object },
+ };
+
+ subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE);
+
+ for (i = 0; i < ARRAY_ELEMS(mfx_avc_commands); i++) {
+ if (subopcode == mfx_avc_commands[i].subopcode) {
+ unsigned int index;
+
+ length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH;
+ length += 2;
+ instr_out(data, offset, 0, "%s\n", mfx_avc_commands[i].name);
+
+ if (length < mfx_avc_commands[i].min_len ||
+ length > mfx_avc_commands[i].max_len) {
+ fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
+ length, mfx_avc_commands[i].name,
+ mfx_avc_commands[i].min_len,
+ mfx_avc_commands[i].max_len);
+ }
+
+ if (length - 1 >= count)
+ BUFFER_FAIL(count, length, mfx_avc_commands[i].name);
+
+ if (mfx_avc_commands[i].detail)
+ mfx_avc_commands[i].detail(data, offset, device, failures);
+ else {
+ for (index = 1; index < length; index++)
+ instr_out(data, offset, index, "dword %d\n", index);
+ }
+
+ return length;
+ }
+ }
+
+ instr_out(data, offset, 0, "UNKNOWN MFX AVC COMMAND\n");
+ (*failures)++;
+ return 1;
+}
+
+static int
+dump_gfxpipe_mfx(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+ int length;
+
+ switch ((data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE) {
+ case OPCODE_MFX_COMMON:
+ length = dump_mfx_common(data, offset, count, device, failures);
+ break;
+
+ case OPCODE_MFX_AVC:
+ length = dump_mfx_avc(data, offset, count, device, failures);
+ break;
+
+ default:
+ length = 1;
+ (*failures)++;
+ instr_out(data, offset, 0, "UNKNOWN MFX OPCODE\n");
+ break;
+ }
+
+ return length;
+}
+
static int
dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
@@ -361,7 +719,11 @@ dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int de
break;
case GFXPIPE_BSD:
- length = dump_gfxpipe_bsd(data, offset, count, device, failures);
+ if (IS_GEN6(device))
+ length = dump_gfxpipe_mfx(data, offset, count, device, failures);
+ else
+ length = dump_gfxpipe_bsd(data, offset, count, device, failures);
+
break;
default:
diff --git a/i965_drv_video/intel_batchbuffer_dump.h b/i965_drv_video/intel_batchbuffer_dump.h
index ad096a9..e76b4f7 100644
--- a/i965_drv_video/intel_batchbuffer_dump.h
+++ b/i965_drv_video/intel_batchbuffer_dump.h
@@ -36,6 +36,12 @@
#define SUBOPCODE_BSD_IND_OBJ 4
#define SUBOPCODE_BSD_OBJECT 8
+/* MFX */
+#define OPCODE_MFX_COMMON 0
+#define OPCODE_MFX_AVC 1
+
+#define SUBOPCODE_MFX(A, B) ((A) << 5 | (B))
+
/* MI */
#define MASK_MI_OPCODE 0x1F800000
diff --git a/i965_drv_video/intel_driver.h b/i965_drv_video/intel_driver.h
index 1e2adfa..436cccf 100644
--- a/i965_drv_video/intel_driver.h
+++ b/i965_drv_video/intel_driver.h
@@ -29,7 +29,10 @@
#define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23))
#define MI_FLUSH (CMD_MI | (0x4 << 23))
-#define STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0)
+#define MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0)
+
+#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 0x2)
+#define MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE (0x1 << 7)
#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04)
#define XY_COLOR_BLT_WRITE_ALPHA (1 << 21)
@@ -124,9 +127,20 @@ struct intel_region
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_M_G 0x0046
-#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
- devid == PCI_CHIP_Q45_G || \
- devid == PCI_CHIP_G45_G || \
+#ifndef PCI_CHIP_SANDYBRIDGE_GT1
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A /* Server */
+#endif
+
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
@@ -135,4 +149,12 @@ struct intel_region
#define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G)
#define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
+#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||\
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
#endif /* _INTEL_DRIVER_H_ */
diff --git a/i965_drv_video/shaders/render/Makefile.am b/i965_drv_video/shaders/render/Makefile.am
index fb9b11e..ba019af 100644
--- a/i965_drv_video/shaders/render/Makefile.am
+++ b/i965_drv_video/shaders/render/Makefile.am
@@ -35,17 +35,36 @@ EXTRA_DIST = $(INTEL_G4I) \
$(INTEL_G4B) \
$(INTEL_G4B_GEN5)
+INTEL_G6A = \
+ exa_wm_src_affine.g6a \
+ exa_wm_src_sample_argb.g6a \
+ exa_wm_src_sample_planar.g6a \
+ exa_wm_write.g6a \
+ exa_wm_yuv_rgb.g6a
+
+INTEL_G6B = \
+ exa_wm_src_affine.g6b \
+ exa_wm_src_sample_argb.g6b \
+ exa_wm_src_sample_planar.g6b \
+ exa_wm_write.g6b \
+ exa_wm_yuv_rgb.g6b
+
if HAVE_GEN4ASM
-SUFFIXES = .g4a .g4b
+SUFFIXES = .g4a .g4b .g6a .g6b
.g4a.g4b:
m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+.g6a.g6b:
+ m4 -I$(srcdir) -s $< > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+
$(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G6B): $(INTEL_G4I)
-BUILT_SOURCES= $(INTEL_G4B)
+BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G6B)
clean-local:
-rm -f $(INTEL_G4B)
-rm -f $(INTEL_G4B_GEN5)
+ -rm -f $(INTEL_G6B)
endif
diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g6a b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a
new file mode 100644
index 0000000..08195a4
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`ul', `src_u')
+define(`uh', `m3')
+define(`vl', `src_v')
+define(`vh', `m5')
+
+define(`bl', `g2.0<8,8,1>F')
+define(`bh', `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g6.0<0,1,0>F')
+define(`a0_a_y',`g6.16<0,1,0>F')
+
+/* U */
+pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+
+/* V */
+pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g6b b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b
new file mode 100644
index 0000000..7035e6a
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b
@@ -0,0 +1,4 @@
+ { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 },
+ { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 },
+ { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 },
+ { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 },
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a
new file mode 100644
index 0000000..67bb888
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_base<1>UW /* readback */
+ null
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b
new file mode 100644
index 0000000..2846491
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b
@@ -0,0 +1,3 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a
new file mode 100644
index 0000000..1f78629
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface in planar format */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+mov (1) g0.8<1>UD 0x0000c000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* sample UV (CrCb) */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_g<1>UW /* readback */
+ null
+ sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+    mlen 5 rlen 4 { align1 }; /* required message len 5, readback len 4 */
+
+mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 mask_disable };
+
+/* sample Y */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_r<1>UW /* readback */
+ null
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+    mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b
new file mode 100644
index 0000000..ef45022
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b
@@ -0,0 +1,6 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+ { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22001cc9, 0x00000020, 0x0a4a0203 },
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0001 },
diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6a b/i965_drv_video/shaders/render/exa_wm_write.g6a
new file mode 100644
index 0000000..c0f3cc1
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_write.g6a
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/*
+ * Prepare data in m2-m3 for Red channel, m4-m5 for Green channel,
+ * m6-m7 for Blue and m8-m9 for Alpha channel
+ */
+define(`slot_r_00', `m2')
+define(`slot_r_01', `m3')
+define(`slot_g_00', `m4')
+define(`slot_g_01', `m5')
+define(`slot_b_00', `m6')
+define(`slot_b_01', `m7')
+define(`slot_a_00', `m8')
+define(`slot_a_01', `m9')
+define(`data_port_msg_2_ind', `2')
+
+mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 };
+mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 };
+
+mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 };
+mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 };
+
+mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 };
+mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 };
+
+mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 };
+mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 };
+
+/* write */
+send (16)
+ data_port_msg_2_ind
+ acc0<1>UW
+ null
+ write (
+ 0, /* binding_table */
+        16, /* pixel scoreboard clear, msg type simd16 single source */
+ 12, /* render target write */
+ 0, /* no write commit message */
+ 0 /* headerless render target write */
+ )
+ mlen 8
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+
diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6b b/i965_drv_video/shaders/render/exa_wm_write.g6b
new file mode 100644
index 0000000..3cb6bff
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_write.g6b
@@ -0,0 +1,17 @@
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a
new file mode 100644
index 0000000..5b9e625
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Packard <keithp@keithp.com>
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+define(`YCbCr_base', `src_sample_base')
+
+define(`Cr', `src_sample_b')
+define(`Cr_01', `src_sample_b_01')
+define(`Cr_23', `src_sample_b_23')
+
+define(`Y', `src_sample_r')
+define(`Y_01', `src_sample_r_01')
+define(`Y_23', `src_sample_r_23')
+
+define(`Cb', `src_sample_g')
+define(`Cb_01', `src_sample_g_01')
+define(`Cb_23', `src_sample_g_23')
+
+define(`Crn', `mask_sample_g')
+define(`Crn_01', `mask_sample_g_01')
+define(`Crn_23', `mask_sample_g_23')
+
+define(`Yn', `mask_sample_r')
+define(`Yn_01', `mask_sample_r_01')
+define(`Yn_23', `mask_sample_r_23')
+
+define(`Cbn', `mask_sample_b')
+define(`Cbn_01', `mask_sample_b_01')
+define(`Cbn_23', `mask_sample_b_23')
+
+ /* color space conversion function:
+ * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
+ * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
+ * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
+ */
+
+ /* Normalize Y, Cb and Cr:
+ *
+ * Yn = (Y - 16/255) * 1.164
+ * Crn = Cr - 128 / 255
+ * Cbn = Cb - 128 / 255
+ */
+add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 };
+mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 };
+
+add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 };
+
+add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
+
+ /*
+ * R = Y + Cr * 1.596
+ */
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 };
+
+ /*
+ * G = Crn * -0.813 + Cbn * -0.392 + Y
+ */
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 };
+mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 };
+
+ /*
+ * B = Cbn * 2.017 + Y
+ */
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 };
+
+ /*
+ * A = 1.0
+ */
+//mov (16) src_sample_a<1>F 1.0F { compr align1 };
diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b
new file mode 100644
index 0000000..21fa6fb
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b
@@ -0,0 +1,11 @@
+ { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
+ { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 },
+ { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 },
+ { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+ { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
+ { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+ { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
+ { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+ { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+ { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
diff --git a/va/Android.mk b/va/Android.mk
index 494f7c7..74ac5d2 100644
--- a/va/Android.mk
+++ b/va/Android.mk
@@ -10,6 +10,23 @@ include $(CLEAR_VARS)
#LIBVA_MINOR_VERSION := 31
#LIBVA_MAJOR_VERSION := 0
+
+LOCAL_MODULE := libva
+
+LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils
+
+include $(BUILD_SHARED_LIBRARY)
+
+intermediates := $(local-intermediates-dir)
+GEN := $(intermediates)/va_version.h
+$(GEN): PRIVATE_GEN_VERSION := $(LOCAL_PATH)/../build/gen_version.sh
+$(GEN): PRIVATE_INPUT_FILE := $(LOCAL_PATH)/va_version.h.in
+$(GEN): PRIVATE_CUSTOM_TOOL = sh $(PRIVATE_GEN_VERSION) $(LOCAL_PATH)/.. $(PRIVATE_INPUT_FILE) > $@
+$(GEN): $(LOCAL_PATH)/va_version.h
+ $(transform-generated-source)
+
+LOCAL_GENERATED_SOURCES += $(GEN)
+
LOCAL_SRC_FILES := \
va.c \
va_trace.c \
@@ -19,6 +36,8 @@ LOCAL_CFLAGS += \
-DANDROID \
-DVA_DRIVERS_PATH="\"$(LIBVA_DRIVERS_PATH)\""
+LOCAL_COPY_HEADERS_TO := libva/va
+
LOCAL_C_INCLUDES += \
$(TARGET_OUT_HEADERS)/libva \
$(LOCAL_PATH)/x11 \
@@ -31,13 +50,6 @@ LOCAL_COPY_HEADERS := \
va_version.h.in \
x11/va_dricommon.h
-LOCAL_COPY_HEADERS_TO := libva/va
-
-LOCAL_MODULE := libva
-
-LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils
-
-include $(BUILD_SHARED_LIBRARY)
# For libva-android
diff --git a/va/glx/va_glx_impl.c b/va/glx/va_glx_impl.c
index f5bbe91..9d38930 100644
--- a/va/glx/va_glx_impl.c
+++ b/va/glx/va_glx_impl.c
@@ -36,7 +36,7 @@ static void va_glx_error_message(const char *format, ...)
{
va_list args;
va_start(args, format);
- fprintf(stderr, "[%s] ", PACKAGE_NAME);
+ fprintf(stderr, "libva-glx error: ");
vfprintf(stderr, format, args);
va_end(args);
}
diff --git a/va/glx/va_glx_private.h b/va/glx/va_glx_private.h
index eb1185c..e86efb6 100644
--- a/va/glx/va_glx_private.h
+++ b/va/glx/va_glx_private.h
@@ -25,7 +25,7 @@
#ifndef VA_GLX_PRIVATE_H
#define VA_GLX_PRIVATE_H
-#include "config.h"
+#include "sysdeps.h"
#include "va.h"
#include "va_backend.h"
#include "va_x11.h"
diff --git a/va/sysdeps.h b/va/sysdeps.h
new file mode 100644
index 0000000..0752b17
--- /dev/null
+++ b/va/sysdeps.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007-2009 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL INTEL AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SYSDEPS_H
+#define SYSDEPS_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+#ifdef ANDROID
+# define Bool int
+# define True 1
+# define False 0
+#endif
+
+#endif /* SYSDEPS_H */
diff --git a/va/va.c b/va/va.c
index 8cda412..765d6d2 100644
--- a/va/va.c
+++ b/va/va.c
@@ -23,11 +23,11 @@
*/
#define _GNU_SOURCE 1
+#include "sysdeps.h"
#include "va.h"
#include "va_backend.h"
#include "va_trace.h"
#include "va_fool.h"
-#include "config.h"
#include <assert.h>
#include <stdarg.h>
@@ -37,12 +37,6 @@
#include <dlfcn.h>
#include <unistd.h>
-#ifdef ANDROID
-#define Bool int
-#define True 1
-#define False 0
-#endif
-
#define DRIVER_INIT_FUNC "__vaDriverInit_0_31"
#define DRIVER_EXTENSION "_drv_video.so"
@@ -612,6 +606,7 @@ VAStatus vaCreateSurfaces (
{
VADriverContextP ctx;
VAStatus vaStatus;
+
CHECK_DISPLAY(dpy);
ctx = CTX(dpy);
diff --git a/va/va_tpi.c b/va/va_tpi.c
index 1f01ef0..4472913 100644
--- a/va/va_tpi.c
+++ b/va/va_tpi.c
@@ -23,10 +23,10 @@
*/
#define _GNU_SOURCE 1
+#include "sysdeps.h"
#include "va.h"
#include "va_backend.h"
#include "va_backend_tpi.h"
-#include "config.h"
#include <assert.h>
#include <stdarg.h>
diff --git a/va/x11/va_x11.c b/va/x11/va_x11.c
index 70cea30..93eb243 100644
--- a/va/x11/va_x11.c
+++ b/va/x11/va_x11.c
@@ -23,7 +23,7 @@
*/
#define _GNU_SOURCE 1
-#include "config.h"
+#include "sysdeps.h"
#include "va.h"
#include "va_backend.h"
#include "va_x11.h"