summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Zhao Yakui <yakui.zhao@intel.com> 2012-12-28 16:30:57 +0800
committer: Xiang, Haihao <haihao.xiang@intel.com> 2013-01-17 13:08:40 +0800
commit: df0914b9779110b014895d706dbf68e028392b63 (patch)
tree: 7ca4295e6f3b19e27985c61bf4a4552cbef87032 /src
parent: f3d28947b7e4ba91fa7c273433ec54de56a9e83e (diff)
download: libva-intel-driver-df0914b9779110b014895d706dbf68e028392b63.tar.gz
Adjust the reference window based on MVP prediction to optimize VME param on Haswell
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- src/shaders/vme/inter_bframe_haswell.asm 13
-rw-r--r-- src/shaders/vme/inter_bframe_haswell.g75b 28
-rw-r--r-- src/shaders/vme/inter_frame_haswell.asm 11
-rw-r--r-- src/shaders/vme/inter_frame_haswell.g75b 14
-rw-r--r-- src/shaders/vme/vme75.inc 1
5 files changed, 57 insertions, 10 deletions
diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm
index 9ab35d32..e02748ee 100644
--- a/src/shaders/vme/inter_bframe_haswell.asm
+++ b/src/shaders/vme/inter_bframe_haswell.asm
@@ -78,6 +78,7 @@ send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen
mov (8) vme_m1.0<1>:ud 0:ud {align1};
mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1};
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
(f0.0) jmpi (1) __mb_hwdep_end;
@@ -462,6 +463,9 @@ jmpi (1) word_imedian;
mov (1) mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1};
__mb_hwdep_end:
+asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1};
+add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1};
+and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1};
/* m2, get the MV/Mb cost passed from constant buffer when
spawning thread by MEDIA_OBJECT */
mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
@@ -580,7 +584,16 @@ mov (1) vme_m0.0<1>:W -8:W {align1};
mov (1) vme_m0.2<1>:W -8:W {align1};
mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1};
+(f0.0) add (1) vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1};
+(f0.0) add (1) vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1};
+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1};
+
mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
diff --git a/src/shaders/vme/inter_bframe_haswell.g75b b/src/shaders/vme/inter_bframe_haswell.g75b
index 6327a320..e0ef98ec 100644
--- a/src/shaders/vme/inter_bframe_haswell.g75b
+++ b/src/shaders/vme/inter_bframe_haswell.g75b
@@ -35,6 +35,7 @@
{ 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 },
{ 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000cb0 },
{ 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 },
@@ -63,7 +64,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000012f0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000013a0 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 },
@@ -99,7 +100,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000010b0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00001160 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 },
@@ -134,7 +135,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000e80 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000f30 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 },
@@ -166,7 +167,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000c80 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000d30 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
@@ -207,13 +208,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000008f0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000009a0 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000890 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000940 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
{ 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 },
@@ -232,14 +233,17 @@
{ 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000810 },
{ 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000700 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000007b0 },
{ 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 },
+ { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 },
+ { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 },
+ { 0x00400005, 0x2a902d29, 0x00690a88, 0xfffcfffc },
{ 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
{ 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
@@ -284,6 +288,14 @@
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 },
{ 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
+ { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 },
+ { 0x00010040, 0x24443dad, 0x00000444, 0x00040004 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
+ { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 },
+ { 0x00010040, 0x24463dad, 0x00000446, 0x00040004 },
+ { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
+ { 0x00200040, 0x244435ad, 0x00450444, 0x00450a94 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
{ 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm
index 36b394ae..fa9a0a04 100644
--- a/src/shaders/vme/inter_frame_haswell.asm
+++ b/src/shaders/vme/inter_frame_haswell.asm
@@ -77,6 +77,7 @@ mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1};
+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
(f0.0) jmpi (1) __mb_hwdep_end;
/* read back the data for MB A */
@@ -364,6 +365,9 @@ jmpi (1) word_imedian;
mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};
__mb_hwdep_end:
+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1};
+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1};
+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1};
/* m2, get the MV/Mb cost passed from constant buffer when
spawning thread by MEDIA_OBJECT */
mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
@@ -481,6 +485,13 @@ mov (1) vme_m0.2<1>:W -12:W {align1};
mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1};
+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1};
+
+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b
index 1ef526cb..2ef1826d 100644
--- a/src/shaders/vme/inter_frame_haswell.g75b
+++ b/src/shaders/vme/inter_frame_haswell.g75b
@@ -34,6 +34,7 @@
{ 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
{ 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 },
{ 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 },
{ 0x00010020, 0x34001c00, 0x00001400, 0x00000710 },
{ 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 },
@@ -141,14 +142,17 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000007b0 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000840 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x00000750 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000007e0 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
+ { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
+ { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
+ { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc },
{ 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
{ 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
@@ -193,6 +197,12 @@
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
{ 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 },
+ { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c },
+ { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 },
+ { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 },
+ { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 },
+ { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
{ 0x00000001, 0x24640231, 0x0000009c, 0x00000000 },
diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc
index 5a121b94..1c286c00 100644
--- a/src/shaders/vme/vme75.inc
+++ b/src/shaders/vme/vme75.inc
@@ -309,6 +309,7 @@ define(`mb_mv1', `r94')
define(`mb_mv2', `r95')
define(`mb_mv3', `r96')
define(`mb_ref', `r97')
+define(`mb_ref_win', `r84')
define(`DREF_REGION_SIZE', `0x2020:UW')
define(`PRED_L0', `0x0':uw)