diff options
-rwxr-xr-x | src/i965_post_processing.c | 2 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/Makefile.am | 6 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a | 470 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a | 470 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a | 470 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a | 470 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a | 362 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/pl3_to_pl2.asm | 17 | ||||
-rw-r--r-- | src/shaders/post_processing/gen8/pl3_to_pl2.g8b | 260 |
9 files changed, 2526 insertions, 1 deletions
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index c60c705b..3d3de871 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -1176,7 +1176,7 @@ static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { }; static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { -#include "shaders/post_processing/gen7/pl3_to_pl2.g75b" +#include "shaders/post_processing/gen8/pl3_to_pl2.g8b" }; static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am index 7a4860b9..4f28e7fd 100644 --- a/src/shaders/post_processing/gen8/Makefile.am +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -1,6 +1,7 @@ INTEL_PP_G8B = \ pl2_to_pl2.g8b \ pl2_to_pl3.g8b \ + pl3_to_pl2.g8b \ $(NULL) INTEL_PP_G8A = \ @@ -9,9 +10,14 @@ INTEL_PP_G8A = \ PL2_AVS_Buf_1.g8a \ PL2_AVS_Buf_2.g8a \ PL2_AVS_Buf_3.g8a \ + PL3_AVS_Buf_0.g8a \ + PL3_AVS_Buf_1.g8a \ + PL3_AVS_Buf_2.g8a \ + PL3_AVS_Buf_3.g8a \ Save_AVS_NV12.g8a \ Save_AVS_PL3.g8a \ Set_AVS_Buf_0123_PL2.g8a \ + Set_AVS_Buf_0123_PL3.g8a \ Set_Layer_0.g8a \ VP_Setup.g8a \ $(NULL) diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a new file mode 100644 index 00000000..b5b85d56 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + 
+// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green channel (Y): only mask bit 13 is cleared + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 0:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a new file mode 100644 index 00000000..8457ae17 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green channel (Y): only mask bit 13 is cleared + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_1(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a new file mode 100644 index 00000000..99b40fe9 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc (0x44EB000: msg len 2, resp len 4, header present, SIMD32/64, sample_8x8) added to r23.5 + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: 0xD = 1101b clears write-mask bit 13 + + + + // set the vertical block number (m1.1 of the AVS payload, mirrored in r25.1) + + mov (1) r25.1<1>:ud 2:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirror (r25) into r17, the payload register that follows header r16 + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding index (BI) to sample the U plane + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only: 0xE = 1110b clears write-mask bit 12 + + send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding index (BI) to sample the V plane + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only: 0xE = 1110b clears write-mask bit 12 + + send (1) uwBUFFER_2(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a new file mode 100644 index 00000000..8659876b --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc (0x44EB000: msg len 2, resp len 4, header present, SIMD32/64, sample_8x8) added to r23.5 + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: 0xD = 1101b clears write-mask bit 13 + + + + // set the vertical block number (m1.1 of the AVS payload, mirrored in r25.1) + + mov (1) r25.1<1>:ud 3:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirror (r25) into r17, the payload register that follows header r16 + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding index (BI) to sample the U plane + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only: 0xE = 1110b clears write-mask bit 12 + + send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding index (BI) to sample the V plane + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only: 0xE = 1110b clears write-mask bit 12 + + send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a new file mode 100644 index 00000000..05336661 --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a @@ -0,0 +1,362 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_PL3.asm + + + +//Module Name: Set_Buf_0123_PL3 + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT: (YYUUVVAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12. + mov (4) acc0.0<1>:w 0x6EA2:v + //The :v immediate holds the GRF offsets 8,0,4,12 with 6 subtracted from each (packed signed nibbles 2,-6,-2,6) + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 64 + 6: adds the 6 back and rebases to r64 (Base of BUFFER_0), giving GRF numbers 72,64,68,76 + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert GRF number to BYTE address (shift left by 5). + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + //SU LAYOUT:(YUVAYUVA) + //V = 4, Y = 0, U = 2, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert GRF number to BYTE address (shift left by 5). 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.asm b/src/shaders/post_processing/gen8/pl3_to_pl2.asm new file mode 100644 index 00000000..713cb979 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.g8b b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b new file mode 100644 index 00000000..9a141e72 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b @@ -0,0 +1,260 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 
0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 
0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 
0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 
0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 
0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 
0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 
0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, |