diff options
Diffstat (limited to 'i965_drv_video/shaders/post_processing/Core_Kernels')
44 files changed, 0 insertions, 3903 deletions
diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc b/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc deleted file mode 100644 index cbed61a..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -// Module name: AVS_IEF.inc - -#ifndef _AVS_INF_INC_ -#define _AVS_INF_INC_ - -#include "undefall.inc" //Undefine the SRC and DEST symbols - - // Message Header - // m0.7 31:0 Debug - // m0.6 31:0 Debug - // m0.5 31:0 Ignored - // m0.4 31:0 Ignored - // m0.3 31:0 Ignored - // m0.2 31:16 Ignored - // 15 Alpha Write Channel Mask enable=0, disable=1 - // 14 Blue Write Channel Mask (V) - // 13 Green Write Channel Mask (Y) - // 12 Red Write Channel Mask (U) - // 11:0 Ignored - // m0.1 Ignored - // m0.0 Ignored - -#define mAVS_8x8_HDR m0 // Message Header -#define mAVS_PAYLOAD m1 // Message Payload Header - -#define mAVS_8x8_HDR_2 m2 // Message Header -#define mAVS_PAYLOAD_2 m3 // Message Payload Header - -#define mAVS_8x8_HDR_UV m2 // Message Header -#define mAVS_PAYLOAD_UV m3 // Message Payload Header - -#define rAVS_8x8_HDR rMSGSRC // Mirror of Message Header -#define rAVS_PAYLOAD r9 // Mirror of Message Payload Header - - // AVS payload - // m1.7 Ignored - // m1.6 Pixel 0 V Address ---> ORIY (Y0) - // m1.5 Delta V ---> Step Y - // m1.4 Ignored - // m1.3 Ignored - // m1.2 Pixel 0 U Address ---> ORIX (X0) - // m1.1 U 2nd Derivative ---> NLAS dx - // m1.0 Delta U ---> Step X (NOTE(review): AVS_SetupFirstBlock.asm stores fVIDEO_STEP_DELTA in payload .0 and fVIDEO_STEP_X in payload .1, the reverse of the m1.0/m1.1 labels here - confirm against the sampler message spec) - - // Sampler Message Descriptor - // 31:29 Reserved 000 - // 28:25 Message length 0010 - // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel - // 19 Header Present 1 - // 18 MBZ 0 - // 17:16 SIMD Mode
11 ---> SIMD64 - // 15:12 Message Type 1011 ---> sample_8x8 - // 11:8 Sampler Index xxxx - // 7:0 Binding Table Index xxxxxxxx -#define nAVS_MSG_DSC_1CH 0x044BB000 -#define nAVS_MSG_DSC_2CH 0x048BB000 -#define nAVS_MSG_DSC_3CH 0x04CBB000 -#define nAVS_MSG_DSC_4CH 0x050BB000 - -#define nAVS_RED_CHANNEL_ONLY 0x0000E000 // Enable Red channel only -#define nAVS_GREEN_CHANNEL_ONLY 0x0000D000 // Enable Green channel only -#define nAVS_RED_BLUE_CHANNELS 0x0000A000 // Enable Red and Blue channels -#define nAVS_RGB_CHANNELS 0x00008000 // Enable RGB(YUV) channels -#define nAVS_ALL_CHANNELS 0x00000000 // Enable all channels (ARGB\AYUV) - - - -.declare ubAVS_RESPONSE Base=REG(r,nTEMP8) ElementSize=1 SrcRegion=REGION(16,1) Type=ub -.declare uwAVS_RESPONSE Base=REG(r,nTEMP8) ElementSize=2 SrcRegion=REGION(16,1) Type=uw - -.declare ubAVS_RESPONSE_2 Base=REG(r,nTEMP24) ElementSize=1 SrcRegion=REGION(16,1) Type=ub -.declare uwAVS_RESPONSE_2 Base=REG(r,nTEMP24) ElementSize=2 SrcRegion=REGION(16,1) Type=uw - - -#if (nSRC_REGION==nREGION_2) - #define uwDEST_Y uwBOT_Y - #define uwDEST_U uwBOT_U - #define uwDEST_V uwBOT_V - - #define ubDEST_Y ubBOT_Y - - #undef nSRC_REGION - #define nSRC_REGION nREGION_2 - -#else //(nSRC_REGION==nREGION_1) - #define uwDEST_Y uwTOP_Y - #define uwDEST_U uwTOP_U - #define uwDEST_V uwTOP_V - - #define ubDEST_Y ubTOP_Y - - #undef nSRC_REGION - #define nSRC_REGION nREGION_1 - -#endif - - -#endif //_AVS_INF_INC_ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm deleted file mode 100644 index d45ce44..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm +++ /dev/null @@ -1,35 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0.
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//------------------------------------------------------------------------------ -// AVS_SetupFirstBlock.asm -//------------------------------------------------------------------------------ - - // Setup Message Header -// mov (8) mAVS_8x8_HDR<1>:ud rMSGSRC<8;8,1>:ud - - // Check NLAS Enable bit - and.z.f0.0 (1) wNULLREG uwNLAS_ENABLE:uw BIT15:uw - (f0.0)mov (1) fVIDEO_STEP_DELTA:f 0.0:f - - // Setup Message Payload Header for 1st block of Media Sampler 8x8 - mov (1) rAVS_PAYLOAD.0:f fVIDEO_STEP_DELTA:f //NLAS dx - mov (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f //Step X - mov (1) rAVS_PAYLOAD.5:f fVIDEO_STEP_Y:f //Step Y - mov (2) rAVS_PAYLOAD.2<4>:f fSRC_VID_H_ORI<2;2,1>:f //Orig X and Y - - - - - - - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm deleted file mode 100644 index 8f125dc..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm +++ /dev/null @@ -1,27 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//------------------------------------------------------------------------------ -// AVS_SetupSecondBlock.asm -//------------------------------------------------------------------------------ - - //NLAS calculations for 2nd block of Media Sampler 8x8: - // X(i) = X0 + dx*i + ddx*i*(i-1)/2 ==> X(8) = X0 + dx*8 +ddx*28 - // dx(i)= dx(0) + ddx*i ==> dx(8)= dx + ddx*8 - - // Calculating X(8) - mov (1) acc0.2<1>:f fSRC_VID_H_ORI:f - mac (1) acc0.2<1>:f fVIDEO_STEP_X:f 8.0:f - mac (1) rAVS_PAYLOAD.2:f fVIDEO_STEP_DELTA:f 28.0:f - - // Calculating dx(8) - mov (1) acc0.1<1>:f fVIDEO_STEP_X:f - mac (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_DELTA:f 8.0:f - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc b/i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc deleted file mode 100644 index 62f84c0..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc +++ /dev/null @@ -1,194 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -// Module name: DI.inc - -#ifdef GT -// GT DI Kernel -#else // ILK -// ILK DI Kernel -#endif - - -//--------------------------------------------------------------------------- -// Binding table indices -//--------------------------------------------------------------------------- -#define nBIDX_DI_PRV 10 // Previous DI-ed frame -#define nBIDX_DI_CUR 13 // Current DI-ed frame -#define nBIDX_DN 7 // Denoised frame -#define nBIDX_STAT 20 // Statistics -#define nBIDX_DI_Source 4 // Source Surface - - -//--------------------------------------------------------------------------- -// Message descriptors -//--------------------------------------------------------------------------- -// Extended message descriptor -#define nSMPL_ENGINE 0x2 -#define nDATAPORT_WRITE 0x5 -#define nTS_EOT 0x27 // with End-Of-Thread bit ON - - // Message descriptor for end-of-thread - // = 000 0001 (message len) 00000 (resp len) - // 0 (header present 0) 00000000000000 0 (URB dereferenced) 0000 -#define nEOT_MSGDSC 0x02000000 - - // Message descriptor for sampler read - // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11) - // 1 (header present 1) 0 11 (SIMD32/64 mode) - // 1000 (message type) 0000 (DI state index) - // 00000000 (binding table index - set later) - // = 0x040b8000 - -// comment begin -// The following is commented out because of walker feature -// It corresponds to the #ifdef GT #else and #endif -//#define nSMPL_MSGDSC 0x040b8000 -//#define nSMPL_RESP_LEN_DI 0x00c00000 // 12 -//#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5 -//#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9 -//#define nSMPL_RESP_LEN_NODN 0x00900000 // 9 -//#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11 -// comment end - -#ifdef GT - -#define nSMPL_MSGDSC 0x040b8000 -#define nSMPL_RESP_LEN_DI 0x00c00000 // 12 -#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5 //DI disable, the XY stored in 5th GRF, no impact to return length -#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9 //DI disable, 
the XY stored in 5th GRF, no impact to return length -#define nSMPL_RESP_LEN_NODN 0x00a00000 // 10 //NO DN, originally use 9, now we need use 10 to store the XY with walker -#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11 - -#else - -#define nSMPL_MSGDSC 0x040b8000 -#define nSMPL_RESP_LEN_DI 0x00c00000 // 12 -#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5 -#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9 -#define nSMPL_RESP_LEN_NODN 0x00900000 // 9 -#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11 - -#endif - - // Message descriptor for dataport media write -#ifdef GT - // = 000 0000 (message len - set later) 00000 (resp len 0) - // 1 (header present 1) 0 0 1010 (media block write) 00000 - // 00000000 (binding table index - set later) - // = 0x00094000 -#define nDPMW_MSGDSC 0x00094000 -#else // ILK - // = 000 0000 (message len - set later) 00000 (resp len 0) - // 1 (header present 1) 000 0 010 (media block write) 0000 - // 00000000 (binding table index - set later) - // = 0x00082000 -#define nDPMW_MSGDSC 0x00082000 -#endif -#define nDPMW_MSG_LEN_STMM 0x04000000 // 2 - STMM -#define nDPMW_MSG_LEN_DH 0x04000000 // 2 - Denoise history -#define nDPMW_MSG_LEN_PA_DN 0x0a000000 // 5 - Denoised output -#define nDPMW_MSG_LEN_PA_NODI 0x12000000 // 9 - Denoised output - denoise only - DI disabled -#define nDPMW_MSG_LEN_PL_DN 0x06000000 // 3 - Denoised output -#define nDPMW_MSG_LEN_PL_NODI 0x0a000000 // 5 - Denoised output - denoise only - DI disabled -#define nDPMW_MSG_LEN_DI 0x0a000000 // 5 - DI output - - -//--------------------------------------------------------------------------- -// Static and inline parameters -//--------------------------------------------------------------------------- -// Static parameters -.declare ubTFLD_FIRST Base=r1.27 ElementSize=1 Type=ub // top field first -.declare ubSRCYUVOFFSET Base=r1.4 ElementSize=1 Type=ub // source packed format -.declare ubDSTYUVOFFSET Base=r1.8 ElementSize=1 Type=ub // destination packed format -.declare uwSPITCH_DIV2 
Base=r1.10 ElementSize=2 Type=uw // statistics surface pitch divided by 2 - -// Inline parameters -.declare uwXORIGIN Base=r5.0 ElementSize=2 Type=uw // X and Y origin -.declare uwYORIGIN Base=r5.1 ElementSize=2 Type=uw - - -//--------------------------------------------------------------------------- -// Kernel GRF variables -//--------------------------------------------------------------------------- -// Message response (Denoised & DI-ed pixels & statistics) -.declare dRESP Base=r8 ElementSize=4 Type=d // Response message (12 or 5 or 11) -.declare ubRESP Base=r8 ElementSize=1 Type=ub - -.declare dSTMM Base=r16 ElementSize=4 Type=d // STMM -.declare ubDN_HIST_NODI Base=r12 ElementSize=1 Type=ub // Denoise history data (DI disabled) -.declare ubDN_HIST_DI Base=r17 ElementSize=1 Type=ub // Denoise history data (DI enabled) -.declare uwRETURNED_POSITION_DI Base=r17 ElementSize=2 Type=uw // XY_Return_Data (DI enabled) -.declare uwRETURNED_POSITION_DN Base=r12 ElementSize=2 Type=uw // XY_Return_Data (DI disabled) - -.declare ub1ST_FLD_DN Base=r12 ElementSize=1 Type=ub // 1st field Denoised data (DI enabled) -.declare d1ST_FLD_DN Base=r12 ElementSize=4 Type=d -.declare ub2ND_FLD_DN Base=r18 ElementSize=1 Type=ub // 2nd field Denoised data (DI enabled) -.declare d2ND_FLD_DN Base=r18 ElementSize=4 Type=d -.declare ubPRV_DI Base=r8 ElementSize=1 Type=ub // Previous frame DI (DI enabled) -.declare ubCUR_DI Base=r12 ElementSize=1 Type=ub // Current frame DI (DI enabled) - -// Packed denoised output -.declare ubDN_YUV Base=r22 ElementSize=1 Type=ub // Denoised YUV422 -.declare dDN_YUV Base=r22 ElementSize=4 Type=d -#define npDN_YUV 704 // = 22*32 = 0x2C0 - -// Packed DI output -.declare dDI_YUV_PRV Base=r32 ElementSize=4 Type=d // Previous frame DI output -.declare dDI_YUV_CUR Base=r36 ElementSize=4 Type=d // Current frame DI output -#define npDI_YUV 1024 // = 32*32 = 0x400 - -// For packed output -#define p422_YOFFSET a0.2 -#define p422_UOFFSET a0.3 -#define p422_VOFFSET
a0.4 -#define pDN_TFLDSRC a0.6 -#define pDN_BFLDSRC a0.7 -#define npRESP 192 // = 6*32 - -// Message source -.declare udMSGSRC Base=r70 ElementSize=4 Type=ud -.declare uwMSGSRC Base=r70 ElementSize=2 Type=uw -.declare dMSGSRC Base=r70 ElementSize=4 Type=d - - -//--------------------------------------------------------------------------- -// Kernel MRF variables -//--------------------------------------------------------------------------- -#define mMSGHDR_SMPL m1 // Sampler response: m1~m2 -.declare mudMSGHDR_SMPL Base=m1 ElementSize=4 Type=ud -.declare muwMSGHDR_SMPL Base=m1 ElementSize=2 Type=uw -#define mMSGHDR_DN m3 // Denoise output: m3~m7 for PA, m3~m5 for PL -.declare mdMSGHDR_DN Base=m3 ElementSize=4 Type=d -#define mMSGHDR_STAT m8 // Statistics output: m8~m9 -.declare mdMSGHDR_STAT Base=m8 ElementSize=4 Type=d -.declare mubMSGHDR_STAT Base=m8 ElementSize=1 Type=ub -#define mMSGHDR_DI m10 // DI output: m10~m14 -.declare mdMSGHDR_DI Base=m10 ElementSize=4 Type=d -#define mMSGHDR_EOT m15 // EOT - -#ifdef GT -#define MSGSRC -#else -#define MSGSRC null:ud -#endif - - -//--------------------------------------------------------------------------- -// End of thread instruction -//--------------------------------------------------------------------------- -#ifdef GT -#define END_THREAD send (8) null<1>:d mMSGHDR_EOT nTS_EOT nEOT_MSGDSC -#else // ILK -#define END_THREAD send (8) null<1>:d mMSGHDR_EOT null:ud nTS_EOT nEOT_MSGDSC -#endif - - -// end of DI.inc diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm deleted file mode 100644 index ae8ff85..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm +++ /dev/null @@ -1,24 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - - -// Write denoise history to memory -shr (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w 2:w NODDCLR // X,Y origin / 4 -add (1) rMSGSRC.0<1>:ud rMSGSRC.0<0;1,0>:ud uwSPITCH_DIV2<0;1,0>:uw NODDCLR_NODDCHK // Add pitch to X origin -mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_HIST:ud NODDCHK // block width and height (4x2) - -mov (8) mMSGHDR_HIST<1>:ud rMSGSRC.0<8;8,1>:ud // message header -mov (1) mudMSGHDR_HIST(1)<1> udRESP(nDI_HIST_OFFSET,0)<0;1,0> // Move denoise history to MRF - -send (8) dNULLREG mMSGHDR_HIST udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_HIST+nBI_STMM_HISTORY_OUTPUT:ud - - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm deleted file mode 100644 index f4e2fe7..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm +++ /dev/null @@ -1,56 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - - shl (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w NODDCLR // H. 
block origin need to be doubled - mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w NODDCLR_NODDCHK // Block origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DI:ud NODDCHK // Block width and height (32x8) - - - add (4) pCF_Y_OFFSET<1>:uw ubDEST_CF_OFFSET<4;4,1>:ub nDEST_YUV_REG*nGRFWIB:w // Initial Y,U,V offset in YUV422 block - - // Pack 2nd field Y - $for(0; <nY_NUM_OF_ROWS; 1) { - mov (16) r[pCF_Y_OFFSET, %1*nGRFWIB]<2> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16) - } - // Pack 1st field Y - $for(0; <nY_NUM_OF_ROWS; 1) { - mov (16) r[pCF_Y_OFFSET, %1+4*nGRFWIB]<2> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) - } - // Pack 2nd field U - $for(0; <nUV_NUM_OF_ROWS; 1) { - mov (8) r[pCF_U_OFFSET, %1*nGRFWIB]<4> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - } - // Pack 1st field U - $for(0; <nUV_NUM_OF_ROWS; 1) { - mov (8) r[pCF_U_OFFSET, %1+4*nGRFWIB]<4> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - } - // Pack 2nd field V - $for(0; <nUV_NUM_OF_ROWS; 1) { - mov (8) r[pCF_V_OFFSET, %1*nGRFWIB]<4> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - } - // Pack 1st field V - $for(0; <nUV_NUM_OF_ROWS; 1) { - mov (8) r[pCF_V_OFFSET, %1+4*nGRFWIB]<4> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - } - - //save the previous frame - mov (8) mMSGHDR<1>:ud rMSGSRC<8;8,1>:ud - $for(0; <4; 1) { - mov (8) mudMSGPAYLOAD(%1)<1> udDEST_YUV(%1)REGION(8,1) - } - send (8) dNULLREG mMSGHDR udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPW_MSG_SIZE_DI+nBI_DESTINATION_1_YUV:ud - - //save the current frame - mov (8) mMSGHDR<1>:ud rMSGSRC<8;8,1>:ud - $for(0; <4; 1) { - mov (8) mudMSGPAYLOAD(%1)<1> udDEST_YUV(%1+4)REGION(8,1) - } - send (8) dNULLREG mMSGHDR udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPW_MSG_SIZE_DI+nBI_DESTINATION_2_YUV:ud - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc b/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc deleted file mode 100644 index 3258756..0000000 ---
a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc +++ /dev/null @@ -1,162 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -// Module name: DNDI.inc - -#ifdef GT -// GT DI Kernel -#else // ILK -// ILK DI Kernel -#endif - -#include "undefall.inc" - -//--------------------------------------------------------------------------- -// Message descriptors -//--------------------------------------------------------------------------- -// Extended message descriptor - // Message descriptor for sampler read -// // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11) -// // 1 (header present 1) 0 11 (SIMD32/64 mode) -// // 1000 (message type) 0000 (DI state index) -// // 00000000 (binding table index - set later) -// // = 0x040b8000 -#define nSMPL_DI_MSGDSC 0x040b8000 - -#define nSMPL_RESP_LEN_DNDI nRESLEN_12 // 12 - for DN + DI Alg -#define nSMPL_RESP_LEN_DN_PL nRESLEN_5 // 5 - for DN Planar Alg -#define nSMPL_RESP_LEN_DN_PA nRESLEN_9 // 9 - for DN Packed Alg -#define nSMPL_RESP_LEN_DI nRESLEN_9 // 9 - for DI Only Alg -#define nSMPL_RESP_LEN_PDI nRESLEN_11 // 11 - for Partial DI Alg - -// Attention: The Message Length is The Number of GRFs with Data Only, without the Header -#define nDPMW_MSG_LEN_STMM nMSGLEN_1 // 1 - For STMM Save -#define nDPMW_MSG_LEN_HIST nMSGLEN_1 // 1 - For Denoise History Save -#define nDPMW_MSG_LEN_PA_DN_DI nMSGLEN_4 // 4 - For DN Curr Save -#define nDPMW_MSG_LEN_PA_DN_NODI nMSGLEN_8 // 8 - For DN Curr Save (denoise only - DI disabled) -#define nDPMW_MSG_LEN_PL_DN_DI nMSGLEN_2 // 2 - For DN Curr Save -#define nDPMW_MSG_LEN_PL_DN_NODI nMSGLEN_4 // 4 - For DN Curr Save (denoise only - DI disabled) - -#define nDPW_BLOCK_SIZE_STMM nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4 // Y block
size 8x4 - -#undef nDPW_BLOCK_SIZE_DI -#undef nDPW_MSG_SIZE_DI -#define nDPW_BLOCK_SIZE_DI nBLOCK_WIDTH_32+nBLOCK_HEIGHT_4 -#define nDPW_MSG_SIZE_DI nMSGLEN_4 - - -//--------------------------------------------------------------------------- -// Kernel GRF variables -//--------------------------------------------------------------------------- -// Defines for DI enabled -#define nDI_PREV_FRAME_LUMA_OFFSET 0 -#define nDI_PREV_FRAME_CHROMA_OFFSET 2 -#define nDI_CURR_FRAME_LUMA_OFFSET 4 -#define nDI_CURR_FRAME_CHROMA_OFFSET 6 -#define nDI_STMM_OFFSET 8 -#define nDI_HIST_OFFSET 9 -#define nDI_CURR_2ND_FIELD_LUMA_OFFSET 10 -#define nDI_CURR_2ND_FIELD_CHROMA_OFFSET 11 - -// Defines for DI disabled -#define nNODI_LUMA_OFFSET 0 -#define nNODI_HIST_OFFSET 4 -#define nNODI_CHROMA_OFFSET 5 - -#ifdef DI_ENABLE - #define nHIST_OFFSET nDI_HIST_OFFSET - #undef nY_NUM_OF_ROWS - #define nY_NUM_OF_ROWS 8 // Number of Y rows per block (4 rows for each frame) - #undef nUV_NUM_OF_ROWS - #define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block - -#endif - -#ifdef DI_DISABLE - #define nHIST_OFFSET nNODI_HIST_OFFSET -#endif - -#if (nSRC_REGION==nREGION_2) - #define ub2SRC_Y ub2BOT_Y - #define ub2SRC_U ub2BOT_U - #define ub2SRC_V ub2BOT_V - #define uwDEST_Y uwBOT_Y - #define uwDEST_U uwBOT_U - #define uwDEST_V uwBOT_V - #define nDEST_YUV_REG nTOP_Y - #define udDEST_YUV udTOP_Y_IO - - #define nRESP nTEMP0 // DI return message requires 12 GRFs - #define nDN_YUV nTOP_Y // Space for Packing DN for next run requires 8 GRFs - - #undef nSRC_REGION - #define nSRC_REGION nREGION_2 - -#else - #define ub2SRC_Y ub2TOP_Y - #define ub2SRC_U ub2TOP_U - #define ub2SRC_V ub2TOP_V - #define uwDEST_Y uwTOP_Y - #define uwDEST_U uwTOP_U - #define uwDEST_V uwTOP_V - #define nDEST_YUV_REG nBOT_Y - #define udDEST_YUV udBOT_Y_IO - #define nRESP nTEMP0 // DI return message requires 12 GRFs - #define nDN_YUV nBOT_Y // Space for Packing DN for next run requires 8 GRFs - - #undef nSRC_REGION - #define nSRC_REGION 
nREGION_1 // REGION_1 will be the source region for first kernel - -#endif - - - - - - - - - -// Message response (Denoised & DI-ed pixels & statistics) -.declare udRESP Base=REG(r,nRESP) ElementSize=4 SrcRegion=REGION(8,1) DstRegion=<1> Type=ud -.declare ubRESP Base=REG(r,nRESP) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub - -// For Denoised Curr Output (Used as Prev in Next Run) -.declare ubDN_YUV Base=REG(r,nDN_YUV) ElementSize=1 Type=ub -.declare udDN_YUV Base=REG(r,nDN_YUV) ElementSize=4 Type=ud -#define npDN_YUV nDN_YUV*nGRFWIB - -// For DI Process Output (1st and 2nd Frames Output) -//.declare udDI_YUV_PRIV Base=REG(r,nTEMP0) ElementSize=4 Type=ud // Previous frame DI output -//.declare udDI_YUV_CURR Base=REG(r,nTEMP0) ElementSize=4 Type=ud // Current frame DI output -//#define npDI_YUV nTEMP0*nGRFWIB - -//--------------------------------------------------------------------------- -// Kernel MRF variables -//--------------------------------------------------------------------------- -#define mMSG_SMPL m1 // Sampler Command is in: m1~m2 -.declare mudMSG_SMPL Base=mMSG_SMPL ElementSize=4 Type=ud -.declare muwMSG_SMPL Base=mMSG_SMPL ElementSize=2 Type=uw - -#define mMSGHDR_DN m1 // Denoise Output: m1~m9 for PA, m3~m5 for PL -.declare mudMSGHDR_DN Base=mMSGHDR_DN ElementSize=4 Type=ud -.declare mubMSGHDR_DN Base=mMSGHDR_DN ElementSize=1 Type=ub - -#define mMSGHDR_STMM m11 // STMM Output: m11~m12 -.declare mudMSGHDR_STMM Base=mMSGHDR_STMM ElementSize=4 Type=ud -#define mMSGHDR_HIST m13 // HIST Output: m13~m14 -.declare mudMSGHDR_HIST Base=mMSGHDR_HIST ElementSize=4 Type=ud // ud view: 4-byte elements, same as mudMSGHDR_STMM - -#define mMSGHDR_DI_1ST m1 // DI output: m1~m5 -.declare mudMSGHDR_DI_1ST Base=mMSGHDR_DI_1ST ElementSize=4 Type=ud -#define mMSGHDR_DI_2ND m6 // DI output: m6~m10 -.declare mudMSGHDR_DI_2ND Base=mMSGHDR_DI_2ND ElementSize=4 Type=ud - -// end of DNDI.inc diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
b/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm deleted file mode 100644 index 2c041fc..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm +++ /dev/null @@ -1,17 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -// Activate the DNDI send command -mov (8) mudMSG_SMPL(0)<1> rMSGSRC.0<8;8,1>:ud NODDCLR // message header -mov (1) muwMSG_SMPL(1,4)<1> wORIX<0;1,0>:w NODDCLR_NODDCHK// horizontal origin -mov (1) muwMSG_SMPL(1,12)<1> wORIY<0;1,0>:w NODDCLR_NODDCHK // vertical origin -//mov (2) muwMSG_SMPL(1,4)<2> wORIX<2;2,1>:w NODDCHK// problem during compile !! when using this line - -send (8) udRESP(0)<1> mMSG_SMPL udDUMMY_NULL nSMPL_ENGINE nSMPL_DI_MSGDSC+nSMPL_RESP_LEN+nBI_CURRENT_SRC_YUV_HW_DI:ud diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm deleted file mode 100644 index 91c5bc2..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm +++ /dev/null @@ -1,20 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - - -// Write denoise history to memory -shr (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w 2:w NODDCLR // X,Y origin / 4 -add (1) rMSGSRC.0<1>:ud rMSGSRC.0<0;1,0>:ud uwSPITCH_DIV2<0;1,0>:uw NODDCLR_NODDCHK// Add pitch to X origin -mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_HIST:ud NODDCHK // block width and height (4x2) - -mov (8) mMSGHDR_HIST<1>:ud rMSGSRC.0<8;8,1>:ud // message header -mov (2) mudMSGHDR_HIST(1)<1> udRESP(nNODI_HIST_OFFSET,0)<2;2,1> // Move denoise history to MRF - -send (8) dNULLREG mMSGHDR_HIST udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_HIST+nBI_STMM_HISTORY_OUTPUT:ud diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm deleted file mode 100644 index 55f71b5..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm +++ /dev/null @@ -1,26 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PA_AVS_IEF_16x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 YUV packed -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Sample.asm" - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:4:4 internal planar -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Unpack_16x8.asm" - -//------------------------------------------------------------------------------ - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm deleted file mode 100644 index 55c201b..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm +++ /dev/null @@ -1,25 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PA_AVS_IEF_8x4.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 YUV packed -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Sample.asm" - -//------------------------------------------------------------------------------ -// Unpacking sampler data to 4:2:0 internal planar -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Unpack_8x4.asm" - -//------------------------------------------------------------------------------ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm deleted file mode 100644 index 6bde8c4..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm +++ /dev/null @@ -1,25 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PA_AVS_IEF_8x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 YUV packed -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Sample.asm" - -//------------------------------------------------------------------------------ -// Unpacking sampler data to 4:2:2 internal planar -//------------------------------------------------------------------------------ -#include "PA_AVS_IEF_Unpack_8x8.asm" - -//------------------------------------------------------------------------------ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm deleted file mode 100644 index 0b533ef..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm +++ /dev/null @@ -1,34 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PA_AVS_IEF_Sample.asm ---------- - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 YUV packed -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // Enable RGB(YUV) channels - mov (1) rAVS_8x8_HDR.2:ud nAVS_RGB_CHANNELS:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_3CH+nSI_SRC_YUV+nBI_CURRENT_SRC_YUV - // Return YUV in 12 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - mov (16) mAVS_8x8_HDR_2.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR_2 udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_3CH+nSI_SRC_YUV+nBI_CURRENT_SRC_YUV - // Return YUV in 12 GRFs - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm deleted file mode 100644 index 5dcc988..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm +++ /dev/null @@ -1,288 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PA_AVS_IEF_Unpack_16x8.asm ---------- - -#ifdef AVS_OUTPUT_16_BIT //Output is packed in AVYU format -// Move first 8x8 words of Y to dest GRF (as packed) - mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(2,0)<4;4,1> - mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(2,8)<4;4,1> - mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(2,4)<4;4,1> - mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(2,12)<4;4,1> - mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(3,0)<4;4,1> - mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(3,8)<4;4,1> - mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(3,4)<4;4,1> - mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(3,12)<4;4,1> - mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(8,0)<4;4,1> - mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(8,8)<4;4,1> - mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(8,4)<4;4,1> - mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(8,12)<4;4,1> - mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(9,0)<4;4,1> - mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(9,8)<4;4,1> - mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(9,4)<4;4,1> - mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(9,12)<4;4,1> - -// Move first 8x8 words of U to dest GRF (as packed) - mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1> - mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1> - mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1> - mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1> - mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1> - mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1> - mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1> - mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1> - mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(10,0)<4;4,1> - mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(10,8)<4;4,1> - mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(10,4)<4;4,1> - mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(10,12)<4;4,1> - mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(11,0)<4;4,1> - mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(11,8)<4;4,1> - mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(11,4)<4;4,1> - mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(11,12)<4;4,1> 
- -// Move first 8x8 words of V to dest GRF (as packed) - mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(0,0)<4;4,1> - mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(0,8)<4;4,1> - mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(0,4)<4;4,1> - mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(0,12)<4;4,1> - mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(1,0)<4;4,1> - mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(1,8)<4;4,1> - mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(1,4)<4;4,1> - mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(1,12)<4;4,1> - mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(6,0)<4;4,1> - mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(6,8)<4;4,1> - mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(6,4)<4;4,1> - mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(6,12)<4;4,1> - mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(7,0)<4;4,1> - mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(7,8)<4;4,1> - mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(7,4)<4;4,1> - mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(7,12)<4;4,1> - -// Move first 8x8 words of A to dest GRF (as packed) - mov (4) uwDEST_Y(0,3)<4> 0:uw - mov (4) uwDEST_Y(1,3)<4> 0:uw - mov (4) uwDEST_Y(4,3)<4> 0:uw - mov (4) uwDEST_Y(5,3)<4> 0:uw - mov (4) uwDEST_Y(8,3)<4> 0:uw - mov (4) uwDEST_Y(9,3)<4> 0:uw - mov (4) uwDEST_Y(12,3)<4> 0:uw - mov (4) uwDEST_Y(13,3)<4> 0:uw - mov (4) uwDEST_Y(16,3)<4> 0:uw - mov (4) uwDEST_Y(17,3)<4> 0:uw - mov (4) uwDEST_Y(20,3)<4> 0:uw - mov (4) uwDEST_Y(21,3)<4> 0:uw - mov (4) uwDEST_Y(24,3)<4> 0:uw - mov (4) uwDEST_Y(25,3)<4> 0:uw - mov (4) uwDEST_Y(28,3)<4> 0:uw - mov (4) uwDEST_Y(29,3)<4> 0:uw - -// Move second 8x8 words of Y to dest GRF - mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1> - mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1> - mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1> - mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1> - mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1> - mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1> - mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1> - mov (4) uwDEST_Y(15,1)<4> 
uwAVS_RESPONSE_2(3,12)<4;4,1> - mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(8,0)<4;4,1> - mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(8,8)<4;4,1> - mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(8,4)<4;4,1> - mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(8,12)<4;4,1> - mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(9,0)<4;4,1> - mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(9,8)<4;4,1> - mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(9,4)<4;4,1> - mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(9,12)<4;4,1> - -// Move second 8x8 words of U to dest GRF - mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1> - mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1> - mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1> - mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1> - mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1> - mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1> - mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1> - mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1> - mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(10,0)<4;4,1> - mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(10,8)<4;4,1> - mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(10,4)<4;4,1> - mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(10,12)<4;4,1> - mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(11,0)<4;4,1> - mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(11,8)<4;4,1> - mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(11,4)<4;4,1> - mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(11,12)<4;4,1> - -// Move second 8x8 words of V to dest GRF - mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(0,0)<4;4,1> - mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(0,8)<4;4,1> - mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(0,4)<4;4,1> - mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(0,12)<4;4,1> - mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(1,0)<4;4,1> - mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(1,8)<4;4,1> - mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(1,4)<4;4,1> - mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(1,12)<4;4,1> - mov (4) uwDEST_Y(18,2)<4> 
uwAVS_RESPONSE_2(6,0)<4;4,1> - mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(6,8)<4;4,1> - mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(6,4)<4;4,1> - mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(6,12)<4;4,1> - mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(7,0)<4;4,1> - mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(7,8)<4;4,1> - mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(7,4)<4;4,1> - mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(7,12)<4;4,1> - -// Move second 8x8 words of A to dest GRF - mov (4) uwDEST_Y(2,3)<4> 0:uw - mov (4) uwDEST_Y(3,3)<4> 0:uw - mov (4) uwDEST_Y(6,3)<4> 0:uw - mov (4) uwDEST_Y(7,3)<4> 0:uw - mov (4) uwDEST_Y(10,3)<4> 0:uw - mov (4) uwDEST_Y(11,3)<4> 0:uw - mov (4) uwDEST_Y(14,3)<4> 0:uw - mov (4) uwDEST_Y(15,3)<4> 0:uw - mov (4) uwDEST_Y(18,3)<4> 0:uw - mov (4) uwDEST_Y(19,3)<4> 0:uw - mov (4) uwDEST_Y(22,3)<4> 0:uw - mov (4) uwDEST_Y(23,3)<4> 0:uw - mov (4) uwDEST_Y(26,3)<4> 0:uw - mov (4) uwDEST_Y(27,3)<4> 0:uw - mov (4) uwDEST_Y(30,3)<4> 0:uw - mov (4) uwDEST_Y(31,3)<4> 0:uw - -/* This section will be used if 16-bit output is needed in planar format -vK - // Move first 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0)<1> uwAVS_RESPONSE(2,0)<8;4,1> - mov (8) uwDEST_Y(1)<1> uwAVS_RESPONSE(2,8)<8;4,1> - mov (8) uwDEST_Y(2)<1> uwAVS_RESPONSE(3,0)<8;4,1> - mov (8) uwDEST_Y(3)<1> uwAVS_RESPONSE(3,8)<8;4,1> - mov (8) uwDEST_Y(4)<1> uwAVS_RESPONSE(8,0)<8;4,1> - mov (8) uwDEST_Y(5)<1> uwAVS_RESPONSE(8,8)<8;4,1> - mov (8) uwDEST_Y(6)<1> uwAVS_RESPONSE(9,0)<8;4,1> - mov (8) uwDEST_Y(7)<1> uwAVS_RESPONSE(9,8)<8;4,1> - - // Move first 8x8 words of V to dest GRF - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(0,0)<8;4,1> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(0,8)<8;4,1> - mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(1,0)<8;4,1> - mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(1,8)<8;4,1> - mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(6,0)<8;4,1> - mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(6,8)<8;4,1> - mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(7,0)<8;4,1> - mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(7,8)<8;4,1> 
- - // Move first 8x8 words of U to dest GRF - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,0)<8;4,1> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(4,8)<8;4,1> - mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(5,0)<8;4,1> - mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(5,8)<8;4,1> - mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(10,0)<8;4,1> - mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(10,8)<8;4,1> - mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(11,0)<8;4,1> - mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(11,8)<8;4,1> - - // Move second 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0,8)<1> uwAVS_RESPONSE_2(2,0)<8;4,1> - mov (8) uwDEST_Y(1,8)<1> uwAVS_RESPONSE_2(2,8)<8;4,1> - mov (8) uwDEST_Y(2,8)<1> uwAVS_RESPONSE_2(3,0)<8;4,1> - mov (8) uwDEST_Y(3,8)<1> uwAVS_RESPONSE_2(3,8)<8;4,1> - mov (8) uwDEST_Y(4,8)<1> uwAVS_RESPONSE_2(8,0)<8;4,1> - mov (8) uwDEST_Y(5,8)<1> uwAVS_RESPONSE_2(8,8)<8;4,1> - mov (8) uwDEST_Y(6,8)<1> uwAVS_RESPONSE_2(9,0)<8;4,1> - mov (8) uwDEST_Y(7,8)<1> uwAVS_RESPONSE_2(9,8)<8;4,1> - - // Move second 8x8 words of V to dest GRF - mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE_2(0,0)<8;4,1> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(0,8)<8;4,1> - mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(1,0)<8;4,1> - mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(1,8)<8;4,1> - mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(6,0)<8;4,1> - mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(6,8)<8;4,1> - mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(7,0)<8;4,1> - mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(7,8)<8;4,1> - - // Move second 8x8 words of U to dest GRF - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(4,0)<8;4,1> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(4,8)<8;4,1> - mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(5,0)<8;4,1> - mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(5,8)<8;4,1> - mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(10,0)<8;4,1> - mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(10,8)<8;4,1> - mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(11,0)<8;4,1> - mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(11,8)<8;4,1> -*/ -#else /* OUTPUT_8_BIT */ - // Move first 
8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0)<1> ubAVS_RESPONSE(2,1)<16;4,2> - mov (8) uwDEST_Y(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2)<1> ubAVS_RESPONSE(3,1)<16;4,2> - mov (8) uwDEST_Y(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_Y(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) uwDEST_Y(6)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_Y(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - - // Move first 8x8 words of V to dest GRF - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(0,1)<16;4,2> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(0,8+1)<16;4,2> - mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(1,1)<16;4,2> - mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(1,8+1)<16;4,2> - mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(6,8+1)<16;4,2> - mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(7,8+1)<16;4,2> - - // Move first 8x8 words of U to dest GRF - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2> - mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2> - mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2> - mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(11,1)<16;4,2> - mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2> - - // Move second 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2> - mov (8) uwDEST_Y(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2> - mov (8) uwDEST_Y(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) uwDEST_Y(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) uwDEST_Y(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) uwDEST_Y(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - - // Move second 8x8 words of V to dest GRF - mov (8) uwDEST_V(0,8)<1> 
ubAVS_RESPONSE_2(0,1)<16;4,2> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(0,8+1)<16;4,2> - mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(1,1)<16;4,2> - mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(1,8+1)<16;4,2> - mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2> - mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2> - mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2> - mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2> - - // Move second 8x8 words of U to dest GRF - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2> - mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2> - mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2> - mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(10,1)<16;4,2> - mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2> - mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2> - mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2> -#endif -//------------------------------------------------------------------------------ - - // Re-define new number of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm deleted file mode 100644 index 01d451d..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm +++ /dev/null @@ -1,77 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PA_AVS_IEF_Unpack_8x4.asm ---------- - -// Yoni: In order to optimize unpacking, 3 methods are being checked: -// 1. AVS_ORIGINAL -// 2. AVS_ROUND_TO_8_BITS -// 3. 
AVS_INDIRECT_ACCESS -// -// Only 1 method should stay in the code - - -//#define AVS_ROUND_TO_8_BITS -//#define AVS_INDIRECT_ACCESS - - - // Move first 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0)<1> ubAVS_RESPONSE(2,1)<16;4,2> - mov (8) uwDEST_Y(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2)<1> ubAVS_RESPONSE(3,1)<16;4,2> - mov (8) uwDEST_Y(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_Y(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) uwDEST_Y(6)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_Y(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - - // Move first 4x8 words of V to dest GRF - mov (4) uwDEST_V(0)<1> ubAVS_RESPONSE(0,1)<16;2,4> - mov (4) uwDEST_V(0,8)<1> ubAVS_RESPONSE(1,1)<16;2,4> - mov (4) uwDEST_V(1)<1> ubAVS_RESPONSE(6,1)<16;2,4> - mov (4) uwDEST_V(1,8)<1> ubAVS_RESPONSE(7,1)<16;2,4> - - // Move first 4x8 words of U to dest GRF - mov (4) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;2,4> - mov (4) uwDEST_U(0,8)<1> ubAVS_RESPONSE(5,1)<16;2,4> - mov (4) uwDEST_U(1)<1> ubAVS_RESPONSE(10,1)<16;2,4> - mov (4) uwDEST_U(1,8)<1> ubAVS_RESPONSE(11,1)<16;2,4> - - // Move second 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2> - mov (8) uwDEST_Y(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2> - mov (8) uwDEST_Y(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) uwDEST_Y(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) uwDEST_Y(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) uwDEST_Y(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - - // Move second 4x8 words of V to dest GRF - mov (4) uwDEST_V(0,4)<1> ubAVS_RESPONSE_2(0,1)<16;2,4> - mov (4) uwDEST_V(0,12)<1> ubAVS_RESPONSE_2(1,1)<16;2,4> - mov (4) uwDEST_V(1,4)<1> ubAVS_RESPONSE_2(6,1)<16;2,4> - mov (4) uwDEST_V(1,12)<1> ubAVS_RESPONSE_2(7,1)<16;2,4> - - // Move second 4x8 words of U to dest GRF - mov (4) uwDEST_U(0,4)<1> 
ubAVS_RESPONSE_2(4,1)<16;2,4> - mov (4) uwDEST_U(0,12)<1> ubAVS_RESPONSE_2(5,1)<16;2,4> - mov (4) uwDEST_U(1,4)<1> ubAVS_RESPONSE_2(10,1)<16;2,4> - mov (4) uwDEST_U(1,12)<1> ubAVS_RESPONSE_2(11,1)<16;2,4> - -//------------------------------------------------------------------------------ - - // Re-define new number of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm deleted file mode 100644 index 91b2398..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm +++ /dev/null @@ -1,93 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PA_AVS_IEF_Unpack_8x8.asm ---------- - -// Yoni: In order to optimize unpacking, 3 methods are being checked: -// 1. AVS_ORIGINAL -// 2. AVS_ROUND_TO_8_BITS -// 3. 
AVS_INDIRECT_ACCESS -// -// Only 1 method should stay in the code - - -//#define AVS_ROUND_TO_8_BITS -//#define AVS_INDIRECT_ACCESS - - - // Move first 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0)<1> ubAVS_RESPONSE(2,1)<16;4,2> - mov (8) uwDEST_Y(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2)<1> ubAVS_RESPONSE(3,1)<16;4,2> - mov (8) uwDEST_Y(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_Y(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) uwDEST_Y(6)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_Y(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - - // Move first 4x8 words of V to dest GRF - mov (4) uwDEST_V(0)<1> ubAVS_RESPONSE(0,1)<16;2,4> - mov (4) uwDEST_V(0,8)<1> ubAVS_RESPONSE(0,8+1)<16;2,4> - mov (4) uwDEST_V(1)<1> ubAVS_RESPONSE(1,1)<16;2,4> - mov (4) uwDEST_V(1,8)<1> ubAVS_RESPONSE(1,8+1)<16;2,4> - mov (4) uwDEST_V(2)<1> ubAVS_RESPONSE(6,1)<16;2,4> - mov (4) uwDEST_V(2,8)<1> ubAVS_RESPONSE(6,8+1)<16;2,4> - mov (4) uwDEST_V(3)<1> ubAVS_RESPONSE(7,1)<16;2,4> - mov (4) uwDEST_V(3,8)<1> ubAVS_RESPONSE(7,8+1)<16;2,4> - - // Move first 4x8 words of U to dest GRF - mov (4) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;2,4> - mov (4) uwDEST_U(0,8)<1> ubAVS_RESPONSE(4,8+1)<16;2,4> - mov (4) uwDEST_U(1)<1> ubAVS_RESPONSE(5,1)<16;2,4> - mov (4) uwDEST_U(1,8)<1> ubAVS_RESPONSE(5,8+1)<16;2,4> - mov (4) uwDEST_U(2)<1> ubAVS_RESPONSE(10,1)<16;2,4> - mov (4) uwDEST_U(2,8)<1> ubAVS_RESPONSE(10,8+1)<16;2,4> - mov (4) uwDEST_U(3)<1> ubAVS_RESPONSE(11,1)<16;2,4> - mov (4) uwDEST_U(3,8)<1> ubAVS_RESPONSE(11,8+1)<16;2,4> - - // Move second 8x8 words of Y to dest GRF - mov (8) uwDEST_Y(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2> - mov (8) uwDEST_Y(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2> - mov (8) uwDEST_Y(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2> - mov (8) uwDEST_Y(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2> - mov (8) uwDEST_Y(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) uwDEST_Y(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) 
uwDEST_Y(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) uwDEST_Y(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - - // Move second 4x8 words of V to dest GRF - mov (4) uwDEST_V(0,4)<1> ubAVS_RESPONSE_2(0,1)<16;2,4> - mov (4) uwDEST_V(0,12)<1> ubAVS_RESPONSE_2(0,8+1)<16;2,4> - mov (4) uwDEST_V(1,4)<1> ubAVS_RESPONSE_2(1,1)<16;2,4> - mov (4) uwDEST_V(1,12)<1> ubAVS_RESPONSE_2(1,8+1)<16;2,4> - mov (4) uwDEST_V(2,4)<1> ubAVS_RESPONSE_2(6,1)<16;2,4> - mov (4) uwDEST_V(2,12)<1> ubAVS_RESPONSE_2(6,8+1)<16;2,4> - mov (4) uwDEST_V(3,4)<1> ubAVS_RESPONSE_2(7,1)<16;2,4> - mov (4) uwDEST_V(3,12)<1> ubAVS_RESPONSE_2(7,8+1)<16;2,4> - - // Move second 4x8 words of U to dest GRF - mov (4) uwDEST_U(0,4)<1> ubAVS_RESPONSE_2(4,1)<16;2,4> - mov (4) uwDEST_U(0,12)<1> ubAVS_RESPONSE_2(4,8+1)<16;2,4> - mov (4) uwDEST_U(1,4)<1> ubAVS_RESPONSE_2(5,1)<16;2,4> - mov (4) uwDEST_U(1,12)<1> ubAVS_RESPONSE_2(5,8+1)<16;2,4> - mov (4) uwDEST_U(2,4)<1> ubAVS_RESPONSE_2(10,1)<16;2,4> - mov (4) uwDEST_U(2,12)<1> ubAVS_RESPONSE_2(10,8+1)<16;2,4> - mov (4) uwDEST_U(3,4)<1> ubAVS_RESPONSE_2(11,1)<16;2,4> - mov (4) uwDEST_U(3,12)<1> ubAVS_RESPONSE_2(11,8+1)<16;2,4> - -//------------------------------------------------------------------------------ - - // Re-define new number of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm deleted file mode 100644 index 6aa91c8..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm +++ /dev/null @@ -1,139 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -#define DI_ENABLE - - #include "DNDI.inc" - - #ifdef DI_ONLY - #undef nSMPL_RESP_LEN - #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DI // set the number of GRF - #else - #undef nSMPL_RESP_LEN - #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DNDI // set the number of GRF - #endif - - #undef nDPW_BLOCK_SIZE_HIST - #define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1 // HIST Block Size for Write is 4x1 (nBLOCK_WIDTH_4 x nBLOCK_HEIGHT_1) - #undef nDPW_BLOCK_SIZE_DN - #define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_32+nBLOCK_HEIGHT_4 // DN Block Size for Write is 32x4 - -////////////////////////////////////// Run the DN Algorithm /////////////////////////////////////// - #include "DNDI_Command.asm" - -////////////////////////////////////// Rearrange for Internal Planar ////////////////////////////// - //// move the previous frame Y component to internal planar format - //$for (0; <nY_NUM_OF_ROWS/2; 1) { - // mov (16) uwDEST_Y(%1,0)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16) - //} - //// move the previous frame U,V components to internal planar format - //$for (0; <nUV_NUM_OF_ROWS/2; 1) { - // mov (8) uwDEST_U(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - // mov (8) uwDEST_V(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - //} - //// move the current frame Y component to internal planar format - //$for (0; <nY_NUM_OF_ROWS/2; 1) { - // mov (16) uwDEST_Y(%1+4,0)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) - //} - //// move the current frame U,V components to internal planar format - //$for (0; <nUV_NUM_OF_ROWS/2; 1) { - // mov (8) uwDEST_U(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - // mov (8) uwDEST_V(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - //} - -////////////////////////////////////// Save the STMM Data for Next Run ///////////////////////// - // Write STMM to memory - shr (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w NODDCLR // X origin / 2 - mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w 
NODDCLR_NODDCHK // Y origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_STMM:ud NODDCHK // block width and height (8x4) - mov (8) mudMSGHDR_STMM(0)<1> rMSGSRC.0<8;8,1>:ud // message header - mov (8) mudMSGHDR_STMM(1)<1> udRESP(nDI_STMM_OFFSET,0) // Move STMM to MRF - send (8) dNULLREG mMSGHDR_STMM udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud - -////////////////////////////////////// Save the History Data for Next Run ///////////////////////// -#ifdef DI_ONLY -#else - - #include "DI_Hist_Save.asm" - -////////////////////////////////////// Pack and Save the DN Curr Frame for Next Run /////////////// - // check top/bottom field first - cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w - - add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:uw - //set the save DN position - shl (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w NODDCLR // X origin * 2 - mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w NODDCLR_NODDCHK // Y origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud NODDCHK // block width and height (32x4, see nDPW_BLOCK_SIZE_DN define above) - mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - - - (f0.0) jmpi (1) TOP_FIELD_FIRST - -BOTTOM_FIELD_FIRST: - //$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - // mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16) // 2nd field luma from current frame (line 0,2) - // mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,16) // 1st field luma from current frame (line 1,3) - // mov (8) r[pCF_U_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2> // 2nd field U from current frame (line 0,2) - // mov (8) r[pCF_V_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2> // 2nd field V from current frame (line 0,2) - // mov (8) r[pCF_U_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16+1)<16;8,2> // 1st field U from current frame (line 1,3) - // mov (8) r[pCF_V_OFFSET, %1+1*32]<4>:ub 
ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16)<16;8,2> // 1st field V from current frame (line 1,3) - //} - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16) // 2nd field luma from current frame (line 0,2) - mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,16) // 1st field luma from current frame (line 1,3) - } - - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (8) r[pCF_U_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2> // 2nd field U from current frame (line 0,2) - mov (8) r[pCF_U_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16+1)<16;8,2> // 1st field U from current frame (line 1,3) - } - - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (8) r[pCF_V_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2> // 2nd field V from current frame (line 0,2) - mov (8) r[pCF_V_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16)<16;8,2> // 1st field V from current frame (line 1,3) - } - - jmpi (1) SAVE_DN_CURR - -TOP_FIELD_FIRST: - //$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - // mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0) // 1st field luma from current frame (line 0,2) - // mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16) // 2nd field luma from current frame (line 1,3) - // mov (8) r[pCF_U_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,1)<16;8,2> // 1st field U from current frame (line 0,2) - // mov (8) r[pCF_V_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,0)<16;8,2> // 1st field V from current frame (line 0,2) - // mov (8) r[pCF_U_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2> // 2nd field U from current frame (line 1,3) - // mov (8) r[pCF_V_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2> // 2nd field V from current frame (line 1,3) - //} - $for (0,0; 
<nY_NUM_OF_ROWS/2; 2,1) { - mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0) // 1st field luma from current frame (line 0,2) - mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16) // 2nd field luma from current frame (line 1,3) - } - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (8) r[pCF_U_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,1)<16;8,2> // 1st field U from current frame (line 0,2) - mov (8) r[pCF_U_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2> // 2nd field U from current frame (line 1,3) - } - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (8) r[pCF_V_OFFSET, %1*32]<4>:ub ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,0)<16;8,2> // 1st field V from current frame (line 0,2) - mov (8) r[pCF_V_OFFSET, %1+1*32]<4>:ub ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2> // 2nd field V from current frame (line 1,3) - } - -SAVE_DN_CURR: - $for(0; <nY_NUM_OF_ROWS/2; 1) { - mov (8) mudMSGHDR_DN(%1+1)<1> udDN_YUV(%1)REGION(8,1) - } - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PA_DN_DI+nBI_DESTINATION_YUV:ud -#endif - -// Save Processed frames -#include "DI_Save_PA.asm" - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm deleted file mode 100644 index ef88a3c..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm +++ /dev/null @@ -1,54 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -#define DI_DISABLE - -#include "DNDI.inc" - -#undef nY_NUM_OF_ROWS -#define nY_NUM_OF_ROWS 8 // Number of Y rows per block -#undef nUV_NUM_OF_ROWS -#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block - -#undef nSMPL_RESP_LEN -#define nSMPL_RESP_LEN nSMPL_RESP_LEN_DN_PA // Set the Number of GRFs in DNDI response -#undef nDPW_BLOCK_SIZE_DN -#define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8 // DN Curr Block Size for Write is 32x8 -#undef nDPW_BLOCK_SIZE_HIST -#define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_2 // HIST Block Size for Write is 4x2 - -////////////////////////////////////// Run the DN Algorithm /////////////////////////////////////// -#include "DNDI_COMMAND.asm" // NOTE(review): PA_DNDI_ALG.asm spells this include "DNDI_Command.asm"; confirm the on-disk name (matters on case-sensitive filesystems) - -////////////////////////////////////// Save the History Data for Next Run ///////////////////////// -#include "DNDI_Hist_Save.asm" - -////////////////////////////////////// Pack and Save the DN Curr Frame for Next Run /////////////// -add (4) pCF_Y_OFFSET<1>:uw ubDEST_CF_OFFSET<4;4,1>:ub npDN_YUV:w -$for (0; <nY_NUM_OF_ROWS; 1) { - mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub ubRESP(nNODI_LUMA_OFFSET,%1*16)<16;16,1> // copy line of Y -} -$for (0; <nUV_NUM_OF_ROWS; 1) { - mov (8) r[pCF_U_OFFSET, %1*32]<4>:ub ubRESP(nNODI_CHROMA_OFFSET,%1*16+1)<16;8,2> // copy line of U - mov (8) r[pCF_V_OFFSET, %1*32]<4>:ub ubRESP(nNODI_CHROMA_OFFSET,%1*16)<16;8,2> // copy line of V -} - -shl (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w // X origin * 2 (422 output) -mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w // Y origin -mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (32x8) -mov (8) mMSGHDR_DN<1>:ud rMSGSRC<8;8,1>:ud // message header - -$for(0; <nY_NUM_OF_ROWS; 2) { - mov (16) mudMSGHDR_DN(1+%1)<1> udDN_YUV(%1)REGION(8,1) // Move DN Curr to MRF -} -send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PA_DN_NODI+nBI_DESTINATION_YUV:ud - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_Scaling.asm 
b/i965_drv_video/shaders/post_processing/Core_Kernels/PA_Scaling.asm deleted file mode 100644 index c2a1b1e..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PA_Scaling.asm +++ /dev/null @@ -1,70 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PA_Scaling.asm ---------- -#include "Scaling.inc" - - // Build 16 elements ramp in float32 and normalized it -// mov (8) SAMPLER_RAMP(0)<1> 0x76543210:v -// add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f -mov (4) SAMPLER_RAMP(0)<1> 0x48403000:vf //3, 2, 1, 0 in float vector -mov (4) SAMPLER_RAMP(0,4)<1> 0x5C585450:vf //7, 6, 5, 4 in float vector -add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f - -//Module: PrepareScaleCoord.asm - - // Setup for sampler msg hdr - mov (2) rMSGSRC.0<1>:ud 0:ud { NoDDClr } // Unused fields - mov (1) rMSGSRC.2<1>:ud 0:ud { NoDDChk } // Write and offset - - // Calculate 16 v based on the step Y and vertical origin - mov (16) mfMSGPAYLOAD(2)<1> fSRC_VID_V_ORI<0;1,0>:f - mov (16) SCALE_COORD_Y<1>:f fSRC_VID_V_ORI<0;1,0>:f - - // Calculate 16 u based on the step X and hori origin -// line (16) mfMSGPAYLOAD(0)<1> SCALE_STEP_X<0;1,0>:f SAMPLER_RAMP(0) // Assign to mrf directly - mov (16) acc0:f fSRC_VID_H_ORI<0;1,0>:f { Compr } - mac (16) mfMSGPAYLOAD(0)<1> fVIDEO_STEP_X<0;1,0>:f SAMPLER_RAMP(0) { Compr } - - //Setup the constants for line instruction - mov (1) SCALE_LINE_P255<1>:f 255.0:f { NoDDClr } //{ NoDDClr, NoDDChk } - mov (1) SCALE_LINE_P0_5<1>:f 0.5:f { NoDDChk } - -//------------------------------------------------------------------------------ - -$for (0; <nY_NUM_OF_ROWS; 1) { - - // Read 16 sampled pixels and store them in float32 in 8 GRFs in the order of BGRA (VYUA). 
- mov (8) MSGHDR_SCALE.0:ud rMSGSRC.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (16) SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_YUV+nBI_CURRENT_SRC_YUV - - // Calculate 16 v for next line - add (16) mfMSGPAYLOAD(2)<1> SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Assign to mrf directly - add (16) SCALE_COORD_Y<1>:f SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Advance the SCALE_COORD_Y copy that both adds read as their source - - // Scale back to [0, 255], convert f to ud - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(0) { Compr } // Process B, V - mov (16) SCALE_RESPONSE_YD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(2) { Compr } // Process G, Y - mov (16) SCALE_RESPONSE_YD(2)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(4) { Compr } // Process R, U - mov (16) SCALE_RESPONSE_YD(4)<1> acc0:f { Compr } - - mov (16) DEST_V(%1)<1> SCALE_RESPONSE_YB(0) //possible error due to truncation - vK - mov (16) DEST_Y(%1)<1> SCALE_RESPONSE_YB(2) //possible error due to truncation - vK - mov (16) DEST_U(%1)<1> SCALE_RESPONSE_YB(4) //possible error due to truncation - vK - -} - - #define nSRC_REGION nREGION_1 - -//------------------------------------------------------------------------------ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm deleted file mode 100644 index 2f7b735..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm +++ /dev/null @@ -1,60 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL2_AVS_IEF_16x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y each -// 2 sampler reads for 8x8 U and 8x8 V (NV11\P208 input surface) -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling: enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 1st 8x8 U and V sampling - // Enable red and blue channels - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_BLUE_CHANNELS:ud - - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV - // Return U and V in 8 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - // Enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - - // 2nd 8x8 U and V sampling - // Enable red and blue channels - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_BLUE_CHANNELS:ud - - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV - // Return U and V in 8 GRFs - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 
4:4:4 internal planar - //------------------------------------------------------------------------------ - #include "PL2_AVS_IEF_Unpack_16x8.asm" - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm deleted file mode 100644 index 9b221e7..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm +++ /dev/null @@ -1,58 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PL2_AVS_IEF_8x4.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y each -// 1 sampler read for 8x8 U and 8x8 V (NV11\NV12 input surface) -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling: enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 8x8 U and V sampling - // Enable red and blue channels - // Only 8x4 will be used - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_BLUE_CHANNELS:ud - - // Calculate Chroma Step Size: - // for H direction: 16 Luma samples are covered by 8 Chroma samples. Thus Chroma_Step_X = 2 * Luma_Step_X - // for V direction: 8 Luma samples are covered by 8 Chroma samples. 
Thus Chroma_Step_Y = Luma_Step_Y - mul (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f 2.0:f // Chroma step X = 2 * luma step X (fVIDEO_STEP_X) - - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV - // Return U and V in 8 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - // Enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:2:0 internal planar -//------------------------------------------------------------------------------ - #include "PL2_AVS_IEF_Unpack_8x4.asm" - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm deleted file mode 100644 index 404fbd0..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm +++ /dev/null @@ -1,57 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL2_AVS_IEF_8x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y each -// 1 sampler read for 8x8 U and 8x8 V (NV11\NV12 input surface) -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling: enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 8x8 U and V sampling - // Enable red and blue channels (both chroma planes come back from this one read) - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_BLUE_CHANNELS:ud - - // Calculate Chroma Step Size: - // for H direction: 16 Luma samples are covered by 8 Chroma samples. Thus Chroma_Step_X = 2 * Luma_Step_X - // for V direction: 8 Luma samples are covered by 8 Chroma samples. 
Thus Chroma_Step_Y = Luma_Step_Y - mul (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f 2.0:f // Chroma step X = 2 * luma step X (fVIDEO_STEP_X) - - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV - // Return U and V in 8 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - // Enable green channel only - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud - - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:2:2 internal planar -//------------------------------------------------------------------------------ - #include "PL2_AVS_IEF_Unpack_8x8.asm" - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm deleted file mode 100644 index 6e67557..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm +++ /dev/null @@ -1,271 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL2_AVS_IEF_Unpack_16x8.asm ---------- - -#ifdef AVS_OUTPUT_16_BIT // Output is packed in AVYU format, one 16-bit word per component -// Move first 8x8 words of Y to dest GRF (as packed); Y lands in word 1 of every 4-word pixel - mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(0,0)<4;4,1> - mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(0,8)<4;4,1> - mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(0,4)<4;4,1> - mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(0,12)<4;4,1> - mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(1,0)<4;4,1> - mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(1,8)<4;4,1> - mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(1,4)<4;4,1> - mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(1,12)<4;4,1> - mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(2,0)<4;4,1> - mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(2,8)<4;4,1> - mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(2,4)<4;4,1> - mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(2,12)<4;4,1> - mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(3,0)<4;4,1> - mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(3,8)<4;4,1> - mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(3,4)<4;4,1> - mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(3,12)<4;4,1> - -// Move first 8x8 words of U to dest GRF (as packed); U lands in word 0 of every 4-word pixel - mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1> - mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1> - mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1> - mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1> - mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1> - mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1> - mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1> - mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1> - mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(8,0)<4;4,1> - mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(8,8)<4;4,1> - mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(8,4)<4;4,1> - mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(8,12)<4;4,1> - mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(9,0)<4;4,1> - mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(9,8)<4;4,1> - mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(9,4)<4;4,1> - mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(9,12)<4;4,1> - -// 
Move first 8x8 words of V to dest GRF (as packed) - mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(6,0)<4;4,1> - mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(6,8)<4;4,1> - mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(6,4)<4;4,1> - mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(6,12)<4;4,1> - mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(7,0)<4;4,1> - mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(7,8)<4;4,1> - mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(7,4)<4;4,1> - mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(7,12)<4;4,1> - mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(10,0)<4;4,1> - mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(10,8)<4;4,1> - mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(10,4)<4;4,1> - mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(10,12)<4;4,1> - mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(11,0)<4;4,1> - mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(11,8)<4;4,1> - mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(11,4)<4;4,1> - mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(11,12)<4;4,1> - -// Fill first 8x8 words of A in dest GRF (as packed) with zero; A is word 3 of every 4-word pixel - mov (4) uwDEST_Y(0,3)<4> 0:uw - mov (4) uwDEST_Y(1,3)<4> 0:uw - mov (4) uwDEST_Y(4,3)<4> 0:uw - mov (4) uwDEST_Y(5,3)<4> 0:uw - mov (4) uwDEST_Y(8,3)<4> 0:uw - mov (4) uwDEST_Y(9,3)<4> 0:uw - mov (4) uwDEST_Y(12,3)<4> 0:uw - mov (4) uwDEST_Y(13,3)<4> 0:uw - mov (4) uwDEST_Y(16,3)<4> 0:uw - mov (4) uwDEST_Y(17,3)<4> 0:uw - mov (4) uwDEST_Y(20,3)<4> 0:uw - mov (4) uwDEST_Y(21,3)<4> 0:uw - mov (4) uwDEST_Y(24,3)<4> 0:uw - mov (4) uwDEST_Y(25,3)<4> 0:uw - mov (4) uwDEST_Y(28,3)<4> 0:uw - mov (4) uwDEST_Y(29,3)<4> 0:uw - -// Move second 8x8 words of Y to dest GRF (as packed, word 1 of every 4-word pixel) - mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(0,0)<4;4,1> - mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(0,8)<4;4,1> - mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(0,4)<4;4,1> - mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(0,12)<4;4,1> - mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(1,0)<4;4,1> - mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(1,8)<4;4,1> - mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(1,4)<4;4,1> - mov (4) uwDEST_Y(15,1)<4> 
uwAVS_RESPONSE_2(1,12)<4;4,1> - mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1> - mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1> - mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1> - mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1> - mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1> - mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1> - mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1> - mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1> - -// Move second 8x8 words of U to dest GRF (as packed, word 0 of every 4-word pixel) - mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1> - mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1> - mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1> - mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1> - mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1> - mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1> - mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1> - mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1> - mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(8,0)<4;4,1> - mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(8,8)<4;4,1> - mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(8,4)<4;4,1> - mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(8,12)<4;4,1> - mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(9,0)<4;4,1> - mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(9,8)<4;4,1> - mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(9,4)<4;4,1> - mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(9,12)<4;4,1> - -// Move second 8x8 words of V to dest GRF (as packed, word 2 of every 4-word pixel) - mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(6,0)<4;4,1> - mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(6,8)<4;4,1> - mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(6,4)<4;4,1> - mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(6,12)<4;4,1> - mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(7,0)<4;4,1> - mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(7,8)<4;4,1> - mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(7,4)<4;4,1> - mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(7,12)<4;4,1> - mov (4) uwDEST_Y(18,2)<4> 
uwAVS_RESPONSE_2(10,0)<4;4,1> - mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(10,8)<4;4,1> - mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(10,4)<4;4,1> - mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(10,12)<4;4,1> - mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(11,0)<4;4,1> - mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(11,8)<4;4,1> - mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(11,4)<4;4,1> - mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(11,12)<4;4,1> - -// Fill second 8x8 words of A in dest GRF with zero (word 3 of every 4-word pixel) - mov (4) uwDEST_Y(2,3)<4> 0:uw - mov (4) uwDEST_Y(3,3)<4> 0:uw - mov (4) uwDEST_Y(6,3)<4> 0:uw - mov (4) uwDEST_Y(7,3)<4> 0:uw - mov (4) uwDEST_Y(10,3)<4> 0:uw - mov (4) uwDEST_Y(11,3)<4> 0:uw - mov (4) uwDEST_Y(14,3)<4> 0:uw - mov (4) uwDEST_Y(15,3)<4> 0:uw - mov (4) uwDEST_Y(18,3)<4> 0:uw - mov (4) uwDEST_Y(19,3)<4> 0:uw - mov (4) uwDEST_Y(22,3)<4> 0:uw - mov (4) uwDEST_Y(23,3)<4> 0:uw - mov (4) uwDEST_Y(26,3)<4> 0:uw - mov (4) uwDEST_Y(27,3)<4> 0:uw - mov (4) uwDEST_Y(30,3)<4> 0:uw - mov (4) uwDEST_Y(31,3)<4> 0:uw - -/* This section will be used if 16-bit output is needed in planar format -vK - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> uwAVS_RESPONSE(%1,0)<8;4,1> - mov (8) uwDEST_Y(%1*2+1)<1> uwAVS_RESPONSE(%1,8)<8;4,1> - } - - // Move 1st 8x8 words of U to dest GRF (full 16-bit words) - mov (8) uwDEST_U(0)<1> uwAVS_RESPONSE(4,0)<8;4,1> - mov (8) uwDEST_U(1)<1> uwAVS_RESPONSE(4,8)<8;4,1> - mov (8) uwDEST_U(2)<1> uwAVS_RESPONSE(5,0)<8;4,1> - mov (8) uwDEST_U(3)<1> uwAVS_RESPONSE(5,8)<8;4,1> - mov (8) uwDEST_U(4)<1> uwAVS_RESPONSE(8,0)<8;4,1> - mov (8) uwDEST_U(5)<1> uwAVS_RESPONSE(8,8)<8;4,1> - mov (8) uwDEST_U(6)<1> uwAVS_RESPONSE(9,0)<8;4,1> - mov (8) uwDEST_U(7)<1> uwAVS_RESPONSE(9,8)<8;4,1> - - // Move 1st 8x8 words of V to dest GRF (full 16-bit words) - mov (8) uwDEST_V(0)<1> uwAVS_RESPONSE(6,0)<8;4,1> - mov (8) uwDEST_V(1)<1> uwAVS_RESPONSE(6,8)<8;4,1> - mov (8) uwDEST_V(2)<1> uwAVS_RESPONSE(7,0)<8;4,1> - mov (8) uwDEST_V(3)<1> uwAVS_RESPONSE(7,8)<8;4,1> - mov (8) uwDEST_V(4)<1> uwAVS_RESPONSE(10,0)<8;4,1> - mov (8) uwDEST_V(5)<1> uwAVS_RESPONSE(10,8)<8;4,1> - mov (8) uwDEST_V(6)<1> uwAVS_RESPONSE(11,0)<8;4,1> - mov (8) uwDEST_V(7)<1> uwAVS_RESPONSE(11,8)<8;4,1> - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> uwAVS_RESPONSE_2(%1,0)<8;4,1> - mov (8) uwDEST_Y(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1,8)<8;4,1> - } - - // Move 2nd 8x8 words of U to dest GRF (full 16-bit words) - mov (8) uwDEST_U(0,8)<1> uwAVS_RESPONSE_2(4,0)<8;4,1> - mov (8) uwDEST_U(1,8)<1> uwAVS_RESPONSE_2(4,8)<8;4,1> - mov (8) uwDEST_U(2,8)<1> uwAVS_RESPONSE_2(5,0)<8;4,1> - mov (8) uwDEST_U(3,8)<1> uwAVS_RESPONSE_2(5,8)<8;4,1> - mov (8) uwDEST_U(4,8)<1> uwAVS_RESPONSE_2(8,0)<8;4,1> - mov (8) uwDEST_U(5,8)<1> uwAVS_RESPONSE_2(8,8)<8;4,1> - mov (8) uwDEST_U(6,8)<1> uwAVS_RESPONSE_2(9,0)<8;4,1> - mov (8) uwDEST_U(7,8)<1> uwAVS_RESPONSE_2(9,8)<8;4,1> - - // Move 2nd 8x8 words of V to dest GRF (full 16-bit words) - mov (8) uwDEST_V(0,8)<1> uwAVS_RESPONSE_2(6,0)<8;4,1> - mov (8) uwDEST_V(1,8)<1> uwAVS_RESPONSE_2(6,8)<8;4,1> - mov (8) uwDEST_V(2,8)<1> uwAVS_RESPONSE_2(7,0)<8;4,1> - mov (8) uwDEST_V(3,8)<1> uwAVS_RESPONSE_2(7,8)<8;4,1> - mov (8) uwDEST_V(4,8)<1> uwAVS_RESPONSE_2(10,0)<8;4,1> - mov (8) uwDEST_V(5,8)<1> uwAVS_RESPONSE_2(10,8)<8;4,1> - mov (8) uwDEST_V(6,8)<1> uwAVS_RESPONSE_2(11,0)<8;4,1> - mov (8) uwDEST_V(7,8)<1> uwAVS_RESPONSE_2(11,8)<8;4,1> -*/ -#else - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 1st 8x8 words of U to dest GRF (Copy high byte in a word). NOTE(review): the destination below is uwDEST_V although this comment says U; the 16-bit packed path and the 8x4/8x8 unpack files treat response GRFs 4,5,8,9 as U. Confirm which plane is intended. - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2> - mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2> - mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - - // Move 1st 8x8 words of V to dest GRF (Copy high byte in a word). NOTE(review): the destination below is uwDEST_U although this comment says V; confirm which plane is intended. - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(6,8+1)<16;4,2> - mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(7,8+1)<16;4,2> - mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2> - mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(11,1)<16;4,2> - mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2> - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 2nd 8x8 words of U to dest GRF (Copy high byte in a word). NOTE(review): the destination below is uwDEST_V although this comment says U; confirm which plane is intended. - mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2> - mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2> - mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2> - mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - - // Move 2nd 8x8 words of V to dest GRF (Copy high byte in a word). NOTE(review): the destination below is uwDEST_U although this comment says V; confirm which plane is intended. - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2> - mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2> - mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2> - mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(10,1)<16;4,2> - mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2> - mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2> - mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2> -#endif - - // Re-define the Y and U/V row counts (8 and 8) - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm deleted file mode 100644 index 37202f4..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm +++ /dev/null @@ -1,45 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PL2_AVS_IEF_Unpack_8x4.asm ---------- - - // Move first 8x8 words of Y to dest GRF at lower 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x4 words of U to dest GRF (Copy high byte in a word) - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE(9,1)<16;4,2> - - // Move 8x4 words of V to dest GRF (Copy high byte in a word) - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE(11,1)<16;4,2> - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word - } - -//------------------------------------------------------------------------------ - - // Re-define the Y and U/V row counts (8 and 4) - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 4 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm deleted file mode 100644 index ec9f754..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm +++ /dev/null @@ -1,53 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PL2_AVS_IEF_Unpack_8x8.asm ---------- - - // Move first 8x8 words of Y to dest GRF at lower 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x8 words of U to dest GRF (Copy high byte in a word) - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE(4,8+1)<16;4,2> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE(5,8+1)<16;4,2> - mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - - // Move 8x8 words of V to dest GRF (Copy high byte in a word) - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE(6,8+1)<16;4,2> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE(7,8+1)<16;4,2> - mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE(10,8+1)<16;4,2> - mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(11,1)<16;4,2> - mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE(11,8+1)<16;4,2> - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word - } - -//------------------------------------------------------------------------------ - - // Re-define the Y and U/V row counts (8 and 8) - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_Scaling.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_Scaling.asm deleted file mode 100644 index 7849afd..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL2_Scaling.asm +++ /dev/null @@ -1,71 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL2_Scaling.asm ---------- -#include "Scaling.inc" - - // Build a 16-element ramp (0.0 .. 15.0) in float32 -// mov (8) SAMPLER_RAMP(0)<1> 0x76543210:v -// add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f -mov (4) SAMPLER_RAMP(0)<1> 0x48403000:vf // 3, 2, 1, 0 in float vector (MSB to LSB) -mov (4) SAMPLER_RAMP(0,4)<1> 0x5C585450:vf // 7, 6, 5, 4 in float vector (MSB to LSB) -add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f - -//Module: PrepareScaleCoord.asm - - // Setup for sampler msg hdr - mov (2) rMSGSRC.0<1>:ud 0:ud { NoDDClr } // Unused fields - mov (1) rMSGSRC.2<1>:ud 0:ud { NoDDChk } // Write and offset - - // Calculate 16 v based on the step Y and vertical origin - mov (16) mfMSGPAYLOAD(2)<1> fSRC_VID_V_ORI<0;1,0>:f - mov (16) SCALE_COORD_Y<1>:f fSRC_VID_V_ORI<0;1,0>:f - - // Calculate 16 u based on the step X and horizontal origin (acc0 = origin, then mac adds step X * ramp) -// line (16) mfMSGPAYLOAD(0)<1> SCALE_STEP_X<0;1,0>:f SAMPLER_RAMP(0) // Assign to mrf directly - mov (16) acc0:f fSRC_VID_H_ORI<0;1,0>:f { Compr } - mac (16) mfMSGPAYLOAD(0)<1> fVIDEO_STEP_X<0;1,0>:f SAMPLER_RAMP(0) { Compr } - - // Set up the constants for the line instruction - mov (1) SCALE_LINE_P255<1>:f 255.0:f { NoDDClr } //{ NoDDClr, NoDDChk } - mov (1) SCALE_LINE_P0_5<1>:f 0.5:f { NoDDChk } - -//------------------------------------------------------------------------------ - -$for (0; <nY_NUM_OF_ROWS; 1) { - - // Read 16 sampled pixels and store them in float32 in 8 GRFs in the order of BGRA (VYUA). 
- mov (8) MSGHDR_SCALE.0:ud rMSGSRC.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (16) SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_Y+nBI_CURRENT_SRC_Y - send (16) SCALE_RESPONSE_UW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_UV+nBI_CURRENT_SRC_UV - - // Calculate 16 v for next line - add (16) mfMSGPAYLOAD(2)<1> SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Assign to mrf directly - add (16) SCALE_COORD_Y<1>:f SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Advance the SCALE_COORD_Y copy that both adds read as their source - - // Scale back to [0, 255], convert f to ud - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(0) { Compr } // Process Y (stored to DEST_Y below) - mov (16) SCALE_RESPONSE_YD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_UF(0) { Compr } // Process U (stored to DEST_U below) - mov (16) SCALE_RESPONSE_UD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_UF(2) { Compr } // Process V (stored to DEST_V below) - mov (16) SCALE_RESPONSE_UD(2)<1> acc0:f { Compr } - - mov (16) DEST_Y(%1)<1> SCALE_RESPONSE_YB(0) //possible error due to truncation - vK - mov (16) DEST_U(%1)<1> SCALE_RESPONSE_UB(0) //possible error due to truncation - vK - mov (16) DEST_V(%1)<1> SCALE_RESPONSE_UB(2) //possible error due to truncation - vK - -} - - #define nSRC_REGION nREGION_1 - -//------------------------------------------------------------------------------ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm deleted file mode 100644 index 50a050c..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm +++ /dev/null @@ -1,69 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PL3_AVS_IEF_16x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y surface -// 2 sampler reads for 8x8 U surface -// 2 sampler reads for 8x8 V surface -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 1st 8x8 U sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Enable red channel - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U - // Return U in 4 GRFs - - // 1st 8x8 V sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Dummy instruction (rewrites the same red-only mask) to avoid back-2-back send instructions - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(8)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V - // Return V in 4 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 
2nd 8x8 U sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Enable red channel - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U - // Return U in 4 GRFs - - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Dummy instruction just in order to avoid back-2-back send instructions! - - // 2nd 8x8 V sampling - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE_2(8)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V - // Return V in 4 GRFs - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:4:4 internal planar -//------------------------------------------------------------------------------ - #include "PL3_AVS_IEF_Unpack_16x8.asm" - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm deleted file mode 100644 index 35a5dd3..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm +++ /dev/null @@ -1,60 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL3_AVS_IEF_8x4.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y surface -// 1 sampler read for 8x8 U surface -// 1 sampler read for 8x8 V surface -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 8x8 U sampling ; Only 8x4 will be used - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Enable red channel - mul (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f 2.0:f // Calculate Step X for chroma - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U - // Return U in 4 GRFs - - // 8x8 V sampling ; Only 8x4 will be used - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Dummy instruction just in order to avoid back-2-back send instructions! 
- mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(8)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V - // Return V in 4 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f // Restore Step X for luma - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(12)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:2:0 internal planar -//------------------------------------------------------------------------------ - #include "PL3_AVS_IEF_Unpack_8x4.asm" - - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm deleted file mode 100644 index d67ad04..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm +++ /dev/null @@ -1,60 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL3_AVS_IEF_8x8.asm ---------- - -#include "AVS_IEF.inc" - -//------------------------------------------------------------------------------ -// 2 sampler reads for 8x8 Y surface -// 1 sampler read for 8x8 U surface -// 1 sampler read for 8x8 V surface -//------------------------------------------------------------------------------ - - // 1st 8x8 setup - #include "AVS_SetupFirstBlock.asm" - - // 1st 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - - // 8x8 U sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Enable red channel - mul (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f 2.0:f // Calculate Step X for chroma - mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(4)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U - // Return U in 4 GRFs - - // 8x8 V sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_RED_CHANNEL_ONLY:ud // Dummy instruction just in order to avoid back-2-back send instructions! 
- mov (16) mAVS_8x8_HDR_UV.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(8)<1> mAVS_8x8_HDR_UV udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V - // Return V in 4 GRFs - - // 2nd 8x8 setup - #include "AVS_SetupSecondBlock.asm" - - // 2nd 8x8 Y sampling - mov (1) rAVS_8x8_HDR.2:ud nAVS_GREEN_CHANNEL_ONLY:ud // Enable green channel - mov (1) rAVS_PAYLOAD.1:f fVIDEO_STEP_X:f // Restore Step X for luma - mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (1) uwAVS_RESPONSE(12)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y - // Return Y in 4 GRFs - -//------------------------------------------------------------------------------ -// Unpacking sampler reads to 4:2:2 internal planar -//------------------------------------------------------------------------------ - #include "PL3_AVS_IEF_Unpack_8x8.asm" - - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm deleted file mode 100644 index f88ab89..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm +++ /dev/null @@ -1,240 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL3_AVS_IEF_Unpack_16x8.asm ---------- - -#ifdef AVS_OUTPUT_16_BIT //Output is packed in AVYU format -// Move first 8x8 words of Y to dest GRF (as packed) - mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(0,0)<4;4,1> - mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(0,8)<4;4,1> - mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(0,4)<4;4,1> - mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(0,12)<4;4,1> - mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(1,0)<4;4,1> - mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(1,8)<4;4,1> - mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(1,4)<4;4,1> - mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(1,12)<4;4,1> - mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(2,0)<4;4,1> - mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(2,8)<4;4,1> - mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(2,4)<4;4,1> - mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(2,12)<4;4,1> - mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(3,0)<4;4,1> - mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(3,8)<4;4,1> - mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(3,4)<4;4,1> - mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(3,12)<4;4,1> - -// Move first 8x8 words of U to dest GRF (as packed) - mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1> - mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1> - mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1> - mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1> - mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1> - mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1> - mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1> - mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1> - mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(6,0)<4;4,1> - mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(6,8)<4;4,1> - mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(6,4)<4;4,1> - mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(6,12)<4;4,1> - mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(7,0)<4;4,1> - mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(7,8)<4;4,1> - mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(7,4)<4;4,1> - mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(7,12)<4;4,1> - -// 
Move first 8x8 words of V to dest GRF (as packed) - mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(8,0)<4;4,1> - mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(8,8)<4;4,1> - mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(8,4)<4;4,1> - mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(8,12)<4;4,1> - mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(9,0)<4;4,1> - mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(9,8)<4;4,1> - mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(9,4)<4;4,1> - mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(9,12)<4;4,1> - mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(10,0)<4;4,1> - mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(10,8)<4;4,1> - mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(10,4)<4;4,1> - mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(10,12)<4;4,1> - mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(11,0)<4;4,1> - mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(11,8)<4;4,1> - mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(11,4)<4;4,1> - mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(11,12)<4;4,1> - -// Move first 8x8 words of A to dest GRF (as packed) - mov (4) uwDEST_Y(0,3)<4> 0:uw - mov (4) uwDEST_Y(1,3)<4> 0:uw - mov (4) uwDEST_Y(4,3)<4> 0:uw - mov (4) uwDEST_Y(5,3)<4> 0:uw - mov (4) uwDEST_Y(8,3)<4> 0:uw - mov (4) uwDEST_Y(9,3)<4> 0:uw - mov (4) uwDEST_Y(12,3)<4> 0:uw - mov (4) uwDEST_Y(13,3)<4> 0:uw - mov (4) uwDEST_Y(16,3)<4> 0:uw - mov (4) uwDEST_Y(17,3)<4> 0:uw - mov (4) uwDEST_Y(20,3)<4> 0:uw - mov (4) uwDEST_Y(21,3)<4> 0:uw - mov (4) uwDEST_Y(24,3)<4> 0:uw - mov (4) uwDEST_Y(25,3)<4> 0:uw - mov (4) uwDEST_Y(28,3)<4> 0:uw - mov (4) uwDEST_Y(29,3)<4> 0:uw - -// Move second 8x8 words of Y to dest GRF - mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(0,0)<4;4,1> - mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(0,8)<4;4,1> - mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(0,4)<4;4,1> - mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(0,12)<4;4,1> - mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(1,0)<4;4,1> - mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(1,8)<4;4,1> - mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(1,4)<4;4,1> - mov (4) uwDEST_Y(15,1)<4> 
uwAVS_RESPONSE_2(1,12)<4;4,1> - mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1> - mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1> - mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1> - mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1> - mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1> - mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1> - mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1> - mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1> - -// Move second 8x8 words of U to dest GRF - mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1> - mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1> - mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1> - mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1> - mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1> - mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1> - mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1> - mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1> - mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(6,0)<4;4,1> - mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(6,8)<4;4,1> - mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(6,4)<4;4,1> - mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(6,12)<4;4,1> - mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(7,0)<4;4,1> - mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(7,8)<4;4,1> - mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(7,4)<4;4,1> - mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(7,12)<4;4,1> - -// Move second 8x8 words of V to dest GRF - mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(8,0)<4;4,1> - mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(8,8)<4;4,1> - mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(8,4)<4;4,1> - mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(8,12)<4;4,1> - mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(9,0)<4;4,1> - mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(9,8)<4;4,1> - mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(9,4)<4;4,1> - mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(9,12)<4;4,1> - mov (4) uwDEST_Y(18,2)<4> 
uwAVS_RESPONSE_2(10,0)<4;4,1> - mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(10,8)<4;4,1> - mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(10,4)<4;4,1> - mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(10,12)<4;4,1> - mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(11,0)<4;4,1> - mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(11,8)<4;4,1> - mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(11,4)<4;4,1> - mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(11,12)<4;4,1> - -// Move second 8x8 words of A to dest GRF - mov (4) uwDEST_Y(2,3)<4> 0:uw - mov (4) uwDEST_Y(3,3)<4> 0:uw - mov (4) uwDEST_Y(6,3)<4> 0:uw - mov (4) uwDEST_Y(7,3)<4> 0:uw - mov (4) uwDEST_Y(10,3)<4> 0:uw - mov (4) uwDEST_Y(11,3)<4> 0:uw - mov (4) uwDEST_Y(14,3)<4> 0:uw - mov (4) uwDEST_Y(15,3)<4> 0:uw - mov (4) uwDEST_Y(18,3)<4> 0:uw - mov (4) uwDEST_Y(19,3)<4> 0:uw - mov (4) uwDEST_Y(22,3)<4> 0:uw - mov (4) uwDEST_Y(23,3)<4> 0:uw - mov (4) uwDEST_Y(26,3)<4> 0:uw - mov (4) uwDEST_Y(27,3)<4> 0:uw - mov (4) uwDEST_Y(30,3)<4> 0:uw - mov (4) uwDEST_Y(31,3)<4> 0:uw - -/* This section will be used if 16-bit output is needed in planar format -vK - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> uwAVS_RESPONSE(%1)<8;4,1> - mov (8) uwDEST_Y(%1*2+1)<1> uwAVS_RESPONSE(%1,8)<8;4,1> - } - - // Move 8x8 words of U to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_U(%1*2)<1> uwAVS_RESPONSE(%1+4)<8;4,1> - mov (8) uwDEST_U(%1*2+1)<1> uwAVS_RESPONSE(%1+4,8)<8;4,1> - } - - // Move 8x8 words of V to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_V(%1*2)<1> uwAVS_RESPONSE(%1+8)<8;4,1> - mov (8) uwDEST_V(%1*2+1)<1> uwAVS_RESPONSE(%1+8,8)<8;4,1> - } - - // Move 2nd 8x8 words of Y to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> uwAVS_RESPONSE_2(%1)<8;4,1> - mov (8) uwDEST_Y(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1,8)<8;4,1> - } - - // Move 2nd 8x8 words of U to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_U(%1*2,8)<1> uwAVS_RESPONSE_2(%1+4)<8;4,1> - mov (8) uwDEST_U(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1+4,8)<8;4,1> - } - - // Move 2nd 8x8 words of V to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_V(%1*2,8)<1> uwAVS_RESPONSE_2(%1+8)<8;4,1> - mov (8) uwDEST_V(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1+8,8)<8;4,1> - } -*/ -#else /* OUTPUT_8_BIT */ - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x8 words of U to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_U(%1*2)<1> ubAVS_RESPONSE(%1+4,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_U(%1*2+1)<1> ubAVS_RESPONSE(%1+4,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x8 words of V to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_V(%1*2)<1> ubAVS_RESPONSE(%1+8,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_V(%1*2+1)<1> ubAVS_RESPONSE(%1+8,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 2nd 8x8 words of U to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_U(%1*2,8)<1> ubAVS_RESPONSE_2(%1+4,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_U(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1+4,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 2nd 8x8 words of V to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_V(%1*2,8)<1> ubAVS_RESPONSE_2(%1+8,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_V(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1+8,8+1)<16;4,2> // Copy high byte in a word - } -#endif -//------------------------------------------------------------------------------ - // Re-define new # of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm deleted file mode 100644 index 53586e6..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm +++ /dev/null @@ -1,45 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- PL3_AVS_IEF_Unpack_8x4.asm ---------- - - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. 
- $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x4 words of U to dest GRF (Copy high byte in a word) - mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE(4,9)<16;4,2> - mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE(5,9)<16;4,2> - - // Move 8x4 words of V to dest GRF - mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE(8,9)<16;4,2> - mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE(9,9)<16;4,2> - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE(%1+12,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE(%1+12,8+1)<16;4,2> // Copy high byte in a word - } - -//------------------------------------------------------------------------------ - // Re-define new # of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 4 - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm deleted file mode 100644 index f16d04a..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm +++ /dev/null @@ -1,44 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL3_AVS_IEF_Unpack_8x8.asm ---------- - - // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word - } - // Move 8x8 words of U to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_U(%1)<1> ubAVS_RESPONSE(%1+4,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_U(%1,8)<1> ubAVS_RESPONSE(%1+4,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 8x8 words of V to dest GRF - $for(0; <8/2; 1) { - mov (8) uwDEST_V(%1)<1> ubAVS_RESPONSE(%1+8,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_V(%1,8)<1> ubAVS_RESPONSE(%1+8,8+1)<16;4,2> // Copy high byte in a word - } - - // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF. - $for(0; <8/2; 1) { - mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE(%1+12,1)<16;4,2> // Copy high byte in a word - mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE(%1+12,8+1)<16;4,2> // Copy high byte in a word - } - -//------------------------------------------------------------------------------ - // Re-define new # of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_Scaling.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_Scaling.asm deleted file mode 100644 index 3d5c689..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL3_Scaling.asm +++ /dev/null @@ -1,72 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -//---------- PL3_Scaling.asm ---------- -#include "Scaling.inc" - - // Build 16 elements ramp in float32 and normalized it -// mov (8) SAMPLER_RAMP(0)<1> 0x76543210:v -// add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f -mov (4) SAMPLER_RAMP(0)<1> 0x48403000:vf { NoDDClr }//3, 2, 1, 0 in float vector -mov (4) SAMPLER_RAMP(0,4)<1> 0x5C585450:vf { NoDDChk }//7, 6, 5, 4 in float vector -add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f - - -//Module: PrepareScaleCoord.asm - - // Setup for sampler msg hdr - mov (2) rMSGSRC.0<1>:ud 0:ud { NoDDClr } // Unused fields - mov (1) rMSGSRC.2<1>:ud 0:ud { NoDDChk } // Write and offset - - // Calculate 16 v based on the step Y and vertical origin - mov (16) mfMSGPAYLOAD(2)<1> fSRC_VID_V_ORI<0;1,0>:f - mov (16) SCALE_COORD_Y<1>:f fSRC_VID_V_ORI<0;1,0>:f - - // Calculate 16 u based on the step X and hori origin -// line (16) mfMSGPAYLOAD(0)<1> SCALE_STEP_X<0;1,0>:f SAMPLER_RAMP(0) // Assign to mrf directly - mov (16) acc0:f fSRC_VID_H_ORI<0;1,0>:f { Compr } - mac (16) mfMSGPAYLOAD(0)<1> fVIDEO_STEP_X<0;1,0>:f SAMPLER_RAMP(0) { Compr } - - //Setup the constants for line instruction - mov (1) SCALE_LINE_P255<1>:f 255.0:f { NoDDClr } //{ NoDDClr, NoDDChk } - mov (1) SCALE_LINE_P0_5<1>:f 0.5:f { NoDDChk } - -//------------------------------------------------------------------------------ - -$for (0; <nY_NUM_OF_ROWS; 1) { - // Read 16 sampled pixels and store them in float32 in 8 GRFs in the order of BGRA (VYUA). 
- mov (8) MSGHDR_SCALE<1>:ud rMSGSRC<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (16) SCALE_RESPONSE_VW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_V+nBI_CURRENT_SRC_V - send (16) SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_Y+nBI_CURRENT_SRC_Y - send (16) SCALE_RESPONSE_UW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_U+nBI_CURRENT_SRC_U - - // Calculate 16 v for next line - add (16) mfMSGPAYLOAD(2)<1> SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Assign to mrf directly - add (16) SCALE_COORD_Y<1>:f SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Advance the saved v coordinate as well - - // Scale back to [0, 255], convert f to ud - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_VF(0) { Compr } // Process B, V - mov (16) SCALE_RESPONSE_VD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(0) { Compr } // Process G, Y - mov (16) SCALE_RESPONSE_YD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_UF(0) { Compr } // Process R, U - mov (16) SCALE_RESPONSE_UD(0)<1> acc0:f { Compr } - - mov (16) DEST_V(%1)<1> SCALE_RESPONSE_VB(0) //possible error due to truncation - vK - mov (16) DEST_Y(%1)<1> SCALE_RESPONSE_YB(0) //possible error due to truncation - vK - mov (16) DEST_U(%1)<1> SCALE_RESPONSE_UB(0) //possible error due to truncation - vK - -} - - #define nSRC_REGION nREGION_1 - -//------------------------------------------------------------------------------ diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm deleted file mode 100644 index e6d8fb2..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm +++ /dev/null @@ -1,85 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. 
- * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -#define DI_ENABLE - - #include "DNDI.inc" - - #ifdef DI_ONLY - #undef nSMPL_RESP_LEN - #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DI // set the number of GRF - #else - #undef nSMPL_RESP_LEN - #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DNDI // set the number of GRF - #endif - - #undef nDPW_BLOCK_SIZE_HIST - #define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1 // HIST Block Size for Write is 4x1 per the macros used (was documented as 4x2 - TODO confirm intended size) - #undef nDPW_BLOCK_SIZE_DN - #define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // DN Block Size for Write is 16x4 - -////////////////////////////////////// Run the DN Algorithm /////////////////////////////////////// - #include "DNDI_Command.asm" - -////////////////////////////////////// Rearrange for Internal Planar ////////////////////////////// - -////////////////////////////////////// Save the STMM Data for Next Run ///////////////////////// - // Write STMM to memory - shr (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w NODDCLR_NODDCHK // X origin / 2 - mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w NODDCLR_NODDCHK // Y origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_STMM:ud NODDCHK // block width and height (8x4) - mov (8) mudMSGHDR_STMM(0)<1> rMSGSRC.0<8;8,1>:ud // message header - mov (8) mudMSGHDR_STMM(1)<1> udRESP(nDI_STMM_OFFSET,0) // Move STMM to MRF - send (8) dNULLREG mMSGHDR_STMM udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud - -#ifdef DI_ONLY -#else - -////////////////////////////////////// Save the History Data for Next Run ///////////////////////// - #include "DI_Hist_Save.asm" - -////////////////////////////////////// Save the DN Curr Frame for Next Run //////////////////////// - - //set the save DN parameters - mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w NODDCLR // X origin and Y origin - mov (1) rMSGSRC.2<1>:ud 
nDPW_BLOCK_SIZE_DN:ud NODDCHK // block width and height (16x4); last partial write to rMSGSRC in this chain, so it skips the check but clears the dependency (matches the STMM header setup) - mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - - // check top/bottom field first - cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w - (f0.0) jmpi (1) TOP_FIELD_FIRST - -BOTTOM_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2) - } - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3) - } - - jmpi (1) SAVE_DN_CURR - -TOP_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 1st field luma from current frame (line 0,2) - } - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 1,3) - } - -SAVE_DN_CURR: - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud -#endif - -// Save Processed frames -#include "DI_Save_PA.asm" - - - - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm deleted file mode 100644 index 96aed78..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm +++ /dev/null @@ -1,103 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ - -#define DI_ENABLE - - #include "DNDI.inc" - - #undef nY_NUM_OF_ROWS - #define nY_NUM_OF_ROWS 8 // Number of Y rows per block (4 rows for each frame) - #undef nUV_NUM_OF_ROWS - #define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block - - #undef nSMPL_RESP_LEN - #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DNDI // set the number of GRF - #undef nDPW_BLOCK_SIZE_HIST - #define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1 // HIST Block Size for Write is 4x1 per the macros used (was documented as 4x2 - TODO confirm intended size) - #undef nDPW_BLOCK_SIZE_DN - #define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // DN Block Size for Write is 16x4 - #undef nDPR_BLOCK_SIZE_UV - #define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4 // DN Block Size for UV Write/Read is 8x4 - -////////////////////////////////////// Run the DN Algorithm /////////////////////////////////////// - #include "DNDI_Command.asm" - -////////////////////////////////////// Rearrange for Internal Planar ////////////////////////////// - // move the previous frame Y component to internal planar format - $for (0; <nY_NUM_OF_ROWS/2; 1) { - mov (16) uwDEST_Y(%1,0)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16) - } - // move the previous frame U,V components to internal planar format - $for (0; <nUV_NUM_OF_ROWS/2; 1) { - mov (8) uwDEST_U(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - mov (8) uwDEST_V(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - } - // move the current frame Y component to internal planar format - $for (0; <nY_NUM_OF_ROWS/2; 1) { - mov (16) uwDEST_Y(%1+4,0)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) - } - // move the current frame U,V components to internal planar format - $for (0; <nUV_NUM_OF_ROWS/2; 1) { - mov (8) uwDEST_U(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels - mov (8) uwDEST_V(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels - } - -////////////////////////////////////// Save the STMM Data for Next Run 
///////////////////////// - // Write STMM to memory - shr (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w // X origin / 2 - mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w // Y origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_STMM:ud // block width and height (8x4) - mov (8) mudMSGHDR_STMM(0)<1> rMSGSRC.0<8;8,1>:ud // message header - mov (8) mudMSGHDR_STMM(1)<1> udRESP(nDI_STMM_OFFSET,0) // Move STMM to MRF - send (8) dNULLREG mMSGHDR_STMM udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud - -////////////////////////////////////// Save the History Data for Next Run ///////////////////////// - #include "DI_Hist_Save.asm" - -////////////////////////////////////// Save the DN Curr Frame for Next Run //////////////////////// - add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w - // check top/bottom field first - cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w - (f0.0) jmpi (1) TOP_FIELD_FIRST - -BOTTOM_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3) - } - jmpi (1) SAVE_DN_CURR - -TOP_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 1st field luma from current frame (line 1,3) - } -SAVE_DN_CURR: - mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin - mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4) - mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud - - -/////////////////////////////P208 UV Copy 
422///////////////////////////////////////////////////// - //Read UV through DATAPORT - add (2) rMSGSRC.0<1>:d wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source Y Block origin - asr (1) rMSGSRC.0<1>:d rMSGSRC.0<0;1,0>:d 1:w // U/V block origin should be half of Y's - mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // U/V block width and height (16x2) - mov (8) mudMSGHDR_DN<1> rMSGSRC<8;8,1>:ud - send (8) udBOT_U_IO(0)<1> mMSGHDR_DN udDUMMY_NULL nDATAPORT_READ nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud - - //Write UV through DATAPORT - mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin - asr (1) rMSGSRC.0<1>:d rMSGSRC.0<0;1,0>:d 1:w // U/V block origin should be half of Y's - mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2) - mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - mov (8) mudMSGHDR_DN(1)<1> udBOT_U_IO(0)<8;8,1> - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
/*
 * All Video Processing kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

// Module name: PL_DNDI_ALG_UVCopy_NV12.asm
//
// Runs the DN/DI command on a planar source, rearranges the response into the
// internal planar Y/U/V layout, saves STMM, history and the DN current-frame
// luma for the next run, then copies the source UV block to the destination
// UV surface through the data port (Y origin halved, 16x2 block).

#define DI_ENABLE

    #include "DNDI.inc"

    // Block geometry used by this kernel (#undef first in case the include already set them)
    #undef  nY_NUM_OF_ROWS
    #define nY_NUM_OF_ROWS          8       // Number of Y rows per block (4 rows for each frame)
    #undef  nUV_NUM_OF_ROWS
    #define nUV_NUM_OF_ROWS         8       // Number of U/V rows per block

    #undef  nSMPL_RESP_LEN
    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // set the number of GRF
    #undef  nDPW_BLOCK_SIZE_HIST
    #define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // HIST Block Size for Write is 4x1 (comment used to say 4x2 -- NOTE(review): confirm intended height)
    #undef  nDPW_BLOCK_SIZE_DN
    #define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // DN Block Size for Write is 16x4
    #undef  nDPR_BLOCK_SIZE_UV
    #define nDPR_BLOCK_SIZE_UV      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2     // DN Block Size for UV Write/Read is 16x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
    // NOTE(review): the NV11, P208 and PL3 kernels include "DNDI_Command.asm";
    // confirm the on-disk spelling for case-sensitive builds.
    #include "DNDI_COMMAND.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
    // move the previous frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the previous frame U,V components to internal planar format
    // (chroma bytes are interleaved: V at even bytes, U at odd bytes, hence the stride-2 regions)
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }
    // move the current frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the current frame U,V components to internal planar format
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }

////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
    // Write STMM to memory
    shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
    mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
    mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
    mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // Move STMM to MRF
    send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
    #include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
    add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w
    // check top/bottom field first
    cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
    (f0.0) jmpi (1) TOP_FIELD_FIRST

    // Both branches interleave the two fields into the write payload: even output
    // lines go to mudMSGHDR_DN(1,%1*4), odd output lines to mudMSGHDR_DN(1,%1*4+4).
BOTTOM_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // 1st field luma from current frame (line 1,3)
    }
    jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // 1st field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 1,3)
    }
SAVE_DN_CURR:
    mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud


/////////////////////////////NV12 UV Copy 422/////////////////////////////////////////////////////
    //Read UV through DATAPORT
    add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // Source Y Block origin
    asr (1)     rMSGSRC.1<1>:d          rMSGSRC.1<0;1,0>:d      1:w     // U/V block Y origin is half of Y's (X origin unchanged)
    mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (16x2)
    mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud
    send (8)    udBOT_U_IO(0)<1>    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud

    //Write UV through DATAPORT
    mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
    asr (1)     rMSGSRC.1<1>:d          rMSGSRC.1<0;1,0>:d      1:w     // U/V block Y origin is half of Y's (X origin unchanged)
    mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud           // block width and height (16x2)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    mov (8)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<8;8,1>            // payload: the UV block just read
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
/*
 * All Video Processing kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

// Module name: PL_DNDI_ALG_UVCopy_P208.asm
//
// Runs the DN/DI command on a planar source, rearranges the response into the
// internal planar Y/U/V layout, saves STMM, history and the DN current-frame
// luma for the next run, then copies the source UV block to the destination
// UV surface through the data port (same origin and 16x4 block size as luma).

#define DI_ENABLE

    #include "DNDI.inc"

    // Block geometry used by this kernel (#undef first in case the include already set them)
    #undef  nY_NUM_OF_ROWS
    #define nY_NUM_OF_ROWS          8       // Number of Y rows per block (4 rows for each frame)
    #undef  nUV_NUM_OF_ROWS
    #define nUV_NUM_OF_ROWS         8       // Number of U/V rows per block

    #undef  nSMPL_RESP_LEN
    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // set the number of GRF
    #undef  nDPW_BLOCK_SIZE_HIST
    #define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // HIST Block Size for Write is 4x1 (comment used to say 4x2 -- NOTE(review): confirm intended height)
    #undef  nDPW_BLOCK_SIZE_DN
    #define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // DN Block Size for Write is 16x4

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
    // NOTE(review): the NV12 and PL_DN_ALG kernels include "DNDI_COMMAND.asm";
    // confirm the on-disk spelling for case-sensitive builds.
    #include "DNDI_Command.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
    // move the previous frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the previous frame U,V components to internal planar format
    // (chroma bytes are interleaved: V at even bytes, U at odd bytes, hence the stride-2 regions)
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }
    // move the current frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the current frame U,V components to internal planar format
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }

////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
    // Write STMM to memory
    shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
    mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
    mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
    mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // Move STMM to MRF
    send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
    #include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
    add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w
    // check top/bottom field first
    cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
    (f0.0) jmpi (1) TOP_FIELD_FIRST

    // Both branches interleave the two fields into the write payload: even output
    // lines go to mudMSGHDR_DN(1,%1*4), odd output lines to mudMSGHDR_DN(1,%1*4+4).
BOTTOM_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // 1st field luma from current frame (line 1,3)
    }
    jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // 1st field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 1,3)
    }
SAVE_DN_CURR:
    mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud


/////////////////////////////P208 UV Copy 422/////////////////////////////////////////////////////
    //Read UV through DATAPORT
    add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // Source Y Block origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // Y Block width and height (16x4) (U/V block size is the same)
    mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud
    send (8)    udBOT_U_IO(0)<1>    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_2+nBI_CURRENT_SRC_UV:ud

    //Write UV through DATAPORT
    mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    mov (8)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<8;8,1>            // payload: first register of the UV block just read
    mov (8)     mudMSGHDR_DN(2)<1>      udBOT_U_IO(1)<8;8,1>            // payload: second register of the UV block just read
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_UV:ud
/*
 * All Video Processing kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

// Module name: PL_DNDI_ALG_UVCopy_PL3.asm
//
// Runs the DN/DI command on a planar source, rearranges the response into the
// internal planar Y/U/V layout, saves STMM, history and the DN current-frame
// luma for the next run, then copies the separate source U and V blocks to the
// destination U and V surfaces through the data port (X and Y origin halved,
// 8x2 block per plane).

#define DI_ENABLE

    #include "DNDI.inc"

    // Block geometry used by this kernel (#undef first in case the include already set them)
    #undef  nY_NUM_OF_ROWS
    #define nY_NUM_OF_ROWS          8       // Number of Y rows per block (4 rows for each frame)
    #undef  nUV_NUM_OF_ROWS
    #define nUV_NUM_OF_ROWS         8       // Number of U/V rows per block

    #undef  nSMPL_RESP_LEN
    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // set the number of GRF
    #undef  nDPW_BLOCK_SIZE_HIST
    #define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // HIST Block Size for Write is 4x1 (comment used to say 4x2 -- NOTE(review): confirm intended height)
    #undef  nDPW_BLOCK_SIZE_DN
    #define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // DN Block Size for Write is 16x4
    #undef  nDPR_BLOCK_SIZE_UV
    #define nDPR_BLOCK_SIZE_UV      nBLOCK_WIDTH_8+nBLOCK_HEIGHT_2      // DN Block Size for UV Write/Read is 8x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
    // NOTE(review): the NV12 and PL_DN_ALG kernels include "DNDI_COMMAND.asm";
    // confirm the on-disk spelling for case-sensitive builds.
    #include "DNDI_Command.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
    // move the previous frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the previous frame U,V components to internal planar format
    // (chroma bytes are interleaved: V at even bytes, U at odd bytes, hence the stride-2 regions)
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }
    // move the current frame Y component to internal planar format
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
    }
    // move the current frame U,V components to internal planar format
    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
        mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
        mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
    }

////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
    // Write STMM to memory
    shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
    mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
    mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
    mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // Move STMM to MRF
    send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
    #include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
    add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w
    // check top/bottom field first
    cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
    (f0.0) jmpi (1) TOP_FIELD_FIRST

    // Both branches interleave the two fields into the write payload: even output
    // lines go to mudMSGHDR_DN(1,%1*4), odd output lines to mudMSGHDR_DN(1,%1*4+4).
BOTTOM_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // 1st field luma from current frame (line 1,3)
    }
    jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // 1st field luma from current frame (line 0,2)
        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 1,3)
    }
SAVE_DN_CURR:
    mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud


/////////////////////////////IMC3 UV Copy 422/////////////////////////////////////////////////////
    //Read UV through DATAPORT
    add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // Source Y Block origin
    asr (2)     rMSGSRC.0<1>:d          rMSGSRC.0<2;2,1>:d      1:w     // U/V block origin is half of Y's in both X and Y
    mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (8x2)
    mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud
    // the same header is sent twice: once against the U surface, once against the V surface
    send (4)    udBOT_U_IO(0)<1>    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_U:ud
    send (4)    udBOT_V_IO(0)<1>    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_V:ud

    //Write UV through DATAPORT
    // The halved origin is computed straight from wORIX/wORIY. The former
    // "mov (2) rMSGSRC.0 <- wORIX" ahead of this asr wrote the same two dwords
    // and was overwritten before being read, so it has been dropped.
    asr (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          1:w     // U/V block origin is half of Y's in both X and Y
    mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud           // block width and height (8x2)
    mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
    mov (4)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<4;4,1>            // payload: the U block just read
    send (4)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_U:ud
    mov (4)     mudMSGHDR_DN(1)<1>      udBOT_V_IO(0)<4;4,1>            // payload: the V block just read (header register reused)
    send (4)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_V:ud
/*
 * All Video Processing kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

// Module name: PL_DN_ALG.asm
//
// Denoise-only kernel (DI disabled): runs the DN command, copies the luma rows
// of the response into the internal planar Y destination, then saves the
// history data for the next run.

#define DI_DISABLE

#include "DNDI.inc"

// Block geometry used by this kernel (#undef first in case the include already set them)
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8                                   // Number of Y rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DN_PL                // Set the Number of GRFs in DNDI response
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8     // DN Curr Block Size for Write is 16x8
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_2      // HIST Block Size for Write is 4x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_COMMAND.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// One 16-pixel row per iteration: bytes from the response are widened into words of uwDEST_Y.
$for (0; <nY_NUM_OF_ROWS; 1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubRESP(nNODI_LUMA_OFFSET,%1*16)<16;16,1>    // copy line of Y
}

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DNDI_Hist_Save.asm"

/*
 * All Video Processing kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

//---------- RGB_AVS_IEF_16x8.asm ----------
//
// Issues the two sampler messages that fetch a 16x8 ARGB block as two 8x8 halves.

#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 ARGB packed
//------------------------------------------------------------------------------

    // 1st 8x8 setup
    #include "AVS_SetupFirstBlock.asm"

    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_ALL_CHANNELS:ud            // Enable ARGB channels (header dword 2 carries the write channel masks)
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_4CH+nSI_SRC_RGB+nBI_CURRENT_SRC_YUV
    // Return ARGB in 16 GRFs

    // 2nd 8x8 setup
    #include "AVS_SetupSecondBlock.asm"

    // channel mask set for the first block is still in the header mirror, so only the copy and send repeat
    mov (16)    mAVS_8x8_HDR_2.0:ud     rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(0)<1>  mAVS_8x8_HDR_2  udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_4CH+nSI_SRC_RGB+nBI_CURRENT_SRC_YUV
    // Return ARGB in 16 GRFs

diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm deleted file mode 100644 index 6e2de97..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm +++ /dev/null @@ -1,251 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php.
- * - */ - -//---------- RGB_AVS_IEF_Unpack_16x8.asm ---------- -#include "AVS_IEF.inc" - -#ifdef AVS_OUTPUT_16_BIT -// Move first 8x8 words of B to dest GRF (as packed) - mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(4,0)<4;4,1> - mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(4,8)<4;4,1> - mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(4,4)<4;4,1> - mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(4,12)<4;4,1> - mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(5,0)<4;4,1> - mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(5,8)<4;4,1> - mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(5,4)<4;4,1> - mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(5,12)<4;4,1> - mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(12,0)<4;4,1> - mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(12,8)<4;4,1> - mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(12,4)<4;4,1> - mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(12,12)<4;4,1> - mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(13,0)<4;4,1> - mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(13,8)<4;4,1> - mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(13,4)<4;4,1> - mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(13,12)<4;4,1> - -// Move first 8x8 words of G to dest GRF (as packed) - mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(2,0)<4;4,1> - mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(2,8)<4;4,1> - mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(2,4)<4;4,1> - mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(2,12)<4;4,1> - mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(3,0)<4;4,1> - mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(3,8)<4;4,1> - mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(3,4)<4;4,1> - mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(3,12)<4;4,1> - mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(10,0)<4;4,1> - mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(10,8)<4;4,1> - mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(10,4)<4;4,1> - mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(10,12)<4;4,1> - mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(11,0)<4;4,1> - mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(11,8)<4;4,1> - mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(11,4)<4;4,1> - mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(11,12)<4;4,1> - 
-// Move first 8x8 words of R to dest GRF (as packed) - mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(0,0)<4;4,1> - mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(0,8)<4;4,1> - mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(0,4)<4;4,1> - mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(0,12)<4;4,1> - mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(1,0)<4;4,1> - mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(1,8)<4;4,1> - mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(1,4)<4;4,1> - mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(1,12)<4;4,1> - mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(8,0)<4;4,1> - mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(8,8)<4;4,1> - mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(8,4)<4;4,1> - mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(8,12)<4;4,1> - mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(9,0)<4;4,1> - mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(9,8)<4;4,1> - mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(9,4)<4;4,1> - mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(9,12)<4;4,1> - -// Move first 8x8 words of A to dest GRF (as packed) - mov (4) uwDEST_Y(0,3)<4> uwAVS_RESPONSE(6,0)<4;4,1> - mov (4) uwDEST_Y(1,3)<4> uwAVS_RESPONSE(6,8)<4;4,1> - mov (4) uwDEST_Y(4,3)<4> uwAVS_RESPONSE(6,4)<4;4,1> - mov (4) uwDEST_Y(5,3)<4> uwAVS_RESPONSE(6,12)<4;4,1> - mov (4) uwDEST_Y(8,3)<4> uwAVS_RESPONSE(7,0)<4;4,1> - mov (4) uwDEST_Y(9,3)<4> uwAVS_RESPONSE(7,8)<4;4,1> - mov (4) uwDEST_Y(12,3)<4> uwAVS_RESPONSE(7,4)<4;4,1> - mov (4) uwDEST_Y(13,3)<4> uwAVS_RESPONSE(7,12)<4;4,1> - mov (4) uwDEST_Y(16,3)<4> uwAVS_RESPONSE(14,0)<4;4,1> - mov (4) uwDEST_Y(17,3)<4> uwAVS_RESPONSE(14,8)<4;4,1> - mov (4) uwDEST_Y(20,3)<4> uwAVS_RESPONSE(14,4)<4;4,1> - mov (4) uwDEST_Y(21,3)<4> uwAVS_RESPONSE(14,12)<4;4,1> - mov (4) uwDEST_Y(24,3)<4> uwAVS_RESPONSE(15,0)<4;4,1> - mov (4) uwDEST_Y(25,3)<4> uwAVS_RESPONSE(15,8)<4;4,1> - mov (4) uwDEST_Y(28,3)<4> uwAVS_RESPONSE(15,4)<4;4,1> - mov (4) uwDEST_Y(29,3)<4> uwAVS_RESPONSE(15,12)<4;4,1> - -// Move second 8x8 words of B to dest GRF - mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(4,0)<4;4,1> - mov (4) 
uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(4,8)<4;4,1> - mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(4,4)<4;4,1> - mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(4,12)<4;4,1> - mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(5,0)<4;4,1> - mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(5,8)<4;4,1> - mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(5,4)<4;4,1> - mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(5,12)<4;4,1> - mov (4) uwDEST_Y(18,2)<4> uwAVS_RESPONSE_2(12,0)<4;4,1> - mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(12,8)<4;4,1> - mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(12,4)<4;4,1> - mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(12,12)<4;4,1> - mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(13,0)<4;4,1> - mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(13,8)<4;4,1> - mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(13,4)<4;4,1> - mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(13,12)<4;4,1> - -// Move second 8x8 words of G to dest GRF - mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1> - mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1> - mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1> - mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1> - mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1> - mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1> - mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1> - mov (4) uwDEST_Y(15,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1> - mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(10,0)<4;4,1> - mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(10,8)<4;4,1> - mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(10,4)<4;4,1> - mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(10,12)<4;4,1> - mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(11,0)<4;4,1> - mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(11,8)<4;4,1> - mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(11,4)<4;4,1> - mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(11,12)<4;4,1> - -// Move second 8x8 words of R to dest GRF - mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(0,0)<4;4,1> - mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(0,8)<4;4,1> - mov (4) 
uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(0,4)<4;4,1> - mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(0,12)<4;4,1> - mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(1,0)<4;4,1> - mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(1,8)<4;4,1> - mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(1,4)<4;4,1> - mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(1,12)<4;4,1> - mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(8,0)<4;4,1> - mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(8,8)<4;4,1> - mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(8,4)<4;4,1> - mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(8,12)<4;4,1> - mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(9,0)<4;4,1> - mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(9,8)<4;4,1> - mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(9,4)<4;4,1> - mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(9,12)<4;4,1> - -// Move second 8x8 words of A to dest GRF - mov (4) uwDEST_Y(2,3)<4> uwAVS_RESPONSE_2(6,0)<4;4,1> - mov (4) uwDEST_Y(3,3)<4> uwAVS_RESPONSE_2(6,8)<4;4,1> - mov (4) uwDEST_Y(6,3)<4> uwAVS_RESPONSE_2(6,4)<4;4,1> - mov (4) uwDEST_Y(7,3)<4> uwAVS_RESPONSE_2(6,12)<4;4,1> - mov (4) uwDEST_Y(10,3)<4> uwAVS_RESPONSE_2(7,0)<4;4,1> - mov (4) uwDEST_Y(11,3)<4> uwAVS_RESPONSE_2(7,8)<4;4,1> - mov (4) uwDEST_Y(14,3)<4> uwAVS_RESPONSE_2(7,4)<4;4,1> - mov (4) uwDEST_Y(15,3)<4> uwAVS_RESPONSE_2(7,12)<4;4,1> - mov (4) uwDEST_Y(18,3)<4> uwAVS_RESPONSE_2(14,0)<4;4,1> - mov (4) uwDEST_Y(19,3)<4> uwAVS_RESPONSE_2(14,8)<4;4,1> - mov (4) uwDEST_Y(22,3)<4> uwAVS_RESPONSE_2(14,4)<4;4,1> - mov (4) uwDEST_Y(23,3)<4> uwAVS_RESPONSE_2(14,12)<4;4,1> - mov (4) uwDEST_Y(26,3)<4> uwAVS_RESPONSE_2(15,0)<4;4,1> - mov (4) uwDEST_Y(27,3)<4> uwAVS_RESPONSE_2(15,8)<4;4,1> - mov (4) uwDEST_Y(30,3)<4> uwAVS_RESPONSE_2(15,4)<4;4,1> - mov (4) uwDEST_Y(31,3)<4> uwAVS_RESPONSE_2(15,12)<4;4,1> - -#else /* OUTPUT_8_BIT */ -// Move first 8x8 words of B to dest GRF - mov (8) ubDEST_Y(0,2)<4> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) ubDEST_Y(2,2)<4> ubAVS_RESPONSE(4,8+1)<16;4,2> - mov (8) ubDEST_Y(4,2)<4> ubAVS_RESPONSE(5,1)<16;4,2> - mov 
(8) ubDEST_Y(6,2)<4> ubAVS_RESPONSE(5,8+1)<16;4,2> - mov (8) ubDEST_Y(8,2)<4> ubAVS_RESPONSE(12,1)<16;4,2> - mov (8) ubDEST_Y(10,2)<4> ubAVS_RESPONSE(12,8+1)<16;4,2> - mov (8) ubDEST_Y(12,2)<4> ubAVS_RESPONSE(13,1)<16;4,2> - mov (8) ubDEST_Y(14,2)<4> ubAVS_RESPONSE(13,8+1)<16;4,2> - -// Move first 8x8 words of G to dest GRF - mov (8) ubDEST_Y(0,1)<4> ubAVS_RESPONSE(2,1)<16;4,2> - mov (8) ubDEST_Y(2,1)<4> ubAVS_RESPONSE(2,8+1)<16;4,2> - mov (8) ubDEST_Y(4,1)<4> ubAVS_RESPONSE(3,1)<16;4,2> - mov (8) ubDEST_Y(6,1)<4> ubAVS_RESPONSE(3,8+1)<16;4,2> - mov (8) ubDEST_Y(8,1)<4> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) ubDEST_Y(10,1)<4> ubAVS_RESPONSE(10,8+1)<16;4,2> - mov (8) ubDEST_Y(12,1)<4> ubAVS_RESPONSE(11,1)<16;4,2> - mov (8) ubDEST_Y(14,1)<4> ubAVS_RESPONSE(11,8+1)<16;4,2> - -// Move first 8x8 words of R to dest GRF - mov (8) ubDEST_Y(0,0)<4> ubAVS_RESPONSE(0,1)<16;4,2> - mov (8) ubDEST_Y(2,0)<4> ubAVS_RESPONSE(0,8+1)<16;4,2> - mov (8) ubDEST_Y(4,0)<4> ubAVS_RESPONSE(1,1)<16;4,2> - mov (8) ubDEST_Y(6,0)<4> ubAVS_RESPONSE(1,8+1)<16;4,2> - mov (8) ubDEST_Y(8,0)<4> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) ubDEST_Y(10,0)<4> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) ubDEST_Y(12,0)<4> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) ubDEST_Y(14,0)<4> ubAVS_RESPONSE(9,8+1)<16;4,2> - -// Move first 8x8 words of A to dest GRF - mov (8) ubDEST_Y(0,3)<4> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) ubDEST_Y(2,3)<4> ubAVS_RESPONSE(6,8+1)<16;4,2> - mov (8) ubDEST_Y(4,3)<4> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) ubDEST_Y(6,3)<4> ubAVS_RESPONSE(7,8+1)<16;4,2> - mov (8) ubDEST_Y(8,3)<4> ubAVS_RESPONSE(14,1)<16;4,2> - mov (8) ubDEST_Y(10,3)<4> ubAVS_RESPONSE(14,8+1)<16;4,2> - mov (8) ubDEST_Y(12,3)<4> ubAVS_RESPONSE(15,1)<16;4,2> - mov (8) ubDEST_Y(14,3)<4> ubAVS_RESPONSE(15,8+1)<16;4,2> - -// Move second 8x8 words of B to dest GRF - mov (8) ubDEST_Y(1,2)<4> ubAVS_RESPONSE_2(4,1)<16;4,2> - mov (8) ubDEST_Y(3,2)<4> ubAVS_RESPONSE_2(4,8+1)<16;4,2> - mov (8) ubDEST_Y(5,2)<4> ubAVS_RESPONSE_2(5,1)<16;4,2> - mov 
(8) ubDEST_Y(7,2)<4> ubAVS_RESPONSE_2(5,8+1)<16;4,2> - mov (8) ubDEST_Y(9,2)<4> ubAVS_RESPONSE_2(12,1)<16;4,2> - mov (8) ubDEST_Y(11,2)<4> ubAVS_RESPONSE_2(12,8+1)<16;4,2> - mov (8) ubDEST_Y(13,2)<4> ubAVS_RESPONSE_2(13,1)<16;4,2> - mov (8) ubDEST_Y(15,2)<4> ubAVS_RESPONSE_2(13,8+1)<16;4,2> - -// Move second 8x8 words of G to dest GRF - mov (8) ubDEST_Y(1,1)<4> ubAVS_RESPONSE_2(2,1)<16;4,2> - mov (8) ubDEST_Y(3,1)<4> ubAVS_RESPONSE_2(2,8+1)<16;4,2> - mov (8) ubDEST_Y(5,1)<4> ubAVS_RESPONSE_2(3,1)<16;4,2> - mov (8) ubDEST_Y(7,1)<4> ubAVS_RESPONSE_2(3,8+1)<16;4,2> - mov (8) ubDEST_Y(9,1)<4> ubAVS_RESPONSE_2(10,1)<16;4,2> - mov (8) ubDEST_Y(11,1)<4> ubAVS_RESPONSE_2(10,8+1)<16;4,2> - mov (8) ubDEST_Y(13,1)<4> ubAVS_RESPONSE_2(11,1)<16;4,2> - mov (8) ubDEST_Y(15,1)<4> ubAVS_RESPONSE_2(11,8+1)<16;4,2> - -// Move second 8x8 words of R to dest GRF - mov (8) ubDEST_Y(1,0)<4> ubAVS_RESPONSE_2(0,1)<16;4,2> - mov (8) ubDEST_Y(3,0)<4> ubAVS_RESPONSE_2(0,8+1)<16;4,2> - mov (8) ubDEST_Y(5,0)<4> ubAVS_RESPONSE_2(1,1)<16;4,2> - mov (8) ubDEST_Y(7,0)<4> ubAVS_RESPONSE_2(1,8+1)<16;4,2> - mov (8) ubDEST_Y(9,0)<4> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) ubDEST_Y(11,0)<4> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) ubDEST_Y(13,0)<4> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) ubDEST_Y(15,0)<4> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - -// Move second 8x8 words of A to dest GRF - mov (8) ubDEST_Y(1,3)<4> ubAVS_RESPONSE_2(6,1)<16;4,2> - mov (8) ubDEST_Y(3,3)<4> ubAVS_RESPONSE_2(6,8+1)<16;4,2> - mov (8) ubDEST_Y(5,3)<4> ubAVS_RESPONSE_2(7,1)<16;4,2> - mov (8) ubDEST_Y(7,3)<4> ubAVS_RESPONSE_2(7,8+1)<16;4,2> - mov (8) ubDEST_Y(9,3)<4> ubAVS_RESPONSE_2(14,1)<16;4,2> - mov (8) ubDEST_Y(11,3)<4> ubAVS_RESPONSE_2(14,8+1)<16;4,2> - mov (8) ubDEST_Y(13,3)<4> ubAVS_RESPONSE_2(15,1)<16;4,2> - mov (8) ubDEST_Y(15,3)<4> ubAVS_RESPONSE_2(15,8+1)<16;4,2> -#endif -//------------------------------------------------------------------------------ - - // Set to write bottom region to memory - #define SRC_REGION REGION_2 
- - // Re-define new # of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm deleted file mode 100644 index b81923f..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm +++ /dev/null @@ -1,260 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- RGB_AVS_IEF_Unscramble_16x8.asm ---------- -#include "AVS_IEF.inc" - -.declare DEST_B Base=REG(r,10) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_G Base=REG(r,18) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_R Base=REG(r,26) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_A Base=REG(r,34) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw - - -#ifdef AVS_OUTPUT_16_BIT -//NOTE: with AVS_OUTPUT_16_BIT defined this branch emits no instructions; every move in it is commented out. This portion will need to be changed if unpacking is required for Y416 kernels (in case of blending etc) - vK - -//// Move first 8x8 words of B to dest GRF (as packed) -// mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(4,0)<4;4,1> -// mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(4,8)<4;4,1> -// mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(4,4)<4;4,1> -// mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(4,12)<4;4,1> -// mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(5,0)<4;4,1> -// mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(5,8)<4;4,1> -// mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(5,4)<4;4,1> -// mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(5,12)<4;4,1> -// mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(12,0)<4;4,1> -// mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(12,8)<4;4,1> 
-// mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(12,4)<4;4,1> -// mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(12,12)<4;4,1> -// mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(13,0)<4;4,1> -// mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(13,8)<4;4,1> -// mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(13,4)<4;4,1> -// mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(13,12)<4;4,1> -// -//// Move first 8x8 words of G to dest GRF (as packed) -// mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(2,0)<4;4,1> -// mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(2,8)<4;4,1> -// mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(2,4)<4;4,1> -// mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(2,12)<4;4,1> -// mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(3,0)<4;4,1> -// mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(3,8)<4;4,1> -// mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(3,4)<4;4,1> -// mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(3,12)<4;4,1> -// mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(10,0)<4;4,1> -// mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(10,8)<4;4,1> -// mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(10,4)<4;4,1> -// mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(10,12)<4;4,1> -// mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(11,0)<4;4,1> -// mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(11,8)<4;4,1> -// mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(11,4)<4;4,1> -// mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(11,12)<4;4,1> -// -//// Move first 8x8 words of R to dest GRF (as packed) -// mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(0,0)<4;4,1> -// mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(0,8)<4;4,1> -// mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(0,4)<4;4,1> -// mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(0,12)<4;4,1> -// mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(1,0)<4;4,1> -// mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(1,8)<4;4,1> -// mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(1,4)<4;4,1> -// mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(1,12)<4;4,1> -// mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(8,0)<4;4,1> -// mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(8,8)<4;4,1> -// mov (4) uwDEST_Y(20,0)<4> 
uwAVS_RESPONSE(8,4)<4;4,1> -// mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(8,12)<4;4,1> -// mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(9,0)<4;4,1> -// mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(9,8)<4;4,1> -// mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(9,4)<4;4,1> -// mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(9,12)<4;4,1> -// -//// Move first 8x8 words of A to dest GRF (as packed) -// mov (4) uwDEST_Y(0,3)<4> uwAVS_RESPONSE(6,0)<4;4,1> -// mov (4) uwDEST_Y(1,3)<4> uwAVS_RESPONSE(6,8)<4;4,1> -// mov (4) uwDEST_Y(4,3)<4> uwAVS_RESPONSE(6,4)<4;4,1> -// mov (4) uwDEST_Y(5,3)<4> uwAVS_RESPONSE(6,12)<4;4,1> -// mov (4) uwDEST_Y(8,3)<4> uwAVS_RESPONSE(7,0)<4;4,1> -// mov (4) uwDEST_Y(9,3)<4> uwAVS_RESPONSE(7,8)<4;4,1> -// mov (4) uwDEST_Y(12,3)<4> uwAVS_RESPONSE(7,4)<4;4,1> -// mov (4) uwDEST_Y(13,3)<4> uwAVS_RESPONSE(7,12)<4;4,1> -// mov (4) uwDEST_Y(16,3)<4> uwAVS_RESPONSE(14,0)<4;4,1> -// mov (4) uwDEST_Y(17,3)<4> uwAVS_RESPONSE(14,8)<4;4,1> -// mov (4) uwDEST_Y(20,3)<4> uwAVS_RESPONSE(14,4)<4;4,1> -// mov (4) uwDEST_Y(21,3)<4> uwAVS_RESPONSE(14,12)<4;4,1> -// mov (4) uwDEST_Y(24,3)<4> uwAVS_RESPONSE(15,0)<4;4,1> -// mov (4) uwDEST_Y(25,3)<4> uwAVS_RESPONSE(15,8)<4;4,1> -// mov (4) uwDEST_Y(28,3)<4> uwAVS_RESPONSE(15,4)<4;4,1> -// mov (4) uwDEST_Y(29,3)<4> uwAVS_RESPONSE(15,12)<4;4,1> -// -//// Move second 8x8 words of B to dest GRF -// mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(4,0)<4;4,1> -// mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(4,8)<4;4,1> -// mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(4,4)<4;4,1> -// mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(4,12)<4;4,1> -// mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(5,0)<4;4,1> -// mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(5,8)<4;4,1> -// mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(5,4)<4;4,1> -// mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(5,12)<4;4,1> -// mov (4) uwDEST_Y(18,2)<4> uwAVS_RESPONSE_2(12,0)<4;4,1> -// mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(12,8)<4;4,1> -// mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(12,4)<4;4,1> -// mov 
(4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(12,12)<4;4,1> -// mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(13,0)<4;4,1> -// mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(13,8)<4;4,1> -// mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(13,4)<4;4,1> -// mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(13,12)<4;4,1> -// -//// Move second 8x8 words of G to dest GRF -// mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1> -// mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1> -// mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1> -// mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1> -// mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1> -// mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1> -// mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1> -// mov (4) uwDEST_Y(15,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1> -// mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(10,0)<4;4,1> -// mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(10,8)<4;4,1> -// mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(10,4)<4;4,1> -// mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(10,12)<4;4,1> -// mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(11,0)<4;4,1> -// mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(11,8)<4;4,1> -// mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(11,4)<4;4,1> -// mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(11,12)<4;4,1> -// -//// Move second 8x8 words of R to dest GRF -// mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(0,0)<4;4,1> -// mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(0,8)<4;4,1> -// mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(0,4)<4;4,1> -// mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(0,12)<4;4,1> -// mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(1,0)<4;4,1> -// mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(1,8)<4;4,1> -// mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(1,4)<4;4,1> -// mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(1,12)<4;4,1> -// mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(8,0)<4;4,1> -// mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(8,8)<4;4,1> -// mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(8,4)<4;4,1> -// mov 
(4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(8,12)<4;4,1> -// mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(9,0)<4;4,1> -// mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(9,8)<4;4,1> -// mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(9,4)<4;4,1> -// mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(9,12)<4;4,1> -// -//// Move second 8x8 words of A to dest GRF -// mov (4) uwDEST_Y(2,3)<4> uwAVS_RESPONSE_2(6,0)<4;4,1> -// mov (4) uwDEST_Y(3,3)<4> uwAVS_RESPONSE_2(6,8)<4;4,1> -// mov (4) uwDEST_Y(6,3)<4> uwAVS_RESPONSE_2(6,4)<4;4,1> -// mov (4) uwDEST_Y(7,3)<4> uwAVS_RESPONSE_2(6,12)<4;4,1> -// mov (4) uwDEST_Y(10,3)<4> uwAVS_RESPONSE_2(7,0)<4;4,1> -// mov (4) uwDEST_Y(11,3)<4> uwAVS_RESPONSE_2(7,8)<4;4,1> -// mov (4) uwDEST_Y(14,3)<4> uwAVS_RESPONSE_2(7,4)<4;4,1> -// mov (4) uwDEST_Y(15,3)<4> uwAVS_RESPONSE_2(7,12)<4;4,1> -// mov (4) uwDEST_Y(18,3)<4> uwAVS_RESPONSE_2(14,0)<4;4,1> -// mov (4) uwDEST_Y(19,3)<4> uwAVS_RESPONSE_2(14,8)<4;4,1> -// mov (4) uwDEST_Y(22,3)<4> uwAVS_RESPONSE_2(14,4)<4;4,1> -// mov (4) uwDEST_Y(23,3)<4> uwAVS_RESPONSE_2(14,12)<4;4,1> -// mov (4) uwDEST_Y(26,3)<4> uwAVS_RESPONSE_2(15,0)<4;4,1> -// mov (4) uwDEST_Y(27,3)<4> uwAVS_RESPONSE_2(15,8)<4;4,1> -// mov (4) uwDEST_Y(30,3)<4> uwAVS_RESPONSE_2(15,4)<4;4,1> -// mov (4) uwDEST_Y(31,3)<4> uwAVS_RESPONSE_2(15,12)<4;4,1> - -#else /* OUTPUT_8_BIT */ - -// Move first 8x8 words of B to dest GRF (each mov reads response bytes at sub-offset 1 with stride 2; NOTE(review): presumably the upper byte of each 16-bit sample, confirm against the ubAVS_RESPONSE declaration) - mov (8) DEST_B(0)<1> ubAVS_RESPONSE(4,1)<16;4,2> - mov (8) DEST_B(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2> - mov (8) DEST_B(2)<1> ubAVS_RESPONSE(5,1)<16;4,2> - mov (8) DEST_B(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2> - mov (8) DEST_B(4)<1> ubAVS_RESPONSE(12,1)<16;4,2> - mov (8) DEST_B(5)<1> ubAVS_RESPONSE(12,8+1)<16;4,2> - mov (8) DEST_B(6)<1> ubAVS_RESPONSE(13,1)<16;4,2> - mov (8) DEST_B(7)<1> ubAVS_RESPONSE(13,8+1)<16;4,2> - -// Move first 8x8 words of G to dest GRF - mov (8) DEST_G(0)<1> ubAVS_RESPONSE(2,1)<16;4,2> - mov (8) DEST_G(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2> - mov (8) DEST_G(2)<1> ubAVS_RESPONSE(3,1)<16;4,2> - mov (8) 
DEST_G(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2> - mov (8) DEST_G(4)<1> ubAVS_RESPONSE(10,1)<16;4,2> - mov (8) DEST_G(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2> - mov (8) DEST_G(6)<1> ubAVS_RESPONSE(11,1)<16;4,2> - mov (8) DEST_G(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2> - -// Move first 8x8 words of R to dest GRF - mov (8) DEST_R(0)<1> ubAVS_RESPONSE(0,1)<16;4,2> - mov (8) DEST_R(1)<1> ubAVS_RESPONSE(0,8+1)<16;4,2> - mov (8) DEST_R(2)<1> ubAVS_RESPONSE(1,1)<16;4,2> - mov (8) DEST_R(3)<1> ubAVS_RESPONSE(1,8+1)<16;4,2> - mov (8) DEST_R(4)<1> ubAVS_RESPONSE(8,1)<16;4,2> - mov (8) DEST_R(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2> - mov (8) DEST_R(6)<1> ubAVS_RESPONSE(9,1)<16;4,2> - mov (8) DEST_R(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2> - -// Move first 8x8 words of A to dest GRF - mov (8) DEST_A(0)<1> ubAVS_RESPONSE(6,1)<16;4,2> - mov (8) DEST_A(1)<1> ubAVS_RESPONSE(6,8+1)<16;4,2> - mov (8) DEST_A(2)<1> ubAVS_RESPONSE(7,1)<16;4,2> - mov (8) DEST_A(3)<1> ubAVS_RESPONSE(7,8+1)<16;4,2> - mov (8) DEST_A(4)<1> ubAVS_RESPONSE(14,1)<16;4,2> - mov (8) DEST_A(5)<1> ubAVS_RESPONSE(14,8+1)<16;4,2> - mov (8) DEST_A(6)<1> ubAVS_RESPONSE(15,1)<16;4,2> - mov (8) DEST_A(7)<1> ubAVS_RESPONSE(15,8+1)<16;4,2> - -// Move second 8x8 words of B to dest GRF - mov (8) DEST_B(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2> - mov (8) DEST_B(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2> - mov (8) DEST_B(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2> - mov (8) DEST_B(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2> - mov (8) DEST_B(4,8)<1> ubAVS_RESPONSE_2(12,1)<16;4,2> - mov (8) DEST_B(5,8)<1> ubAVS_RESPONSE_2(12,8+1)<16;4,2> - mov (8) DEST_B(6,8)<1> ubAVS_RESPONSE_2(13,1)<16;4,2> - mov (8) DEST_B(7,8)<1> ubAVS_RESPONSE_2(13,8+1)<16;4,2> - -// Move second 8x8 words of G to dest GRF - mov (8) DEST_G(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2> - mov (8) DEST_G(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2> - mov (8) DEST_G(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2> - mov (8) DEST_G(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2> - mov (8) DEST_G(4,8)<1> 
ubAVS_RESPONSE_2(10,1)<16;4,2> - mov (8) DEST_G(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2> - mov (8) DEST_G(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2> - mov (8) DEST_G(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2> - -// Move second 8x8 words of R to dest GRF - mov (8) DEST_R(0,8)<1> ubAVS_RESPONSE_2(0,1)<16;4,2> - mov (8) DEST_R(1,8)<1> ubAVS_RESPONSE_2(0,8+1)<16;4,2> - mov (8) DEST_R(2,8)<1> ubAVS_RESPONSE_2(1,1)<16;4,2> - mov (8) DEST_R(3,8)<1> ubAVS_RESPONSE_2(1,8+1)<16;4,2> - mov (8) DEST_R(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2> - mov (8) DEST_R(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2> - mov (8) DEST_R(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2> - mov (8) DEST_R(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2> - -// Move second 8x8 words of A to dest GRF - mov (8) DEST_A(0,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2> - mov (8) DEST_A(1,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2> - mov (8) DEST_A(2,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2> - mov (8) DEST_A(3,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2> - mov (8) DEST_A(4,8)<1> ubAVS_RESPONSE_2(14,1)<16;4,2> - mov (8) DEST_A(5,8)<1> ubAVS_RESPONSE_2(14,8+1)<16;4,2> - mov (8) DEST_A(6,8)<1> ubAVS_RESPONSE_2(15,1)<16;4,2> - mov (8) DEST_A(7,8)<1> ubAVS_RESPONSE_2(15,8+1)<16;4,2> -#endif -//------------------------------------------------------------------------------ - - // Select the source region to write to memory (NOTE(review): this comment previously said "bottom region" while selecting REGION_1, and the preceding kernel in this patch uses the same wording with REGION_2; confirm which region is intended) - #define SRC_REGION REGION_1 - - // Re-define new # of lines - #undef nUV_NUM_OF_ROWS - #undef nY_NUM_OF_ROWS - - #define nY_NUM_OF_ROWS 8 - #define nUV_NUM_OF_ROWS 8 - diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_Scaling.asm b/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_Scaling.asm deleted file mode 100644 index 7429790..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/RGB_Scaling.asm +++ /dev/null @@ -1,72 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. 
The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -//---------- RGB_Scaling.asm ---------- -#include "Scaling.inc" - - // Build a 16-element ramp 0.0 .. 15.0 in float32 (the code here does not normalize it) -// mov (8) SAMPLER_RAMP(0)<1> 0x76543210:v -// add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f -mov (4) SAMPLER_RAMP(0)<1> 0x48403000:vf //3, 2, 1, 0 in float vector -mov (4) SAMPLER_RAMP(0,4)<1> 0x5C585450:vf //7, 6, 5, 4 in float vector -add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f - -//Module: PrepareScaleCoord.asm - - // Setup for sampler msg hdr - mov (2) rMSGSRC.0<1>:ud 0:ud { NoDDClr } // Unused fields - mov (1) rMSGSRC.2<1>:ud 0:ud { NoDDChk } // Write and offset - - // Initialize all 16 v with the vertical origin (step Y is added once per row inside the loop); SCALE_COORD_Y keeps a GRF copy of the value written to the MRF payload - mov (16) mfMSGPAYLOAD(2)<1> fSRC_VID_V_ORI<0;1,0>:f - mov (16) SCALE_COORD_Y<1>:f fSRC_VID_V_ORI<0;1,0>:f - - // Calculate 16 u: u[i] = horizontal origin + step X * ramp[i] (origin preloaded into acc0, then mac) -// line (16) mfMSGPAYLOAD(0)<1> SCALE_STEP_X<0;1,0>:f SAMPLER_RAMP(0) // Assign to mrf directly - mov (16) acc0:f fSRC_VID_H_ORI<0;1,0>:f { Compr } - mac (16) mfMSGPAYLOAD(0)<1> fVIDEO_STEP_X<0;1,0>:f SAMPLER_RAMP(0) { Compr } - - //Setup the constants for the line instruction used in the loop (255.0 multiplier, 0.5 offset). NOTE(review): per Scaling.inc these sit at r43.4 and r43.7, which appears to overlap SAMPLER_RAMP(1); the ramp is not referenced after the mac above, confirm the reuse is intended - mov (1) SCALE_LINE_P255<1>:f 255.0:f { NoDDClr } //{ NoDDClr, NoDDChk } - mov (1) SCALE_LINE_P0_5<1>:f 0.5:f { NoDDChk } - - -//------------------------------------------------------------------------------ - -$for (0; <nY_NUM_OF_ROWS; 1) { - - // Read 16 sampled pixels and store them in float32 in 8 GRFs. NOTE(review): this comment originally gave the order as BGRA (VYUA), but the moves at the end of the loop store responses 0, 2, 4, 6 to DEST_R, DEST_G, DEST_B, DEST_A; confirm the channel order. 
- mov (8) MSGHDR_SCALE.0:ud rMSGSRC.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs - send (16) SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_RGB+nBI_CURRENT_SRC_RGB - - // Calculate 16 v for next line - add (16) mfMSGPAYLOAD(2)<1> SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Assign to mrf directly - add (16) SCALE_COORD_Y<1>:f SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Update the GRF copy of v (not the mrf) - - // Scale back to [0, 255] (line: multiply by 255.0 and add 0.5, result in acc0), then convert f to ud - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(0) { Compr } // Response (0): stored to DEST_R below - mov (16) SCALE_RESPONSE_YD(0)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(2) { Compr } // Response (2): stored to DEST_G below - mov (16) SCALE_RESPONSE_YD(2)<1> acc0:f { Compr } - - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(4) { Compr } // Response (4): stored to DEST_B below - mov (16) SCALE_RESPONSE_YD(4)<1> acc0:f { Compr } - -//#if defined(SAVE_ARGB) //Only needed if Alpha value is written to the destination - line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(6) { Compr } // Response (6): stored to DEST_A below - mov (16) SCALE_RESPONSE_YD(6)<1> acc0:f { Compr } -//#endif - - mov (16) DEST_R(%1)<1> SCALE_RESPONSE_YB(0) //possible error due to truncation - vK - mov (16) DEST_G(%1)<1> SCALE_RESPONSE_YB(2) //possible error due to truncation - vK - mov (16) DEST_B(%1)<1> SCALE_RESPONSE_YB(4) //possible error due to truncation - vK - mov (16) DEST_A(%1)<1> SCALE_RESPONSE_YB(6) //possible error due to truncation - vK -} diff --git a/i965_drv_video/shaders/post_processing/Core_Kernels/Scaling.inc b/i965_drv_video/shaders/post_processing/Core_Kernels/Scaling.inc deleted file mode 100644 index bf66d4c..0000000 --- a/i965_drv_video/shaders/post_processing/Core_Kernels/Scaling.inc +++ /dev/null @@ -1,75 +0,0 @@ -/* - * All Video Processing kernels - * Copyright © <2010>, Intel Corporation. 
- * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. - * - */ - -// File name: Scaling.inc - -#ifndef _SCALING_INC_ -#define _SCALING_INC_ - -// Local variables---------------------------------------------------------------------------------- -#define MSGHDR_SCALE m1 // Message Payload Header (Uses m2, m3, m4, m5 implicitly) - -//-------------------------------------------------------------------------------------------------- -//r10.0 thru r33.0; Primary surface read from sampler (16x8) -#define DEST_Y uwTOP_Y -#define DEST_U uwTOP_U -#define DEST_V uwTOP_V - -//r10.0 thru r41.0 (channel bases are 8 GRFs apart: B at r10, G at r18, R at r26, A at r34) -.declare DEST_B Base=REG(r,10) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_G Base=REG(r,18) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_R Base=REG(r,26) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw -.declare DEST_A Base=REG(r,34) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw - -//r56.0 thru r79.0 -.declare SCALE_RESPONSE_YF Base=REG(r,nBOT_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=f -.declare SCALE_RESPONSE_UF Base=REG(r,nBOT_U) ElementSize=4 SrcRegion=REGION(8,1) Type=f -.declare SCALE_RESPONSE_VF Base=REG(r,nBOT_V) ElementSize=4 SrcRegion=REGION(8,1) Type=f - -.declare SCALE_RESPONSE_YW Base=REG(r,nBOT_Y) ElementSize=2 SrcRegion=REGION(16,1) Type=uw -.declare SCALE_RESPONSE_UW Base=REG(r,nBOT_U) ElementSize=2 SrcRegion=REGION(16,1) Type=uw -.declare SCALE_RESPONSE_VW Base=REG(r,nBOT_V) ElementSize=2 SrcRegion=REGION(16,1) Type=uw - -.declare SCALE_RESPONSE_YD Base=REG(r,nBOT_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud -.declare SCALE_RESPONSE_UD Base=REG(r,nBOT_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud -.declare SCALE_RESPONSE_VD Base=REG(r,nBOT_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud - -.declare SCALE_RESPONSE_YB Base=REG(r,nBOT_Y) 
ElementSize=1 SrcRegion=REGION(8,4) Type=ub -.declare SCALE_RESPONSE_UB Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,4) Type=ub -.declare SCALE_RESPONSE_VB Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,4) Type=ub -.declare SAMPLER_RAMP Base=REG(r,42) ElementSize=4 SrcRegion=<8;8,1> Type=f // 2 GRFs, 16 elements - -//#define SCALE_STEP_X REG2(r,43,0) -//#define SCALE_COORD_X REG2(r,43,3) - -#define SCALE_LINE_P255 REG2(r,43,4) // = 255.0 Used in 'line' inst to multiply 255, add 0.5, and round to int. NOTE(review): r43 is also the second GRF of SAMPLER_RAMP (base r42, 2 GRFs); confirm the overlap is intended. -#define SCALE_LINE_P0_5 REG2(r,43,7) // = 0.5 (index 7, i.e. 3 after SCALE_LINE_P255 at index 4) - -//r44.0 thru r45.0 -#define SCALE_COORD_Y REG(r,44) //2GRF - - -// Send Message [DevILK] Message Descriptor -// MBZ MsgL=5 MsgR=8 H MBZ SIMD MsgType SmplrIndx BindTab -// 000 0 101 0 1000 1 0 10 0000 0000 00000000 -// 0 A 8 A 0 0 0 0 -// MsgL=1+2*2(u,v)=5 MsgR=8 -#define SMPLR_MSG_DSC 0x0A8A0000 // ILK Sampler Message Descriptor (sampler index and binding table index fields are 0 here, per the bit layout above) - -// Re-define new number of lines -#undef nY_NUM_OF_ROWS -#undef nUV_NUM_OF_ROWS - -#define nY_NUM_OF_ROWS 8 -#define nUV_NUM_OF_ROWS 8 - - -#endif //_SCALING_INC_ |