From 6c41b3d7a8799daa9d0b34dcfbfc5c891c24fffa Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 17 Apr 2012 11:07:05 +1000 Subject: nv30/nv40: upload shaders directly from rankine/curie init Also removes A8 surface shader hack, and implements proper shaders for the specific purpose. Signed-off-by: Ben Skeggs --- src/Makefile.am | 2 - src/nv04_accel.h | 46 ++++++- src/nv30_exa.c | 307 +++++++++++++++++++++++++++++++++++++---------- src/nv30_shaders.c | 344 ---------------------------------------------------- src/nv30_shaders.h | 72 ----------- src/nv30_xv_tex.c | 17 ++- src/nv40_exa.c | 346 ++++++++++++++++++++++++++++++++++++++++++----------- src/nv40_xv_tex.c | 19 ++- 8 files changed, 583 insertions(+), 570 deletions(-) delete mode 100644 src/nv30_shaders.c delete mode 100644 src/nv30_shaders.h diff --git a/src/Makefile.am b/src/Makefile.am index 879f79c..0bdd780 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -48,8 +48,6 @@ nouveau_drv_la_SOURCES = \ nv04_xv_blit.c \ nv10_exa.c \ nv10_xv_ovl.c \ - nv30_shaders.c \ - nv30_shaders.h \ nv30_exa.c \ nv30_xv_tex.c \ nv40_exa.c \ diff --git a/src/nv04_accel.h b/src/nv04_accel.h index e1b4d8f..7100e85 100644 --- a/src/nv04_accel.h +++ b/src/nv04_accel.h @@ -1,11 +1,22 @@ #ifndef __NV04_ACCEL_H__ #define __NV04_ACCEL_H__ +#include "hwdefs/nv_object.xml.h" +#include "hwdefs/nv01_2d.xml.h" + #define XV_TABLE_SIZE 512 /* scratch buffer offsets */ -#define FRAGPROG 0x00000000 -#define XV_TABLE 0x00001000 +#define PFP_PASS 0x00000000 +#define PFP_S 0x00000100 +#define PFP_C 0x00000200 +#define PFP_CCA 0x00000300 +#define PFP_CCASA 0x00000400 +#define PFP_S_A8 0x00000500 +#define PFP_C_A8 0x00000600 +#define PFP_NV12_BILINEAR 0x00000700 +#define PFP_NV12_BICUBIC 0x00000800 +#define XV_TABLE 0x00001000 /* subchannel assignments */ #define SUBC_M2MF(mthd) 0, (mthd) @@ -37,4 +48,35 @@ #define NV30_3D(mthd) SUBC_3D(NV30_3D_##mthd) #define NV40_3D(mthd) SUBC_3D(NV40_3D_##mthd) +static __inline__ void +PUSH_DATAu(struct 
nouveau_pushbuf *push, struct nouveau_bo *bo, + unsigned delta, unsigned dwords) +{ + const uint32_t domain = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + struct nouveau_pushbuf_refn refs[] = { { bo, domain } }; + unsigned pitch = ((dwords * 4) + 63) & ~63; + + if (nouveau_pushbuf_space(push, 32 + dwords, 2, 0) || + nouveau_pushbuf_refn (push, refs, 1)) + return; + + BEGIN_NV04(push, NV01_SUBC(MISC, OBJECT), 1); + PUSH_DATA (push, NvClipRectangle); + BEGIN_NV04(push, NV01_CLIP(POINT), 2); + PUSH_DATA (push, (0 << 16) | 0); + PUSH_DATA (push, (1 << 16) | dwords); + BEGIN_NV04(push, NV04_SF2D(FORMAT), 4); + PUSH_DATA (push, NV04_SURFACE_2D_FORMAT_A8R8G8B8); + PUSH_DATA (push, (pitch << 16) | pitch); + PUSH_RELOC(push, bo, delta, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, bo, delta, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV01_IFC(OPERATION), 5); + PUSH_DATA (push, NV01_IFC_OPERATION_SRCCOPY); + PUSH_DATA (push, NV01_IFC_COLOR_FORMAT_A8R8G8B8); + PUSH_DATA (push, (0 << 16) | 0); + PUSH_DATA (push, (1 << 16) | dwords); + PUSH_DATA (push, (1 << 16) | dwords); + BEGIN_NV04(push, NV01_IFC(COLOR(0)), dwords); +} + #endif diff --git a/src/nv30_exa.c b/src/nv30_exa.c index 7ac318c..0df2d0f 100644 --- a/src/nv30_exa.c +++ b/src/nv30_exa.c @@ -24,7 +24,6 @@ */ #include "nv_include.h" -#include "nv30_shaders.h" #include "hwdefs/nv_object.xml.h" #include "hwdefs/nv30-40_3d.xml.h" @@ -85,48 +84,6 @@ NV30_GetPictSurfaceFormat(int format) return NULL; } -enum { - NV30EXA_FPID_PASS_COL0 = 0, - NV30EXA_FPID_PASS_TEX0 = 1, - NV30EXA_FPID_COMPOSITE_MASK = 2, - NV30EXA_FPID_COMPOSITE_MASK_SA_CA = 3, - NV30EXA_FPID_COMPOSITE_MASK_CA = 4, - NV30EXA_FPID_MAX = 5 -} NV30EXA_FPID; - -static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = { - &nv30_fp_pass_col0, - &nv30_fp_pass_tex0, - &nv30_fp_composite_mask, - &nv30_fp_composite_mask_sa_ca, - &nv30_fp_composite_mask_ca -}; - -static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX]; - -static void -NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn) -{ - int s; - - 
for (s = 0; s < NV30EXA_FPID_MAX; s++) { - nv_shader_t *def, *a8; - - def = nv40_fp_map[s]; - a8 = calloc(1, sizeof(nv_shader_t)); - a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs; - a8->size = def->size + 4; - memcpy(a8->data, def->data, def->size * sizeof(uint32_t)); - nv40_fp_map_a8[s] = a8; - - a8->data[a8->size - 8 + 0] &= ~0x00000081; - a8->data[a8->size - 4 + 0] = 0x01401e81; - a8->data[a8->size - 4 + 1] = 0x1c9dfe00; - a8->data[a8->size - 4 + 2] = 0x0001c800; - a8->data[a8->size - 4 + 3] = 0x0001c800; - } -} - /* should be in nouveau_reg.h at some point.. */ #define NV30_3D_TEX_SWIZZLE_UNIT_S0_X_ZERO 0 #define NV30_3D_TEX_SWIZZLE_UNIT_S0_X_ONE 1 @@ -462,7 +419,7 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict, NVPtr pNv = NVPTR(pScrn); nv_pict_op_t *blend = NV30_GetPictOpRec(op); struct nouveau_pushbuf *push = pNv->pushbuf; - int fpid = NV30EXA_FPID_PASS_COL0; + uint32_t fragprog; NV30EXA_STATE; if (!PUSH_SPACE(push, 128)) @@ -481,26 +438,37 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict, if (!NV30EXATexture(pScrn, pmPix, pmPict, 1)) return FALSE; + if (pdPict->format == PICT_a8) { + fragprog = PFP_C_A8; + } else if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) { if (blend->src_alpha) - fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA; + fragprog = PFP_CCASA; else - fpid = NV30EXA_FPID_COMPOSITE_MASK_CA; + fragprog = PFP_CCA; } else { - fpid = NV30EXA_FPID_COMPOSITE_MASK; + fragprog = PFP_C; } state->have_mask = TRUE; } else { - fpid = NV30EXA_FPID_PASS_TEX0; - + if (pdPict->format == PICT_a8) + fragprog = PFP_S_A8; + else + fragprog = PFP_S; state->have_mask = FALSE; } - if (!NV30_LoadFragProg(pScrn, (pdPict->format == PICT_a8) ? 
- nv40_fp_map_a8[fpid] : nv40_fp_map[fpid])) - return FALSE; - + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, fragprog, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_REG_CONTROL), 1); + PUSH_DATA (push, 0x0001000f); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x00000000); BEGIN_NV04(push, NV30_3D(TEX_UNITS_ENABLE), 1); PUSH_DATA (push, pmPict ? 3 : 1); @@ -629,10 +597,8 @@ NVAccelInitNV30TCL(ScrnInfoPtr pScrn) struct nouveau_pushbuf *push = pNv->pushbuf; struct nv04_fifo *fifo = pNv->channel->data; uint32_t class = 0, chipset; - int next_hw_offset = FRAGPROG, i; + int i; - if (!nv40_fp_map_a8[0]) - NV30EXAHackupA8Shaders(pScrn); NVXVComputeBicubicFilter(pNv->scratch, XV_TABLE, XV_TABLE_SIZE); #define NV30TCL_CHIPSET_3X_MASK 0x00000003 @@ -702,11 +668,11 @@ NVAccelInitNV30TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 0); PUSH_DATA (push, 0x3f800000); BEGIN_NV04(push, SUBC_3D(0x1f80), 16); - PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); + PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); + PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0x0000ffff); - PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); + PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); + PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, SUBC_3D(0x120), 3); PUSH_DATA (push, 0); @@ -721,6 +687,8 @@ NVAccelInitNV30TCL(ScrnInfoPtr pScrn) BEGIN_NV04(push, SUBC_3D(0x1d88), 1); PUSH_DATA (push, 0x00001200); + BEGIN_NV04(push, 
NV30_3D(MULTISAMPLE_CONTROL), 1); + PUSH_DATA (push, 0xffff0000); BEGIN_NV04(push, NV30_3D(RC_ENABLE), 1); PUSH_DATA (push, 0); @@ -857,12 +825,219 @@ NVAccelInitNV30TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 4096<<16); PUSH_DATA (push, 4096<<16); - for (i = 0; i < NV30EXA_FPID_MAX; i++) { - NV30_UploadFragProg(pNv, nv40_fp_map[i], &next_hw_offset); - NV30_UploadFragProg(pNv, nv40_fp_map_a8[i], &next_hw_offset); - } - NV30_UploadFragProg(pNv, &nv30_fp_yv12_bicubic, &next_hw_offset); - NV30_UploadFragProg(pNv, &nv30_fp_yv12_bilinear, &next_hw_offset); + PUSH_DATAu(push, pNv->scratch, PFP_PASS, 1 * 4); + PUSH_DATA (push, 0x01403e81); /* mov r0, a[col0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + + PUSH_DATAu(push, pNv->scratch, PFP_S, 2 * 4); + PUSH_DATA (push, 0x17009e00); /* tex r0, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x01401e81); /* mov r0, r0 */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_S_A8, 2 * 4); + PUSH_DATA (push, 0x17009000); /* tex r0.w, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x01401e81); /* mov r0, r0.w */ + PUSH_DATA (push, 0x1c9dfe00); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_C, 3 * 4); + PUSH_DATA (push, 0x1702b102); /* texc0 r1.w, a[tex1], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x17009e00); /* tex r0 (ne0.w), a[tex0], t[0] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r0, r1.w */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001fe04); + PUSH_DATA (push, 0x0001c800); + + 
PUSH_DATAu(push, pNv->scratch, PFP_C_A8, 3 * 4); + PUSH_DATA (push, 0x1702b102); /* texc0 r1.w, a[tex1], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x17009000); /* tex r0.w (ne0.w), a[tex0], t[0] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r0.w, r1.w */ + PUSH_DATA (push, 0x1c9dfe00); + PUSH_DATA (push, 0x0001fe04); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_CCA, 3 * 4); + PUSH_DATA (push, 0x17009f00); /* texc0 r0, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x1702be02); /* tex r1 (ne0), a[tex1], t[1] */ + PUSH_DATA (push, 0x1c95c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r0, r1 */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_CCASA, 3 * 4); + PUSH_DATA (push, 0x17009102); /* texc0 r1.w, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x1702be00); /* tex r0 (ne0.w), a[tex1], t[1] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r1.w, r0 */ + PUSH_DATA (push, 0x1c9dfe04); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_NV12_BILINEAR, 8 * 4); + PUSH_DATA (push, 0x17028200); /* texr r0.x, a[tex0], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x04000e02); /* madr r1.xyz, r0.x, imm.x, imm.yzww */ + PUSH_DATA (push, 0x1c9c0000); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001f202); + PUSH_DATA (push, 0x3f9507c8); 
/* { 1.16, -0.87, 0.53, -1.08 } */ + PUSH_DATA (push, 0xbf5ee393); + PUSH_DATA (push, 0x3f078fef); + PUSH_DATA (push, 0xbf8a6762); + PUSH_DATA (push, 0x1704ac80); /* texr r0.yz, a[tex1], t[2] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x04000e02); /* madr r1.xyz, r0.y, imm, r1 */ + PUSH_DATA (push, 0x1c9cab00); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */ + PUSH_DATA (push, 0xbec890d6); + PUSH_DATA (push, 0x40011687); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x04000e81); /* madr r0.xyz, r0.z, imm, r1 */ + PUSH_DATA (push, 0x1c9d5500); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */ + PUSH_DATA (push, 0xbf501a37); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + + PUSH_DATAu(push, pNv->scratch, PFP_NV12_BICUBIC, 24 * 4); + PUSH_DATA (push, 0x01008604); /* movr r2.xy, a[tex0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x03000600); /* addr r0.xy, r2, imm.x */ + PUSH_DATA (push, 0x1c9dc808); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3f000000); /* { 0.50, 0.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x17000e06); /* texr r3.xyz, r0, t[0] */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x17000e00); /* texr r0.xyz, r0.y, t[0] */ + PUSH_DATA (push, 0x1c9caa00); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x02000a02); /* mulr r1.xz, r3.xxyy, imm.xxyy */ + PUSH_DATA (push, 0x1c9ca00c); + PUSH_DATA (push, 0x0000a002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0xbf800000); /* { -1.00, 1.00, 
0.00, 0.00 } */ + PUSH_DATA (push, 0x3f800000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x02001402); /* mulr r1.yw, r0.xxyy, imm.xxyy */ + PUSH_DATA (push, 0x1c9ca000); + PUSH_DATA (push, 0x0000a002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0xbf800000); /* { -1.00, 1.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x3f800000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x03001e04); /* addr r2, r2.xyxy, r1 */ + PUSH_DATA (push, 0x1c9c8808); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x17020200); /* texr r0.x, r2, t[1] */ + PUSH_DATA (push, 0x1c9dc808); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x17020402); /* texr r1.y, r2.xwxw, t[1] */ + PUSH_DATA (push, 0x1c9d9808); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x17020202); /* texr r1.x, r2.zyxy, t[1] */ + PUSH_DATA (push, 0x1c9c8c08); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x1f400280); /* lrph r0.x, r0.z, r0, r1.y */ + PUSH_DATA (push, 0x1c9d5400); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0000aa04); + PUSH_DATA (push, 0x17020400); /* texr r0.y, r2.zwzz, t[1] */ + PUSH_DATA (push, 0x1c9d5c08); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x1f400480); /* lrph r0.y, r0.z, r1.x, r0 */ + PUSH_DATA (push, 0x1c9d5400); + PUSH_DATA (push, 0x00000004); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x1f400280); /* lrph r0.x, r3.z, r0, r0.y */ + PUSH_DATA (push, 0x1c9d540c); + PUSH_DATA (push, 0x0001c900); + PUSH_DATA (push, 0x0000ab00); + PUSH_DATA (push, 0x04400e80); /* madh r0.xyz, r0.x, imm.x, imm.yzww */ + PUSH_DATA (push, 0x1c9c0100); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001f202); + PUSH_DATA (push, 0x3f9507c8); /* { 1.16, -0.87, 0.53, -1.08 } */ + PUSH_DATA (push, 0xbf5ee393); + PUSH_DATA (push, 
0x3f078fef); + PUSH_DATA (push, 0xbf8a6762); + PUSH_DATA (push, 0x1704ac02); /* texr r1.yz, a[tex1], t[2] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x04400e80); /* madh r0.xyz, r1.y, imm, r0 */ + PUSH_DATA (push, 0x1c9caa04); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c900); + PUSH_DATA (push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */ + PUSH_DATA (push, 0xbec890d6); + PUSH_DATA (push, 0x40011687); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x04400e81); /* madh r0.xyz, r1.z, imm, r0 */ + PUSH_DATA (push, 0x1c9d5404); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c900); + PUSH_DATA (push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */ + PUSH_DATA (push, 0xbf501a37); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); return TRUE; } diff --git a/src/nv30_shaders.c b/src/nv30_shaders.c deleted file mode 100644 index 6a72d8d..0000000 --- a/src/nv30_shaders.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -#include "nv30_shaders.h" - -#include "hwdefs/nv30-40_3d.xml.h" -#include "nv04_accel.h" - -void NV30_UploadFragProg(NVPtr pNv, nv_shader_t *shader, int *hw_offset) -{ - uint32_t data, i; - uint32_t *map; - - shader->hw_id = *hw_offset; - - map = pNv->scratch->map + *hw_offset; - for (i = 0; i < shader->size; i++) { - data = shader->data[i]; -#if (X_BYTE_ORDER != X_LITTLE_ENDIAN) - data = ((data >> 16) | ((data & 0xffff) << 16)); -#endif - map[i] = data; - } - - *hw_offset += (shader->size * sizeof(uint32_t)); - *hw_offset = (*hw_offset + 63) & ~63; -} - -void NV40_UploadVtxProg(NVPtr pNv, nv_shader_t *shader, int *hw_id) -{ - struct nouveau_pushbuf *push = pNv->pushbuf; - int i; - - shader->hw_id = *hw_id; - - BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); - PUSH_DATA (push, (shader->hw_id)); - for (i=0; i<shader->size; i+=4) { - BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); - PUSH_DATA (push, shader->data[i + 0]); - PUSH_DATA (push, shader->data[i + 1]); - PUSH_DATA (push, shader->data[i + 2]); - PUSH_DATA (push, shader->data[i + 3]); - (*hw_id)++; - } -} - -Bool -NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader) -{ - NVPtr pNv = NVPTR(pScrn); - struct nouveau_pushbuf *push = pNv->pushbuf; - - BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); - PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, - shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - NV30_3D_FP_ACTIVE_PROGRAM_DMA0, - NV30_3D_FP_ACTIVE_PROGRAM_DMA1); - BEGIN_NV04(push, NV30_3D(FP_REG_CONTROL), 1); - PUSH_DATA (push, (1 << 16) | 0xf); - BEGIN_NV04(push, NV30_3D(MULTISAMPLE_CONTROL), 1); - PUSH_DATA (push, 0xffff0000); - BEGIN_NV04(push, NV30_3D(FP_CONTROL),1); - PUSH_DATA (push, 
(shader->card_priv.NV30FP.num_regs - 1) / 2); - - return TRUE; -} - -void -NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader) -{ - NVPtr pNv = NVPTR(pScrn); - struct nouveau_pushbuf *push = pNv->pushbuf; - - BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); - PUSH_DATA (push, (shader->hw_id)); - BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); - PUSH_DATA (push, shader->card_priv.NV30VP.vp_in_reg); - PUSH_DATA (push, shader->card_priv.NV30VP.vp_out_reg); -} - -Bool -NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader) -{ - NVPtr pNv = NVPTR(pScrn); - struct nouveau_pushbuf *push = pNv->pushbuf; - - BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); - PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, - shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - NV30_3D_FP_ACTIVE_PROGRAM_DMA0, - NV30_3D_FP_ACTIVE_PROGRAM_DMA1); - BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); - PUSH_DATA (push, shader->card_priv.NV30FP.num_regs << - NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT); - - return TRUE; -} - -/******************************************************************************* - * NV40/G70 vertex shaders - */ - -nv_shader_t nv40_vp_exa_render = { - .card_priv.NV30VP.vp_in_reg = 0x00000309, - .card_priv.NV30VP.vp_out_reg = 0x0000c001, - .size = (3*4), - .data = { - /* MOV result.position, vertex.position */ - 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80, - /* MOV result.texcoord[0], vertex.texcoord[0] */ - 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c, - /* MOV result.texcoord[1], vertex.texcoord[1] */ - 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1, - } -}; - -/******************************************************************************* - * NV30/NV40/G70 fragment shaders - */ - -nv_shader_t nv30_fp_pass_col0 = { - .card_priv.NV30FP.num_regs = 2, - .size = (1*4), - .data = { - /* MOV R0, fragment.color */ - 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - } -}; - -nv_shader_t nv30_fp_pass_tex0 = { - 
.card_priv.NV30FP.num_regs = 2, - .size = (2*4), - .data = { - /* TEX R0, fragment.texcoord[0], texture[0], 2D */ - 0x17009e00, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* MOV R0, R0 */ - 0x01401e81, 0x1c9dc800, 0x0001c800, 0x0001c800, - } -}; - -nv_shader_t nv30_fp_composite_mask = { - .card_priv.NV30FP.num_regs = 2, - .size = (3*4), - .data = { - /* TEXC0 R1.w , fragment.texcoord[1], texture[1], 2D */ - 0x1702b102, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* TEX R0 (NE0.wwww), fragment.texcoord[0], texture[0], 2D */ - 0x17009e00, 0x1ff5c801, 0x0001c800, 0x3fe1c800, - /* MUL R0 , R0, R1.w */ - 0x02001e81, 0x1c9dc800, 0x0001fe04, 0x0001c800, - } -}; - -nv_shader_t nv30_fp_composite_mask_sa_ca = { - .card_priv.NV30FP.num_regs = 2, - .size = (3*4), - .data = { - /* TEXC0 R1.w , fragment.texcoord[0], texture[0], 2D */ - 0x17009102, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* TEX R0 (NE0.wwww), fragment.texcoord[1], texture[1], 2D */ - 0x1702be00, 0x1ff5c801, 0x0001c800, 0x3fe1c800, - /* MUL R0 , R1,wwww, R0 */ - 0x02001e81, 0x1c9dfe04, 0x0001c800, 0x0001c800, - } -}; - -nv_shader_t nv30_fp_composite_mask_ca = { - .card_priv.NV30FP.num_regs = 2, - .size = (3*4), - .data = { - /* TEXC0 R0 , fragment.texcoord[0], texture[0], 2D */ - 0x17009f00, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* TEX R1 (NE0.xyzw), fragment.texcoord[1], texture[1], 2D */ - 0x1702be02, 0x1c95c801, 0x0001c800, 0x3fe1c800, - /* MUL R0 , R0, R1 */ - 0x02001e81, 0x1c9dc800, 0x0001c804, 0x0001c800, - } -}; - -nv_shader_t nv40_vp_video = { - .card_priv.NV30VP.vp_in_reg = 0x00000309, - .card_priv.NV30VP.vp_out_reg = 0x0000c001, - .size = (3*4), - .data = { - /* MOV result.position, vertex.position */ - 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80, - /* MOV result.texcoord[0], vertex.texcoord[0] */ - 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c, - /* MOV result.texcoord[1], vertex.texcoord[1] */ - 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1, - } -}; - -nv_shader_t nv40_fp_yv12_bicubic = { - 
.card_priv.NV30FP.num_regs = 4, - .size = (29*4), - .data = { - /* INST 0: MOVR R0.xy (TR0.xyzw), attrib.texcoord[0] */ - 0x01008600, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* INST 1: ADDR R0.z (TR0.xyzw), R0.yyyy, { 0.50, 0.00, 0.00, 0.00 }.xxxx */ - 0x03000800, 0x1c9caa00, 0x00000002, 0x0001c800, - 0x3f000000, 0x00000000, 0x00000000, 0x00000000, - /* INST 2: ADDR R1.x (TR0.xyzw), R0, { 0.50, 0.00, 0.00, 0.00 }.xxxx */ - 0x03000202, 0x1c9dc800, 0x00000002, 0x0001c800, - 0x3f000000, 0x00000000, 0x00000000, 0x00000000, - /* INST 3: TEXRC0 R1.xyz (TR0.xyzw), R0.zzzz, texture[0] */ - 0x17000f82, 0x1c9d5400, 0x0001c800, 0x0001c800, - /* INST 4: MULR R2.yw (TR0.xyzw), R1.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */ - 0x02001404, 0x1c9ca104, 0x0000a002, 0x0001c800, - 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, - /* INST 5: TEXR R3.xyz (TR0.xyzw), R1, texture[0] */ - 0x17000e86, 0x1c9dc804, 0x0001c800, 0x0001c800, - /* INST 6: MULR R2.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */ - 0x02000a04, 0x1c9ca10c, 0x0000a002, 0x0001c800, - 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, - /* INST 7: ADDR R2 (TR0.xyzw), R0.xyxy, R2 */ - 0x03001e04, 0x1c9c8800, 0x0001c808, 0x0001c800, - /* INST 8: TEXR R1.y (TR0.xyzw), R2.zwzz, -texture[1] */ - 0x17020402, 0x1c9d5c08, 0x0001c800, 0x0001c800, - /* INST 9: MADH R1.x (TR0.xyzw), -R1.zzzz, R1.yyyy, R1.yyyy */ - 0x04400282, 0x1c9f5504, 0x0000aa04, 0x0000aa04, - /* INST 10: TEXR R0.y (TR0.xyzw), R2.xwxw, -texture[1] */ - 0x17020400, 0x1c9d9808, 0x0001c800, 0x0001c800, - /* INST 11: MADH R0.w (TR0.xyzw), -R1.zzzz, R0.yyyy, R0.yyyy */ - 0x04401080, 0x1c9f5504, 0x0000aa00, 0x0000aa00, - /* INST 12: TEXR R0.x (TR0.xyzw), R2.zyxy, texture[1] */ - 0x17020200, 0x1c9c8c08, 0x0001c800, 0x0001c800, - /* INST 13: MADH R1.x (TR0.xyzw), R1.zzzz, R0, R1 */ - 0x04400282, 0x1c9d5504, 0x0001c800, 0x0001c904, - /* INST 14: TEXR R0.x (NE0.zzzz), R2, texture[1] */ - 0x17020200, 0x1555c808, 0x0001c800, 0x0001c800, - /* INST 15: MADH R0.x 
(TR0.xyzw), R1.zzzz, R0, R0.wwww */ - 0x04400280, 0x1c9d5504, 0x0001c800, 0x0001ff00, - /* INST 16: MADH R0.w (TR0.xyzw), -R3.zzzz, R1.xxxx, R1.xxxx */ - 0x04401080, 0x1c9f550c, 0x00000104, 0x00000104, - /* INST 17: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */ - 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* INST 18: MADH R0.x (TR0.xyzw), R3.zzzz, R0, R0.wwww */ - 0x04400280, 0x1c9d550c, 0x0001c900, 0x0001ff00, - /* INST 19: MADH R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */ - 0x04400e82, 0x1c9c0100, 0x00000002, 0x0001f202, - 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762, - /* INST 20: MADH R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */ - 0x04400e82, 0x1c9cab00, 0x0001c802, 0x0001c904, - 0x00000000, 0xbec890d6, 0x40011687, 0x00000000, - /* INST 21: MADH R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */ - 0x04400e81, 0x1c9d5500, 0x0001c802, 0x0001c904, - 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000, - } -}; - -nv_shader_t nv30_fp_yv12_bicubic = { - .card_priv.NV30FP.num_regs = 4, - .size = (24*4), - .data = { - /* INST 0: MOVR R2.xy (TR0.xyzw), attrib.texcoord[0] */ - 0x01008604, 0x1c9dc801, 0x0001c800, 0x0001c800, - /* INST 1: ADDR R0.xy (TR0.xyzw), R2, { 0.50, 0.00, 0.00, 0.00 }.xxxx */ - 0x03000600, 0x1c9dc808, 0x00000002, 0x0001c800, - 0x3f000000, 0x00000000, 0x00000000, 0x00000000, - /* INST 2: TEXR R3.xyz (TR0.xyzw), R0, texture[0] */ - 0x17000e06, 0x1c9dc800, 0x0001c800, 0x0001c800, - /* INST 3: TEXR R0.xyz (TR0.xyzw), R0.yyyy, texture[0] */ - 0x17000e00, 0x1c9caa00, 0x0001c800, 0x0001c800, - /* INST 4: MULR R1.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */ - 0x02000a02, 0x1c9ca00c, 0x0000a002, 0x0001c800, - 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, - /* INST 5: MULR R1.yw (TR0.xyzw), R0.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */ - 0x02001402, 0x1c9ca000, 0x0000a002, 0x0001c800, - 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, 
- /* INST 6: ADDR R2 (TR0.xyzw), R2.xyxy, R1 */ - 0x03001e04, 0x1c9c8808, 0x0001c804, 0x0001c800, - /* INST 7: TEXR R0.x (TR0.xyzw), R2, texture[1] */ - 0x17020200, 0x1c9dc808, 0x0001c800, 0x0001c800, - /* INST 8: TEXR R1.y (TR0.xyzw), R2.xwxw, texture[1] */ - 0x17020402, 0x1c9d9808, 0x0001c800, 0x0001c800, - /* INST 9: TEXR R1.x (TR0.xyzw), R2.zyxy, texture[1] */ - 0x17020202, 0x1c9c8c08, 0x0001c800, 0x0001c800, - /* INST 10: LRPH R0.x (TR0.xyzw), R0.zzzz, R0, R1.yyyy */ - 0x1f400280, 0x1c9d5400, 0x0001c800, 0x0000aa04, - /* INST 11: TEXR R0.y (TR0.xyzw), R2.zwzz, texture[1] */ - 0x17020400, 0x1c9d5c08, 0x0001c800, 0x0001c800, - /* INST 12: LRPH R0.y (TR0.xyzw), R0.zzzz, R1.xxxx, R0 */ - 0x1f400480, 0x1c9d5400, 0x00000004, 0x0001c800, - /* INST 13: LRPH R0.x (TR0.xyzw), R3.zzzz, R0, R0.yyyy */ - 0x1f400280, 0x1c9d540c, 0x0001c900, 0x0000ab00, - /* INST 14: MADH R0.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */ - 0x04400e80, 0x1c9c0100, 0x00000002, 0x0001f202, - 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762, - /* INST 15: TEXR R1.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */ - 0x1704ac02, 0x1c9dc801, 0x0001c800, 0x0001c800, - /* INST 16: MADH R0.xyz (TR0.xyzw), R1.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R0 */ - 0x04400e80, 0x1c9caa04, 0x0001c802, 0x0001c900, - 0x00000000, 0xbec890d6, 0x40011687, 0x00000000, - /* INST 17: MADH R0.xyz (TR0.xyzw), R1.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R0 + END */ - 0x04400e81, 0x1c9d5404, 0x0001c802, 0x0001c900, - 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000, - } -}; - -nv_shader_t nv30_fp_yv12_bilinear = { - .card_priv.NV30FP.num_regs = 2, - .size = (8*4), - .data = { - /* INST 0: TEXR R0.x (TR0.xyzw), attrib.texcoord[0], abs(texture[1]) */ - 0x17028200, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* INST 1: MADR R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */ - 0x04000e02, 0x1c9c0000, 0x00000002, 0x0001f202, - 0x3f9507c8, 
0xbf5ee393, 0x3f078fef, 0xbf8a6762, - /* INST 2: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */ - 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800, - /* INST 3: MADR R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */ - 0x04000e02, 0x1c9cab00, 0x0001c802, 0x0001c804, - 0x00000000, 0xbec890d6, 0x40011687, 0x00000000, - /* INST 4: MADR R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */ - 0x04000e81, 0x1c9d5500, 0x0001c802, 0x0001c804, - 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000, - } -}; - - diff --git a/src/nv30_shaders.h b/src/nv30_shaders.h deleted file mode 100644 index 3e666fa..0000000 --- a/src/nv30_shaders.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __NV30_SHADERS_H__ -#define __NV30_SHADERS_H__ - -#define NV_SHADER_MAX_PROGRAM_LENGTH 256 - -#include "nv_include.h" - -typedef struct nv_shader { - uint32_t hw_id; - uint32_t size; - union { - struct { - uint32_t vp_in_reg; - uint32_t vp_out_reg; - } NV30VP; - struct { - uint32_t num_regs; - } NV30FP; - } card_priv; - uint32_t data[NV_SHADER_MAX_PROGRAM_LENGTH]; -} nv_shader_t; - -void NV30_UploadFragProg(NVPtr pNv, nv_shader_t *shader, int *hw_offset); -void NV40_UploadVtxProg(NVPtr pNv, nv_shader_t *shader, int *hw_id); -void NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader); -Bool NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader); -Bool NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader); - - -/******************************************************************************* - * NV40/G70 vertex shaders - */ - -nv_shader_t nv40_vp_exa_render; -nv_shader_t nv40_vp_video; - -/******************************************************************************* - * NV30/NV40/G70 fragment shaders - */ - -nv_shader_t nv30_fp_pass_col0; -nv_shader_t nv30_fp_pass_tex0; -nv_shader_t nv30_fp_composite_mask; -nv_shader_t nv30_fp_composite_mask_sa_ca; -nv_shader_t nv30_fp_composite_mask_ca; -nv_shader_t nv30_fp_yv12_bicubic; -nv_shader_t nv30_fp_yv12_bilinear; -nv_shader_t nv40_fp_yv12_bicubic; - -#endif diff --git a/src/nv30_xv_tex.c b/src/nv30_xv_tex.c index 88fed3e..7ab0520 100644 --- a/src/nv30_xv_tex.c +++ b/src/nv30_xv_tex.c @@ -36,8 +36,6 @@ #include "nv_include.h" #include "nv_dma.h" -#include "nv30_shaders.h" - #include "hwdefs/nv30-40_3d.xml.h" #include "nv04_accel.h" @@ -259,10 +257,17 @@ NV30PutTextureImage(ScrnInfoPtr pScrn, struct nouveau_bo *src, int src_offset, if (drw_w / 2 < src_w || drw_h / 2 < src_h) bicubic = FALSE; - if (!NV30_LoadFragProg(pScrn, bicubic ? 
- &nv30_fp_yv12_bicubic : - &nv30_fp_yv12_bilinear)) - return BadImplementation; + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, + bicubic ? PFP_NV12_BICUBIC : PFP_NV12_BILINEAR, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_REG_CONTROL), 1); + PUSH_DATA (push, 0x0001000f); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x00000001); nouveau_pushbuf_bufctx(push, pNv->bufctx); if (nouveau_pushbuf_validate(push)) { diff --git a/src/nv40_exa.c b/src/nv40_exa.c index 42fc428..c4249a8 100644 --- a/src/nv40_exa.c +++ b/src/nv40_exa.c @@ -21,7 +21,6 @@ */ #include "nv_include.h" -#include "nv30_shaders.h" #include "hwdefs/nv_object.xml.h" #include "hwdefs/nv30-40_3d.xml.h" @@ -80,48 +79,6 @@ NV40_GetPictSurfaceFormat(int format) return NULL; } -enum { - NV40EXA_FPID_PASS_COL0 = 0, - NV40EXA_FPID_PASS_TEX0 = 1, - NV40EXA_FPID_COMPOSITE_MASK = 2, - NV40EXA_FPID_COMPOSITE_MASK_SA_CA = 3, - NV40EXA_FPID_COMPOSITE_MASK_CA = 4, - NV40EXA_FPID_MAX = 5 -} NV40EXA_FPID; - -static nv_shader_t *nv40_fp_map[NV40EXA_FPID_MAX] = { - &nv30_fp_pass_col0, - &nv30_fp_pass_tex0, - &nv30_fp_composite_mask, - &nv30_fp_composite_mask_sa_ca, - &nv30_fp_composite_mask_ca -}; - -static nv_shader_t *nv40_fp_map_a8[NV40EXA_FPID_MAX]; - -static void -NV40EXAHackupA8Shaders(ScrnInfoPtr pScrn) -{ - int s; - - for (s = 0; s < NV40EXA_FPID_MAX; s++) { - nv_shader_t *def, *a8; - - def = nv40_fp_map[s]; - a8 = calloc(1, sizeof(nv_shader_t)); - a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs; - a8->size = def->size + 4; - memcpy(a8->data, def->data, def->size * sizeof(uint32_t)); - nv40_fp_map_a8[s] = a8; - - a8->data[a8->size - 8 + 0] &= ~0x00000081; - a8->data[a8->size - 4 + 0] = 0x01401e81; - a8->data[a8->size - 4 + 1] = 0x1c9dfe00; - a8->data[a8->size - 4 + 2] = 0x0001c800; - 
a8->data[a8->size - 4 + 3] = 0x0001c800; - } -} - #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ { \ PICT_##r, NV40_3D_TEX_FORMAT_FORMAT_##tf, \ @@ -406,7 +363,7 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict, NVPtr pNv = NVPTR(pScrn); nv_pict_op_t *blend = NV40_GetPictOpRec(op); struct nouveau_pushbuf *push = pNv->pushbuf; - int fpid = NV40EXA_FPID_PASS_COL0; + uint32_t fragprog; NV40EXA_STATE; if (!PUSH_SPACE(push, 128)) @@ -421,31 +378,39 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict, !NV40EXATexture(pScrn, psPix, psPict, 0)) return FALSE; - NV40_LoadVtxProg(pScrn, &nv40_vp_exa_render); if (pmPict) { if (!NV40EXATexture(pScrn, pmPix, pmPict, 1)) return FALSE; + if (pdPict->format == PICT_a8) { + fragprog = PFP_C_A8; + } else if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) { if (blend->src_alpha) - fpid = NV40EXA_FPID_COMPOSITE_MASK_SA_CA; + fragprog = PFP_CCASA; else - fpid = NV40EXA_FPID_COMPOSITE_MASK_CA; + fragprog = PFP_CCA; } else { - fpid = NV40EXA_FPID_COMPOSITE_MASK; + fragprog = PFP_C; } state->have_mask = TRUE; } else { - fpid = NV40EXA_FPID_PASS_TEX0; - + if (pdPict->format == PICT_a8) + fragprog = PFP_S_A8; + else + fragprog = PFP_S; state->have_mask = FALSE; } - - if (!NV40_LoadFragProg(pScrn, (pdPict->format == PICT_a8) ? - nv40_fp_map_a8[fpid] : nv40_fp_map[fpid])) - return FALSE; + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, fragprog, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x02000000); /* Appears to be some kind of cache flush, needed here at least * sometimes.. 
funky text rendering otherwise :) @@ -580,10 +545,8 @@ NVAccelInitNV40TCL(ScrnInfoPtr pScrn) struct nouveau_pushbuf *push = pNv->pushbuf; struct nv04_fifo *fifo = pNv->channel->data; uint32_t class = 0, chipset; - int next_hw_id = 0, next_hw_offset = FRAGPROG, i; + int i; - if (!nv40_fp_map_a8[0]) - NV40EXAHackupA8Shaders(pScrn); NVXVComputeBicubicFilter(pNv->scratch, XV_TABLE, XV_TABLE_SIZE); chipset = pNv->dev->chipset; @@ -667,7 +630,7 @@ NVAccelInitNV40TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 0); BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, 0); + PUSH_DATA (push, 0); BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1); PUSH_DATA (push, 0x01010101); /* TR,TR,TR,TR */ BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1); @@ -714,15 +677,262 @@ NVAccelInitNV40TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, (4095 << 16)); PUSH_DATA (push, (4095 << 16)); - NV40_UploadVtxProg(pNv, &nv40_vp_exa_render, &next_hw_id); - for (i = 0; i < NV40EXA_FPID_MAX; i++) { - NV30_UploadFragProg(pNv, nv40_fp_map[i], &next_hw_offset); - NV30_UploadFragProg(pNv, nv40_fp_map_a8[i], &next_hw_offset); - } - - NV40_UploadVtxProg(pNv, &nv40_vp_video, &next_hw_id); - NV30_UploadFragProg(pNv, &nv40_fp_yv12_bicubic, &next_hw_offset); - NV30_UploadFragProg(pNv, &nv30_fp_yv12_bilinear, &next_hw_offset); - + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x40041c6c); /* mov o[pos], a[0] */ + PUSH_DATA (push, 0x0040000d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff80); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8] */ + PUSH_DATA (push, 0x0040080d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff9c); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[tex1], a[9] */ + PUSH_DATA (push, 0x0040090d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ffa1); + 
BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); + PUSH_DATA (push, 0x00000309); + PUSH_DATA (push, 0x0000c001); + + PUSH_DATAu(push, pNv->scratch, PFP_PASS, 1 * 4); + PUSH_DATA (push, 0x01403e81); /* mov r0, a[col0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + + PUSH_DATAu(push, pNv->scratch, PFP_S, 2 * 4); + PUSH_DATA (push, 0x17009e00); /* tex r0, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x01401e81); /* mov r0, r0 */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_S_A8, 2 * 4); + PUSH_DATA (push, 0x17009000); /* tex r0.w, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x01401e81); /* mov r0, r0.w */ + PUSH_DATA (push, 0x1c9dfe00); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_C, 3 * 4); + PUSH_DATA (push, 0x1702b102); /* texc0 r1.w, a[tex1], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x17009e00); /* tex r0 (ne0.w), a[tex0], t[0] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r0, r1.w */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001fe04); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_C_A8, 3 * 4); + PUSH_DATA (push, 0x1702b102); /* texc0 r1.w, a[tex1], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x17009000); /* tex r0.w (ne0.w), a[tex0], t[0] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + 
PUSH_DATA (push, 0x02001e81); /* mul r0, r0.w, r1.w */ + PUSH_DATA (push, 0x1c9dfe00); + PUSH_DATA (push, 0x0001fe04); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_CCA, 3 * 4); + PUSH_DATA (push, 0x17009f00); /* texc0 r0, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x1702be02); /* tex r1 (ne0), a[tex1], t[1] */ + PUSH_DATA (push, 0x1c95c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r0, r1 */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_CCASA, 3 * 4); + PUSH_DATA (push, 0x17009102); /* texc0 r1.w, a[tex0], t[0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x1702be00); /* tex r0 (ne0.w), a[tex1], t[1] */ + PUSH_DATA (push, 0x1ff5c801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x02001e81); /* mul r0, r1.w, r0 */ + PUSH_DATA (push, 0x1c9dfe04); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + + PUSH_DATAu(push, pNv->scratch, PFP_NV12_BILINEAR, 8 * 4); + PUSH_DATA (push, 0x17028200); /* texr r0.x, a[tex0], t[1] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x04000e02); /* madr r1.xyz, r0.x, imm.x, imm.yzww */ + PUSH_DATA (push, 0x1c9c0000); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001f202); + PUSH_DATA (push, 0x3f9507c8); /* { 1.16, -0.87, 0.53, -1.08 } */ + PUSH_DATA (push, 0xbf5ee393); + PUSH_DATA (push, 0x3f078fef); + PUSH_DATA (push, 0xbf8a6762); + PUSH_DATA (push, 0x1704ac80); /* texr r0.yz, a[tex1], t[2] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x04000e02); /* madr r1.xyz, r0.y, imm, r1 */ + PUSH_DATA (push, 
0x1c9cab00); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */ + PUSH_DATA (push, 0xbec890d6); + PUSH_DATA (push, 0x40011687); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x04000e81); /* madr r0.xyz, r0.z, imm, r1 */ + PUSH_DATA (push, 0x1c9d5500); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c804); + PUSH_DATA (push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */ + PUSH_DATA (push, 0xbf501a37); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + + + PUSH_DATAu(push, pNv->scratch, PFP_NV12_BICUBIC, 29 * 4); + PUSH_DATA (push, 0x01008600); /* movr r0.xy, a[tex0] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x03000800); /* addr r0.z, r0.y, imm.x */ + PUSH_DATA (push, 0x1c9caa00); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3f000000); /* { 0.50, 0.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x03000202); /* addr r1.x, r0, imm.x */ + PUSH_DATA (push, 0x1c9dc800); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3f000000); /* { 0.50, 0.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x17000f82); /* texrc0 r1.xyz, r0.z, t[0] */ + PUSH_DATA (push, 0x1c9d5400); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x02001404); /* mulr r2.yw, r1.xxyy, imm.xxyy */ + PUSH_DATA (push, 0x1c9ca104); + PUSH_DATA (push, 0x0000a002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0xbf800000); /* { -1.00, 1.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x3f800000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x17000e86); /* texr r3.xyz, r1, t[0] */ + PUSH_DATA (push, 0x1c9dc804); + 
PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x02000a04); /* mulr r2.xz, r3.xxyy, imm.xxyy */ + PUSH_DATA (push, 0x1c9ca10c); + PUSH_DATA (push, 0x0000a002); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0xbf800000); /* { -1.00, 1.00, 0.00, 0.00 } */ + PUSH_DATA (push, 0x3f800000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x03001e04); /* addr r2, r0.xyxy, r2 */ + PUSH_DATA (push, 0x1c9c8800); + PUSH_DATA (push, 0x0001c808); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x17020402); /* texr r1.y, r2.zwzz, -t[1] */ + PUSH_DATA (push, 0x1c9d5c08); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x04400282); /* madh r1.x, -r1.z, r1.y, r1.y */ + PUSH_DATA (push, 0x1c9f5504); + PUSH_DATA (push, 0x0000aa04); + PUSH_DATA (push, 0x0000aa04); + PUSH_DATA (push, 0x17020400); /* texr r0.y, r2.xwxw, -t[1] */ + PUSH_DATA (push, 0x1c9d9808); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x04401080); /* madh r0.w, -r1.z, r0.y, r0.y */ + PUSH_DATA (push, 0x1c9f5504); + PUSH_DATA (push, 0x0000aa00); + PUSH_DATA (push, 0x0000aa00); + PUSH_DATA (push, 0x17020200); /* texr r0.x, r2.zyxy, t[1] */ + PUSH_DATA (push, 0x1c9c8c08); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x04400282); /* madh r1.x, r1.z, r0, r1 */ + PUSH_DATA (push, 0x1c9d5504); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c904); + PUSH_DATA (push, 0x17020200); /* texr r0.x (NE0.z), r2, t[1] */ + PUSH_DATA (push, 0x1555c808); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x04400280); /* madh r0.x, r1.z, r0, r0.w */ + PUSH_DATA (push, 0x1c9d5504); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x0001ff00); + PUSH_DATA (push, 0x04401080); /* madh r0.w, -r3.z, r1.x, r1.x */ + PUSH_DATA (push, 0x1c9f550c); + PUSH_DATA (push, 0x00000104); + PUSH_DATA (push, 0x00000104); + 
PUSH_DATA (push, 0x1704ac80); /* texr r0.yz, a[tex1], t[2] */ + PUSH_DATA (push, 0x1c9dc801); + PUSH_DATA (push, 0x0001c800); + PUSH_DATA (push, 0x3fe1c800); + PUSH_DATA (push, 0x04400280); /* madh r0.x, r3.z, r0, r0.w */ + PUSH_DATA (push, 0x1c9d550c); + PUSH_DATA (push, 0x0001c900); + PUSH_DATA (push, 0x0001ff00); + PUSH_DATA (push, 0x04400e82); /* madh r1.xyz, r0.x, imm.x, imm.yzww */ + PUSH_DATA (push, 0x1c9c0100); + PUSH_DATA (push, 0x00000002); + PUSH_DATA (push, 0x0001f202); + PUSH_DATA (push, 0x3f9507c8); /* { 1.16, -0.87, 0.53, -1.08 } */ + PUSH_DATA (push, 0xbf5ee393); + PUSH_DATA (push, 0x3f078fef); + PUSH_DATA (push, 0xbf8a6762); + PUSH_DATA (push, 0x04400e82); /* madh r1.xyz, r0.y, imm, r1 */ + PUSH_DATA (push, 0x1c9cab00); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c904); + PUSH_DATA (push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */ + PUSH_DATA (push, 0xbec890d6); + PUSH_DATA (push, 0x40011687); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x04400e81); /* madh r0.xyz, r0.z, imm, r1 */ + PUSH_DATA (push, 0x1c9d5500); + PUSH_DATA (push, 0x0001c802); + PUSH_DATA (push, 0x0001c904); + PUSH_DATA (push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */ + PUSH_DATA (push, 0xbf501a37); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000000); return TRUE; } diff --git a/src/nv40_xv_tex.c b/src/nv40_xv_tex.c index 78dd0d6..2184678 100644 --- a/src/nv40_xv_tex.c +++ b/src/nv40_xv_tex.c @@ -36,8 +36,6 @@ #include "nv_include.h" #include "nv_dma.h" -#include "nv30_shaders.h" - #include "hwdefs/nv30-40_3d.xml.h" #include "nv04_accel.h" @@ -236,17 +234,18 @@ NV40PutTextureImage(ScrnInfoPtr pScrn, return BadImplementation; } - NV40_LoadVtxProg(pScrn, &nv40_vp_video); - if (drw_w / 2 < src_w || drw_h / 2 < src_h) bicubic = FALSE; - if (!NV40_LoadFragProg(pScrn, bicubic ? 
- &nv40_fp_yv12_bicubic : - &nv30_fp_yv12_bilinear)) { - PUSH_RESET(push); - return BadImplementation; - } + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, + bicubic ? PFP_NV12_BICUBIC : PFP_NV12_BILINEAR, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x04000000); /* Appears to be some kind of cache flush, needed here at least * sometimes.. funky text rendering otherwise :) -- cgit v1.2.1