From fb3a36b1e5af0f81bb266da894d3442eed8e4e55 Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Wed, 28 Mar 2012 16:38:30 +1000
Subject: nve0: initial exa/xv acceleration for kepler chipsets

Signed-off-by: Ben Skeggs
---
Reviewer notes (not part of the commit message):

 * NVAccelM2MF() now tests Architecture >= NV_ARCH_E0 first and hands the
   copy to the new NVE0EXARectCopy(); the existing NV_ARCH_C0 test follows
   in the else branch.
 * NV_ARCH_E0 reuses the NVC0 composite hooks and NVAccelInit3D_NVC0(),
   which creates object class 0xa097 instead of 0x9097 and uploads the
   NVE0* shader arrays from the new nve0_shader.h.
 * For Architecture >= NV_ARCH_E0, NVAccelCommonInit() initialises
   P2MF_NVE0 instead of M2MF_NVC0.  NVAccelInitP2MF_NVE0() allocates the
   128 KiB tesla_scratch buffer in VRAM and creates object 0xa040.
 * Shader slots are now 0x200 apart with PVP_PASS first; SPO adds 0x30 to
   every slot when Architecture >= NV_ARCH_E0.  SPO expands to an
   expression that reads a variable named pNv at the point of use.
 * PUSH_DATAu() selects its upload path from
   push->client->device->chipset < 0xe0.
 * A debugging ErrorF() is dropped from the macro body in nvc0_shader.h.
 * Review change: in NVE0EXARectCopy() the destination surface word was
   spelled 0x000001000 (nine hex digits); it now reads 0x00001000 like the
   source word.  The value is unchanged.  Two trailing comments were added
   in the same function.
 * NOTE(review): line breaks in this copy were restored from a collapsed
   export and checked against the hunk headers; whitespace inside lines
   is reconstructed, so apply with whitespace checks relaxed.

 src/nouveau_exa.c     |   6 +
 src/nouveau_xv.c      |   2 +-
 src/nv_accel_common.c |   4 +
 src/nv_proto.h        |   4 +
 src/nvc0_accel.c      |  89 ++++++++---
 src/nvc0_accel.h      |  54 ++++---
 src/nvc0_exa.c        |  57 +++++++
 src/nvc0_shader.h     |   1 -
 src/nve0_shader.h     | 404 ++++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 582 insertions(+), 39 deletions(-)
 create mode 100644 src/nve0_shader.h

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 6ad1790..9aaa3f2 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -47,6 +47,11 @@ NVAccelM2MF(NVPtr pNv, int w, int h, int cpp, uint32_t srcoff, uint32_t dstoff,
 	    struct nouveau_bo *src, int sd, int sp, int sh, int sx, int sy,
 	    struct nouveau_bo *dst, int dd, int dp, int dh, int dx, int dy)
 {
+	if (pNv->Architecture >= NV_ARCH_E0)
+		return NVE0EXARectCopy(pNv, w, h, cpp,
+				       src, srcoff, sd, sp, sh, sx, sy,
+				       dst, dstoff, dd, dp, dh, dx, dy);
+	else
 	if (pNv->Architecture >= NV_ARCH_C0)
 		return NVC0EXARectM2MF(pNv, w, h, cpp,
 				       src, srcoff, sd, sp, sh, sx, sy,
@@ -413,6 +418,7 @@ nouveau_exa_init(ScreenPtr pScreen)
 		exa->DoneComposite = NV50EXADoneComposite;
 		break;
 	case NV_ARCH_C0:
+	case NV_ARCH_E0:
 		exa->CheckComposite = NVC0EXACheckComposite;
 		exa->PrepareComposite = NVC0EXAPrepareComposite;
 		exa->Composite = NVC0EXAComposite;
diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index a43225c..9ada296 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -256,7 +256,7 @@ nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
 		if (pNv->Architecture == NV_ARCH_50)
 			config.nv50.memtype = 0x70;
 		else
-		if (pNv->Architecture == NV_ARCH_C0)
+		if (pNv->Architecture >= NV_ARCH_C0)
 			config.nvc0.memtype = 0xfe;
 	}
 	flags |= NOUVEAU_BO_MAP;
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index 09272b9..32cae49 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -623,11 +623,15 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
 	if (pNv->Architecture < NV_ARCH_C0)
 		INIT_CONTEXT_OBJECT(M2MF_NV50);
 	else
+	if (pNv->Architecture < NV_ARCH_E0)
 		INIT_CONTEXT_OBJECT(M2MF_NVC0);
+	else
+		INIT_CONTEXT_OBJECT(P2MF_NVE0);
 
 	/* 3D init */
 	switch (pNv->Architecture) {
 	case NV_ARCH_C0:
+	case NV_ARCH_E0:
 		INIT_CONTEXT_OBJECT(3D_NVC0);
 		break;
 	case NV_ARCH_50:
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 88cd870..f3aa3be 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -148,6 +148,7 @@ Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
 
 /* in nvc0_accel.c */
 Bool NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn);
+Bool NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn);
 Bool NVAccelInit2D_NVC0(ScrnInfoPtr pScrn);
 Bool NVAccelInit3D_NVC0(ScrnInfoPtr pScrn);
 
@@ -191,6 +192,9 @@ Bool NVC0EXAUploadSIFC(const char *src, int src_pitch,
 Bool NVC0EXARectM2MF(NVPtr pNv, int, int, int,
 		     struct nouveau_bo *, uint32_t, int, int, int, int, int,
 		     struct nouveau_bo *, uint32_t, int, int, int, int, int);
+Bool NVE0EXARectCopy(NVPtr pNv, int, int, int,
+		     struct nouveau_bo *, uint32_t, int, int, int, int, int,
+		     struct nouveau_bo *, uint32_t, int, int, int, int, int);
 
 /* nv50_xv.c */
 int nv50_xv_image_put(ScrnInfoPtr, struct nouveau_bo *, int, int, int, int,
diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c
index 2fc7ef9..ad34554 100644
--- a/src/nvc0_accel.c
+++ b/src/nvc0_accel.c
@@ -23,6 +23,7 @@
 #include "nv_include.h"
 #include "nvc0_accel.h"
 #include "nvc0_shader.h"
+#include "nve0_shader.h"
 
 Bool
 NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
@@ -51,6 +52,30 @@ NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
 	return TRUE;
 }
 
+Bool
+NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_pushbuf *push = pNv->pushbuf;
+	int ret;
+
+	ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM, 128 * 1024, 0x20000,
+			     NULL, &pNv->tesla_scratch);
+	if (ret)
+		return FALSE;
+
+	ret = nouveau_object_new(pNv->channel, 0x0000a040, 0xa040,
+				 NULL, 0, &pNv->NvMemFormat);
+	if (ret)
+		return FALSE;
+
+	BEGIN_NVC0(push, NV01_SUBC(P2MF, OBJECT), 1);
+	PUSH_DATA (push, pNv->NvMemFormat->handle);
+	BEGIN_NVC0(push, NV01_SUBC(COPY, OBJECT), 1);
+	PUSH_DATA (push, 0x0000a0b5);
+	return TRUE;
+}
+
 Bool
 NVAccelInit2D_NVC0(ScrnInfoPtr pScrn)
 {
@@ -104,9 +129,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
 	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_pushbuf *push = pNv->pushbuf;
 	struct nouveau_bo *bo = pNv->tesla_scratch;
+	uint32_t class;
 	int ret;
 
-	ret = nouveau_object_new(pNv->channel, 0x00009097, 0x9097,
+	if (pNv->Architecture < NV_ARCH_E0)
+		class = 0x9097;
+	else
+		class = 0xa097;
+
+	ret = nouveau_object_new(pNv->channel, class, class,
 				 NULL, 0, &pNv->Nv3D);
 	if (ret)
 		return FALSE;
@@ -155,11 +186,25 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
 	PUSH_DATA (push, 0);
 	BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
 	PUSH_DATA (push, 1);
-	BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
-	PUSH_DATA (push, 0x54);
-	BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
-	PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
-	PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+	if (pNv->Architecture < NV_ARCH_E0) {
+		BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
+		PUSH_DATA (push, 0x54);
+		BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
+		PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+		PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+	} else {
+		BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 6);
+		PUSH_DATA (push, 256);
+		PUSH_DATA (push, (bo->offset + TB_OFFSET) >> 32);
+		PUSH_DATA (push, (bo->offset + TB_OFFSET));
+		PUSH_DATA (push, 0);
+		PUSH_DATA (push, 0x00000000);
+		PUSH_DATA (push, 0x00000001);
+		BEGIN_NVC0(push, NVC0_3D(CB_BIND(4)), 1);
+		PUSH_DATA (push, 0x11);
+		BEGIN_NVC0(push, SUBC_3D(0x2608), 1);
+		PUSH_DATA (push, 1);
+	}
 
 	BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
 	PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
@@ -169,18 +214,28 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
 	BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
 	PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32);
 	PUSH_DATA (push, (bo->offset + CODE_OFFSET));
+	if (pNv->Architecture < NV_ARCH_E0) {
+		NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
+		NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
+		NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
+		NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
+		NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
+		NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
+		NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
+		NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
 
-	NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
-	NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
-	NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
-	NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
-	NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
-	NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
-	NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
-	NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
-
-	BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
-	PUSH_DATA (push, 0x1111);
+		BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
+		PUSH_DATA (push, 0x1111);
+	} else {
+		NVC0PushProgram(pNv, PVP_PASS, NVE0VP_Passthrough);
+		NVC0PushProgram(pNv, PFP_S, NVE0FP_Source);
+		NVC0PushProgram(pNv, PFP_C, NVE0FP_Composite);
+		NVC0PushProgram(pNv, PFP_CCA, NVE0FP_CAComposite);
+		NVC0PushProgram(pNv, PFP_CCASA, NVE0FP_CACompositeSrcAlpha);
+		NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8);
+		NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8);
+		NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12);
+	}
 
 	BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4);
 	PUSH_DATA (push, NVC0_3D_SP_SELECT_PROGRAM_VP_B |
diff --git a/src/nvc0_accel.h b/src/nvc0_accel.h
index 9a7475a..3b838ea 100644
--- a/src/nvc0_accel.h
+++ b/src/nvc0_accel.h
@@ -13,10 +13,12 @@
 #define SUBC_3D(mthd) 0, (mthd)
 #define NVC0_3D(mthd) SUBC_3D(NVC0_3D_##mthd)
 #define SUBC_M2MF(mthd) 2, (mthd)
+#define SUBC_P2MF(mthd) 2, (mthd)
 #define NVC0_M2MF(mthd) SUBC_M2MF(NVC0_M2MF_##mthd)
 #define SUBC_2D(mthd) 3, (mthd)
 #define NV50_2D(mthd) SUBC_2D(NV50_2D_##mthd)
 #define NVC0_2D(mthd) SUBC_2D(NVC0_2D_##mthd)
+#define SUBC_COPY(mthd) 4, (mthd)
 #define SUBC_NVSW(mthd) 5, (mthd)
 
 /* scratch buffer offsets */
@@ -26,21 +28,23 @@
 #define NTFY_OFFSET 0x08000
 #define MISC_OFFSET 0x10000
 
-/* fragment programs */
-#define PFP_S 0x0000 /* (src) */
-#define PFP_C 0x0100 /* (src IN mask) */
-#define PFP_CCA 0x0200 /* (src IN mask) component-alpha */
-#define PFP_CCASA 0x0300 /* (src IN mask) component-alpha src-alpha */
-#define PFP_S_A8 0x0400 /* (src) a8 rt */
-#define PFP_C_A8 0x0500 /* (src IN mask) a8 rt - same for CA and CA_SA */
-#define PFP_NV12 0x0600 /* NV12 YUV->RGB */
-
-/* vertex programs */
-#define PVP_PASS 0x0700 /* vertex pass-through shader */
+/* vertex/fragment programs */
+#define SPO ((pNv->Architecture < NV_ARCH_E0) ? 0x0000 : 0x0030)
+#define PVP_PASS (0x0000 + SPO) /* vertex pass-through shader */
+#define PFP_S (0x0200 + SPO) /* (src) */
+#define PFP_C (0x0400 + SPO) /* (src IN mask) */
+#define PFP_CCA (0x0600 + SPO) /* (src IN mask) component-alpha */
+#define PFP_CCASA (0x0800 + SPO) /* (src IN mask) component-alpha src-alpha */
+#define PFP_S_A8 (0x0a00 + SPO) /* (src) a8 rt */
+#define PFP_C_A8 (0x0c00 + SPO) /* (src IN mask) a8 rt - same for CCA/CCASA */
+#define PFP_NV12 (0x0e00 + SPO) /* NV12 YUV->RGB */
 
 /* shader constants */
 #define CB_OFFSET 0x1000
 
+/* texture bindings (kepler) */
+#define TB_OFFSET 0x1800
+
 #define VTX_ATTR(a, c, t, s) \
 	((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
 	 ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
@@ -98,15 +102,25 @@ static __inline__ void
 PUSH_DATAu(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
 	   unsigned delta, unsigned dwords)
 {
-	BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
-	PUSH_DATA (push, (bo->offset + delta) >> 32);
-	PUSH_DATA (push, (bo->offset + delta));
-	BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
-	PUSH_DATA (push, dwords * 4);
-	PUSH_DATA (push, 1);
-	BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
-	PUSH_DATA (push, 0x100111);
-	BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+	if (push->client->device->chipset < 0xe0) {
+		BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+		PUSH_DATA (push, (bo->offset + delta) >> 32);
+		PUSH_DATA (push, (bo->offset + delta));
+		BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+		PUSH_DATA (push, dwords * 4);
+		PUSH_DATA (push, 1);
+		BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+		PUSH_DATA (push, 0x100111);
+		BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+	} else {
+		BEGIN_NVC0(push, SUBC_P2MF(0x0180), 4);
+		PUSH_DATA (push, dwords * 4);
+		PUSH_DATA (push, 1);
+		PUSH_DATA (push, (bo->offset + delta) >> 32);
+		PUSH_DATA (push, (bo->offset + delta));
+		BEGIN_1IC0(push, SUBC_P2MF(0x01b0), 1 + dwords);
+		PUSH_DATA (push, 0x001001);
+	}
 }
 
 #endif
diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
index 4ba862d..8b6d51e 100644
--- a/src/nvc0_exa.c
+++ b/src/nvc0_exa.c
@@ -1006,3 +1006,60 @@ NVC0EXARectM2MF(NVPtr pNv, int w, int h, int cpp,
 
 	return TRUE;
 }
+
+Bool
+NVE0EXARectCopy(NVPtr pNv, int w, int h, int cpp,
+		struct nouveau_bo *src, uint32_t src_off, int src_dom,
+		int src_pitch, int src_h, int src_x, int src_y,
+		struct nouveau_bo *dst, uint32_t dst_off, int dst_dom,
+		int dst_pitch, int dst_h, int dst_x, int dst_y)
+{
+	struct nouveau_pushbuf *push = pNv->pushbuf;
+	struct nouveau_pushbuf_refn refs[] = {
+		{ src, src_dom | NOUVEAU_BO_RD },
+		{ dst, dst_dom | NOUVEAU_BO_WR },
+	};
+	unsigned exec;
+
+	if (nouveau_pushbuf_space(push, 64, 0, 0) ||
+	    nouveau_pushbuf_refn (push, refs, 2))
+		return FALSE;
+
+	exec = 0x00000206;
+	if (!src->config.nvc0.memtype) { /* memtype 0: fold x/y into the byte offset */
+		src_off += src_y * src_pitch + src_x * cpp;
+		exec |= 0x00000080;
+	}
+	if (!dst->config.nvc0.memtype) { /* same handling for the destination */
+		dst_off += dst_y * dst_pitch + dst_x * cpp;
+		exec |= 0x00000100;
+	}
+
+	BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+	PUSH_DATA (push, 0x00001000 | src->config.nvc0.tile_mode);
+	PUSH_DATA (push, src_pitch);
+	PUSH_DATA (push, src_h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, (src_y << 16) | src_x * cpp);
+	BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+	PUSH_DATA (push, 0x00001000 | dst->config.nvc0.tile_mode);
+	PUSH_DATA (push, dst_pitch);
+	PUSH_DATA (push, dst_h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, (dst_y << 16) | dst_x * cpp);
+	BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+	PUSH_DATA (push, (src->offset + src_off) >> 32);
+	PUSH_DATA (push, (src->offset + src_off));
+	PUSH_DATA (push, (dst->offset + dst_off) >> 32);
+	PUSH_DATA (push, (dst->offset + dst_off));
+	PUSH_DATA (push, src_pitch);
+	PUSH_DATA (push, dst_pitch);
+	PUSH_DATA (push, w * cpp);
+	PUSH_DATA (push, h);
+	BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+	PUSH_DATA (push, exec);
+
+	return TRUE;
+}
diff --git a/src/nvc0_shader.h b/src/nvc0_shader.h
index f97b74b..2084a6d 100644
--- a/src/nvc0_shader.h
+++ b/src/nvc0_shader.h
@@ -5,7 +5,6 @@
 	const unsigned size = sizeof(code) / sizeof(code[0]); \
 	PUSH_DATAu((pNv)->pushbuf, (pNv)->tesla_scratch, (addr), size); \
 	PUSH_DATAp((pNv)->pushbuf, (code), size); \
-	ErrorF("20 + %d * 2\n", (size - 20) / 2); \
 } while(0)
 
 static uint32_t
diff --git a/src/nve0_shader.h b/src/nve0_shader.h
new file mode 100644
index 0000000..8f36279
--- /dev/null
+++ b/src/nve0_shader.h
@@ -0,0 +1,404 @@
+#ifndef __NVE0_SHADER_H__
+#define __NVE0_SHADER_H__
+
+static uint32_t
+NVE0VP_Passthrough[] = {
+	0x00020461,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x000ff000,
+	0x00000000, /* VP_ATTR_EN[0x000] */
+	0x0001033f, /* VP_ATTR_EN[0x080] */
+	0x00000000, /* VP_ATTR_EN[0x100] */
+	0x00000000,
+	0x00000000, /* VP_ATTR_EN[0x200] */
+	0x00000000,
+	0x00000000, /* VP_ATTR_EN[0x300] */
+	0x00000000,
+	0x0033f000, /* VP_EXPORT_EN[0x040] */
+	0x00000000, /* VP_EXPORT_EN[0x0c0] */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000, /* VP_EXPORT_EN[0x2c0] */
+	0x00000000,
+	0xfff01c66,
+	0x06000080, /* vfetch { $r0,1,2,3 } b128 a[0x80] */
+	0xfff11c26,
+	0x06000090, /* vfetch { $r4,5 } b64 a[0x90] */
+	0xfff19c26,
+	0x060000a0, /* vfetch { $r6,7 } b64 a[0xa0] */
+	0x03f01c66,
+	0x0a7e0070, /* export v[0x70] { $r0 $r1 $r2 $r3 } */
+	0x13f01c26,
+	0x0a7e0080, /* export v[0x80] { $r4 $r5 } */
+	0x1bf01c26,
+	0x0a7e0090, /* export v[0x90] { $r6 $r7 } */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Source[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x0000000a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f05c40,
+	0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+	0x03f01c40,
+	0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+	0xfc001e86,
+	0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Composite[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x00000a0a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f0dc40,
+	0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+	0x03f09c40,
+	0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+	0xfc211e86,
+	0x80120001, /* tex { _,_,_,$r4 } $t1 { $r2,3 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x03f05c40,
+	0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+	0x03f01c40,
+	0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+	0xfc001e86,
+	0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x1030dc40,
+	0x58000000, /* mul ftz rn f32 $r3 $r3 $r4 */
+	0x10209c40,
+	0x58000000, /* mul ftz rn f32 $r2 $r2 $r4 */
+	0x10105c40,
+	0x58000000, /* mul ftz rn f32 $r1 $r1 $r4 */
+	0x10001c40,
+	0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_CAComposite[] = {
+	0x00021462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000, /* FRAG_COORD_UMASK = 0x8 */
+	0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */
+	0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */
+	0x00000000, /* FP_INTERP[0x100], 1 = FLAT */
+	0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */
+	0x00000000, /* FP_INTERP[0x180], 3 = LINEAR */
+	0x00000000, /* FP_INTERP[0x1c0] */
+	0x00000000, /* FP_INTERP[0x200] */
+	0x00000000, /* FP_INTERP[0x240] */
+	0x00000000, /* FP_INTERP[0x280] */
+	0x00000000, /* FP_INTERP[0x2c0] */
+	0x00000000, /* FP_INTERP[0x300] */
+	0x00000000,
+	0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */
+	0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f0dc40,
+	0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+	0x03f09c40,
+	0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+	0xfc211e86,
+	0x8013c001, /* tex { $r4,5,6,7 } $t1 { $r2,3 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x03f05c40,
+	0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+	0x03f01c40,
+	0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+	0xfc001e86,
+	0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x1c30dc40,
+	0x58000000, /* mul ftz rn f32 $r3 $r3 $r7 */
+	0x18209c40,
+	0x58000000, /* mul ftz rn f32 $r2 $r2 $r6 */
+	0x14105c40,
+	0x58000000, /* mul ftz rn f32 $r1 $r1 $r5 */
+	0x10001c40,
+	0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_CACompositeSrcAlpha[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x00000a0a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f0dc40,
+	0xc07e0084, /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+	0x03f09c40,
+	0xc07e0080, /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+	0xfc211e86,
+	0x80120000, /* tex { _,_,_,$r4 } $t0 { $r2,3 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x03f05c40,
+	0xc07e0094, /* pinterp f32 $r1 $r0 v[$r63+0x94] */
+	0x03f01c40,
+	0xc07e0090, /* pinterp f32 $r0 $r0 v[$r63+0x90] */
+	0xfc001e86,
+	0x8013c001, /* tex { $r0,1,2,3 } $t1 { $r0,1 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x1030dc40,
+	0x58000000, /* mul ftz rn f32 $r3 $r3 $r4 */
+	0x10209c40,
+	0x58000000, /* mul ftz rn f32 $r2 $r2 $r4 */
+	0x10105c40,
+	0x58000000, /* mul ftz rn f32 $r1 $r1 $r4 */
+	0x10001c40,
+	0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Source_A8[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x0000000a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f05c40,
+	0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+	0x03f01c40,
+	0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+	0xfc001e86,
+	0x80120000, /* tex { _ _ _ $r0 } $t0 { $r0 $r1 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x0000dde4,
+	0x28000000, /* mov b32 $r3 $r0 */
+	0x00009de4,
+	0x28000000, /* mov b32 $r2 $r0 */
+	0x00005de4,
+	0x28000000, /* mov b32 $r1 $r0 */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Composite_A8[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x00000a0a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff01c00,
+	0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+	0x10001c00,
+	0xc8000000, /* rcp f32 $r0 $r0 */
+	0x03f0dc40,
+	0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+	0x03f09c40,
+	0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+	0xfc205e86,
+	0x80120001, /* tex { _ _ _ $r1 } $t1 { $r2 $r3 } */
+	0x03f0dc40,
+	0xc07e0084, /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+	0x03f09c40,
+	0xc07e0080, /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+	0xfc201e86,
+	0x80120000, /* tex { _ _ _ $r0 } $t0 { $r2 $r3 } */
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x0400dc40,
+	0x58000000, /* mul ftz rn f32 $r3 $r0 $r1 */
+	0x0c009de4,
+	0x28000000, /* mov b32 $r2 $r3 */
+	0x0c005de4,
+	0x28000000, /* mov b32 $r1 $r3 */
+	0x0c001de4,
+	0x28000000, /* mov b32 $r0 $r3 */
+	0x00001de7,
+	0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_NV12[] = {
+	0x00021462,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x80000000,
+	0x00000a0a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x0000000f,
+	0x00000000,
+	0xfff09c00,
+	0xc07e007c,
+	0x10209c00,
+	0xc8000000,
+	0x0bf01c40,
+	0xc07e0080,
+	0x0bf05c40,
+	0xc07e0084,
+	0xfc001e86,
+	0x80120000,
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x00015c40,
+	0x58004000,
+	0x1050dc20,
+	0x50004000,
+	0x20511c20,
+	0x50004000,
+	0x30515c20,
+	0x50004000,
+	0x0bf01c40,
+	0xc07e0090,
+	0x0bf05c40,
+	0xc07e0094,
+	0xfc001e86,
+	0x80130001,
+	0x00001de6,
+	0xf0000000, /* texbar */
+	0x4000dc40,
+	0x30064000,
+	0x50011c40,
+	0x30084000,
+	0x60015c40,
+	0x300a4000,
+	0x70101c40,
+	0x30064000,
+	0x90109c40,
+	0x300a4000,
+	0x80105c40,
+	0x30084000,
+	0x00001de7,
+	0x80000000,
+};
+
+#endif
-- 
cgit v1.2.1