summary refs log tree commit diff
diff options
context:
space:
mode:
author Ben Skeggs <bskeggs@redhat.com> 2012-03-28 16:38:30 +1000
committer Ben Skeggs <bskeggs@redhat.com> 2012-04-14 02:07:57 +1000
commit fb3a36b1e5af0f81bb266da894d3442eed8e4e55 (patch)
tree 313c88c391325f80b689935a7d8c5787513c65a8
parent 95be16d8cd88db1ace0ac4568b4c67b51fd30fd9 (diff)
download xorg-driver-xf86-video-nouveau-fb3a36b1e5af0f81bb266da894d3442eed8e4e55.tar.gz
nve0: initial exa/xv acceleration for kepler chipsets
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
-rw-r--r-- src/nouveau_exa.c 6
-rw-r--r-- src/nouveau_xv.c 2
-rw-r--r-- src/nv_accel_common.c 4
-rw-r--r-- src/nv_proto.h 4
-rw-r--r-- src/nvc0_accel.c 89
-rw-r--r-- src/nvc0_accel.h 54
-rw-r--r-- src/nvc0_exa.c 57
-rw-r--r-- src/nvc0_shader.h 1
-rw-r--r-- src/nve0_shader.h 404
9 files changed, 582 insertions, 39 deletions
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 6ad1790..9aaa3f2 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -47,6 +47,11 @@ NVAccelM2MF(NVPtr pNv, int w, int h, int cpp, uint32_t srcoff, uint32_t dstoff,
struct nouveau_bo *src, int sd, int sp, int sh, int sx, int sy,
struct nouveau_bo *dst, int dd, int dp, int dh, int dx, int dy)
{
+ if (pNv->Architecture >= NV_ARCH_E0)
+ return NVE0EXARectCopy(pNv, w, h, cpp,
+ src, srcoff, sd, sp, sh, sx, sy,
+ dst, dstoff, dd, dp, dh, dx, dy);
+ else
if (pNv->Architecture >= NV_ARCH_C0)
return NVC0EXARectM2MF(pNv, w, h, cpp,
src, srcoff, sd, sp, sh, sx, sy,
@@ -413,6 +418,7 @@ nouveau_exa_init(ScreenPtr pScreen)
exa->DoneComposite = NV50EXADoneComposite;
break;
case NV_ARCH_C0:
+ case NV_ARCH_E0:
exa->CheckComposite = NVC0EXACheckComposite;
exa->PrepareComposite = NVC0EXAPrepareComposite;
exa->Composite = NVC0EXAComposite;
diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index a43225c..9ada296 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -256,7 +256,7 @@ nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
if (pNv->Architecture == NV_ARCH_50)
config.nv50.memtype = 0x70;
else
- if (pNv->Architecture == NV_ARCH_C0)
+ if (pNv->Architecture >= NV_ARCH_C0)
config.nvc0.memtype = 0xfe;
}
flags |= NOUVEAU_BO_MAP;
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index 09272b9..32cae49 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -623,11 +623,15 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
if (pNv->Architecture < NV_ARCH_C0)
INIT_CONTEXT_OBJECT(M2MF_NV50);
else
+ if (pNv->Architecture < NV_ARCH_E0)
INIT_CONTEXT_OBJECT(M2MF_NVC0);
+ else
+ INIT_CONTEXT_OBJECT(P2MF_NVE0);
/* 3D init */
switch (pNv->Architecture) {
case NV_ARCH_C0:
+ case NV_ARCH_E0:
INIT_CONTEXT_OBJECT(3D_NVC0);
break;
case NV_ARCH_50:
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 88cd870..f3aa3be 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -148,6 +148,7 @@ Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
/* in nvc0_accel.c */
Bool NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn);
+Bool NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn);
Bool NVAccelInit2D_NVC0(ScrnInfoPtr pScrn);
Bool NVAccelInit3D_NVC0(ScrnInfoPtr pScrn);
@@ -191,6 +192,9 @@ Bool NVC0EXAUploadSIFC(const char *src, int src_pitch,
Bool NVC0EXARectM2MF(NVPtr pNv, int, int, int,
struct nouveau_bo *, uint32_t, int, int, int, int, int,
struct nouveau_bo *, uint32_t, int, int, int, int, int);
+Bool NVE0EXARectCopy(NVPtr pNv, int, int, int,
+ struct nouveau_bo *, uint32_t, int, int, int, int, int,
+ struct nouveau_bo *, uint32_t, int, int, int, int, int);
/* nv50_xv.c */
int nv50_xv_image_put(ScrnInfoPtr, struct nouveau_bo *, int, int, int, int,
diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c
index 2fc7ef9..ad34554 100644
--- a/src/nvc0_accel.c
+++ b/src/nvc0_accel.c
@@ -23,6 +23,7 @@
#include "nv_include.h"
#include "nvc0_accel.h"
#include "nvc0_shader.h"
+#include "nve0_shader.h"
Bool
NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
@@ -52,6 +53,30 @@ NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
}
Bool
+NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn)
+{
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_pushbuf *push = pNv->pushbuf;
+ int ret;
+
+ ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM, 128 * 1024, 0x20000,
+ NULL, &pNv->tesla_scratch);
+ if (ret)
+ return FALSE;
+
+ ret = nouveau_object_new(pNv->channel, 0x0000a040, 0xa040,
+ NULL, 0, &pNv->NvMemFormat);
+ if (ret)
+ return FALSE;
+
+ BEGIN_NVC0(push, NV01_SUBC(P2MF, OBJECT), 1);
+ PUSH_DATA (push, pNv->NvMemFormat->handle);
+ BEGIN_NVC0(push, NV01_SUBC(COPY, OBJECT), 1);
+ PUSH_DATA (push, 0x0000a0b5);
+ return TRUE;
+}
+
+Bool
NVAccelInit2D_NVC0(ScrnInfoPtr pScrn)
{
NVPtr pNv = NVPTR(pScrn);
@@ -104,9 +129,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
NVPtr pNv = NVPTR(pScrn);
struct nouveau_pushbuf *push = pNv->pushbuf;
struct nouveau_bo *bo = pNv->tesla_scratch;
+ uint32_t class;
int ret;
- ret = nouveau_object_new(pNv->channel, 0x00009097, 0x9097,
+ if (pNv->Architecture < NV_ARCH_E0)
+ class = 0x9097;
+ else
+ class = 0xa097;
+
+ ret = nouveau_object_new(pNv->channel, class, class,
NULL, 0, &pNv->Nv3D);
if (ret)
return FALSE;
@@ -155,11 +186,25 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
PUSH_DATA (push, 1);
- BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
- PUSH_DATA (push, 0x54);
- BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
- PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
- PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ if (pNv->Architecture < NV_ARCH_E0) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
+ PUSH_DATA (push, 0x54);
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
+ PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 6);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, (bo->offset + TB_OFFSET) >> 32);
+ PUSH_DATA (push, (bo->offset + TB_OFFSET));
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, 0x00000001);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(4)), 1);
+ PUSH_DATA (push, 0x11);
+ BEGIN_NVC0(push, SUBC_3D(0x2608), 1);
+ PUSH_DATA (push, 1);
+ }
BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
@@ -169,18 +214,28 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32);
PUSH_DATA (push, (bo->offset + CODE_OFFSET));
+ if (pNv->Architecture < NV_ARCH_E0) {
+ NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
+ NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
+ NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
+ NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
+ NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
+ NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
+ NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
+ NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
- NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
- NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
- NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
- NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
- NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
- NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
- NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
- NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
-
- BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
- PUSH_DATA (push, 0x1111);
+ BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
+ PUSH_DATA (push, 0x1111);
+ } else {
+ NVC0PushProgram(pNv, PVP_PASS, NVE0VP_Passthrough);
+ NVC0PushProgram(pNv, PFP_S, NVE0FP_Source);
+ NVC0PushProgram(pNv, PFP_C, NVE0FP_Composite);
+ NVC0PushProgram(pNv, PFP_CCA, NVE0FP_CAComposite);
+ NVC0PushProgram(pNv, PFP_CCASA, NVE0FP_CACompositeSrcAlpha);
+ NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8);
+ NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8);
+ NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12);
+ }
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4);
PUSH_DATA (push, NVC0_3D_SP_SELECT_PROGRAM_VP_B |
diff --git a/src/nvc0_accel.h b/src/nvc0_accel.h
index 9a7475a..3b838ea 100644
--- a/src/nvc0_accel.h
+++ b/src/nvc0_accel.h
@@ -13,10 +13,12 @@
#define SUBC_3D(mthd) 0, (mthd)
#define NVC0_3D(mthd) SUBC_3D(NVC0_3D_##mthd)
#define SUBC_M2MF(mthd) 2, (mthd)
+#define SUBC_P2MF(mthd) 2, (mthd)
#define NVC0_M2MF(mthd) SUBC_M2MF(NVC0_M2MF_##mthd)
#define SUBC_2D(mthd) 3, (mthd)
#define NV50_2D(mthd) SUBC_2D(NV50_2D_##mthd)
#define NVC0_2D(mthd) SUBC_2D(NVC0_2D_##mthd)
+#define SUBC_COPY(mthd) 4, (mthd)
#define SUBC_NVSW(mthd) 5, (mthd)
/* scratch buffer offsets */
@@ -26,21 +28,23 @@
#define NTFY_OFFSET 0x08000
#define MISC_OFFSET 0x10000
-/* fragment programs */
-#define PFP_S 0x0000 /* (src) */
-#define PFP_C 0x0100 /* (src IN mask) */
-#define PFP_CCA 0x0200 /* (src IN mask) component-alpha */
-#define PFP_CCASA 0x0300 /* (src IN mask) component-alpha src-alpha */
-#define PFP_S_A8 0x0400 /* (src) a8 rt */
-#define PFP_C_A8 0x0500 /* (src IN mask) a8 rt - same for CA and CA_SA */
-#define PFP_NV12 0x0600 /* NV12 YUV->RGB */
-
-/* vertex programs */
-#define PVP_PASS 0x0700 /* vertex pass-through shader */
+/* vertex/fragment programs */
+#define SPO ((pNv->Architecture < NV_ARCH_E0) ? 0x0000 : 0x0030)
+#define PVP_PASS (0x0000 + SPO) /* vertex pass-through shader */
+#define PFP_S (0x0200 + SPO) /* (src) */
+#define PFP_C (0x0400 + SPO) /* (src IN mask) */
+#define PFP_CCA (0x0600 + SPO) /* (src IN mask) component-alpha */
+#define PFP_CCASA (0x0800 + SPO) /* (src IN mask) component-alpha src-alpha */
+#define PFP_S_A8 (0x0a00 + SPO) /* (src) a8 rt */
+#define PFP_C_A8 (0x0c00 + SPO) /* (src IN mask) a8 rt - same for CCA/CCASA */
+#define PFP_NV12 (0x0e00 + SPO) /* NV12 YUV->RGB */
/* shader constants */
#define CB_OFFSET 0x1000
+/* texture bindings (kepler) */
+#define TB_OFFSET 0x1800
+
#define VTX_ATTR(a, c, t, s) \
((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
@@ -98,15 +102,25 @@ static __inline__ void
PUSH_DATAu(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
unsigned delta, unsigned dwords)
{
- BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
- PUSH_DATA (push, (bo->offset + delta) >> 32);
- PUSH_DATA (push, (bo->offset + delta));
- BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
- PUSH_DATA (push, dwords * 4);
- PUSH_DATA (push, 1);
- BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
- PUSH_DATA (push, 0x100111);
- BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+ if (push->client->device->chipset < 0xe0) {
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATA (push, (bo->offset + delta) >> 32);
+ PUSH_DATA (push, (bo->offset + delta));
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, dwords * 4);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+ } else {
+ BEGIN_NVC0(push, SUBC_P2MF(0x0180), 4);
+ PUSH_DATA (push, dwords * 4);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, (bo->offset + delta) >> 32);
+ PUSH_DATA (push, (bo->offset + delta));
+ BEGIN_1IC0(push, SUBC_P2MF(0x01b0), 1 + dwords);
+ PUSH_DATA (push, 0x001001);
+ }
}
#endif
diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
index 4ba862d..8b6d51e 100644
--- a/src/nvc0_exa.c
+++ b/src/nvc0_exa.c
@@ -1006,3 +1006,60 @@ NVC0EXARectM2MF(NVPtr pNv, int w, int h, int cpp,
return TRUE;
}
+
+Bool
+NVE0EXARectCopy(NVPtr pNv, int w, int h, int cpp,
+ struct nouveau_bo *src, uint32_t src_off, int src_dom,
+ int src_pitch, int src_h, int src_x, int src_y,
+ struct nouveau_bo *dst, uint32_t dst_off, int dst_dom,
+ int dst_pitch, int dst_h, int dst_x, int dst_y)
+{
+ struct nouveau_pushbuf *push = pNv->pushbuf;
+ struct nouveau_pushbuf_refn refs[] = {
+ { src, src_dom | NOUVEAU_BO_RD },
+ { dst, dst_dom | NOUVEAU_BO_WR },
+ };
+ unsigned exec;
+
+ if (nouveau_pushbuf_space(push, 64, 0, 0) ||
+ nouveau_pushbuf_refn (push, refs, 2))
+ return FALSE;
+
+ exec = 0x00000206;
+ if (!src->config.nvc0.memtype) {
+ src_off += src_y * src_pitch + src_x * cpp;
+ exec |= 0x00000080;
+ }
+ if (!dst->config.nvc0.memtype) {
+ dst_off += dst_y * dst_pitch + dst_x * cpp;
+ exec |= 0x00000100;
+ }
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+ PUSH_DATA (push, 0x00001000 | src->config.nvc0.tile_mode);
+ PUSH_DATA (push, src_pitch);
+ PUSH_DATA (push, src_h);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, (src_y << 16) | src_x * cpp);
+ BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+ PUSH_DATA (push, 0x000001000 | dst->config.nvc0.tile_mode);
+ PUSH_DATA (push, dst_pitch);
+ PUSH_DATA (push, dst_h);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, (dst_y << 16) | dst_x * cpp);
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+ PUSH_DATA (push, (src->offset + src_off) >> 32);
+ PUSH_DATA (push, (src->offset + src_off));
+ PUSH_DATA (push, (dst->offset + dst_off) >> 32);
+ PUSH_DATA (push, (dst->offset + dst_off));
+ PUSH_DATA (push, src_pitch);
+ PUSH_DATA (push, dst_pitch);
+ PUSH_DATA (push, w * cpp);
+ PUSH_DATA (push, h);
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, exec);
+
+ return TRUE;
+}
diff --git a/src/nvc0_shader.h b/src/nvc0_shader.h
index f97b74b..2084a6d 100644
--- a/src/nvc0_shader.h
+++ b/src/nvc0_shader.h
@@ -5,7 +5,6 @@
const unsigned size = sizeof(code) / sizeof(code[0]); \
PUSH_DATAu((pNv)->pushbuf, (pNv)->tesla_scratch, (addr), size); \
PUSH_DATAp((pNv)->pushbuf, (code), size); \
- ErrorF("20 + %d * 2\n", (size - 20) / 2); \
} while(0)
static uint32_t
diff --git a/src/nve0_shader.h b/src/nve0_shader.h
new file mode 100644
index 0000000..8f36279
--- /dev/null
+++ b/src/nve0_shader.h
@@ -0,0 +1,404 @@
+#ifndef __NVE0_SHADER_H__
+#define __NVE0_SHADER_H__
+
+static uint32_t
+NVE0VP_Passthrough[] = {
+ 0x00020461,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x000ff000,
+ 0x00000000, /* VP_ATTR_EN[0x000] */
+ 0x0001033f, /* VP_ATTR_EN[0x080] */
+ 0x00000000, /* VP_ATTR_EN[0x100] */
+ 0x00000000,
+ 0x00000000, /* VP_ATTR_EN[0x200] */
+ 0x00000000,
+ 0x00000000, /* VP_ATTR_EN[0x300] */
+ 0x00000000,
+ 0x0033f000, /* VP_EXPORT_EN[0x040] */
+ 0x00000000, /* VP_EXPORT_EN[0x0c0] */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* VP_EXPORT_EN[0x2c0] */
+ 0x00000000,
+ 0xfff01c66,
+ 0x06000080, /* vfetch { $r0,1,2,3 } b128 a[0x80] */
+ 0xfff11c26,
+ 0x06000090, /* vfetch { $r4,5 } b64 a[0x90] */
+ 0xfff19c26,
+ 0x060000a0, /* vfetch { $r6,7 } b64 a[0xa0] */
+ 0x03f01c66,
+ 0x0a7e0070, /* export v[0x70] { $r0 $r1 $r2 $r3 } */
+ 0x13f01c26,
+ 0x0a7e0080, /* export v[0x80] { $r4 $r5 } */
+ 0x1bf01c26,
+ 0x0a7e0090, /* export v[0x90] { $r6 $r7 } */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Source[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x0000000a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f05c40,
+ 0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ 0x03f01c40,
+ 0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ 0xfc001e86,
+ 0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Composite[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f0dc40,
+ 0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ 0x03f09c40,
+ 0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ 0xfc211e86,
+ 0x80120001, /* tex { _,_,_,$r4 } $t1 { $r2,3 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x03f05c40,
+ 0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ 0x03f01c40,
+ 0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ 0xfc001e86,
+ 0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x1030dc40,
+ 0x58000000, /* mul ftz rn f32 $r3 $r3 $r4 */
+ 0x10209c40,
+ 0x58000000, /* mul ftz rn f32 $r2 $r2 $r4 */
+ 0x10105c40,
+ 0x58000000, /* mul ftz rn f32 $r1 $r1 $r4 */
+ 0x10001c40,
+ 0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_CAComposite[] = {
+ 0x00021462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000, /* FRAG_COORD_UMASK = 0x8 */
+ 0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */
+ 0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */
+ 0x00000000, /* FP_INTERP[0x100], 1 = FLAT */
+ 0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */
+ 0x00000000, /* FP_INTERP[0x180], 3 = LINEAR */
+ 0x00000000, /* FP_INTERP[0x1c0] */
+ 0x00000000, /* FP_INTERP[0x200] */
+ 0x00000000, /* FP_INTERP[0x240] */
+ 0x00000000, /* FP_INTERP[0x280] */
+ 0x00000000, /* FP_INTERP[0x2c0] */
+ 0x00000000, /* FP_INTERP[0x300] */
+ 0x00000000,
+ 0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */
+ 0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f0dc40,
+ 0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ 0x03f09c40,
+ 0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ 0xfc211e86,
+ 0x8013c001, /* tex { $r4,5,6,7 } $t1 { $r2,3 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x03f05c40,
+ 0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ 0x03f01c40,
+ 0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ 0xfc001e86,
+ 0x8013c000, /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x1c30dc40,
+ 0x58000000, /* mul ftz rn f32 $r3 $r3 $r7 */
+ 0x18209c40,
+ 0x58000000, /* mul ftz rn f32 $r2 $r2 $r6 */
+ 0x14105c40,
+ 0x58000000, /* mul ftz rn f32 $r1 $r1 $r5 */
+ 0x10001c40,
+ 0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_CACompositeSrcAlpha[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f0dc40,
+ 0xc07e0084, /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+ 0x03f09c40,
+ 0xc07e0080, /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+ 0xfc211e86,
+ 0x80120000, /* tex { _,_,_,$r4 } $t0 { $r2,3 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x03f05c40,
+ 0xc07e0094, /* pinterp f32 $r1 $r0 v[$r63+0x94] */
+ 0x03f01c40,
+ 0xc07e0090, /* pinterp f32 $r0 $r0 v[$r63+0x90] */
+ 0xfc001e86,
+ 0x8013c001, /* tex { $r0,1,2,3 } $t1 { $r0,1 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x1030dc40,
+ 0x58000000, /* mul ftz rn f32 $r3 $r3 $r4 */
+ 0x10209c40,
+ 0x58000000, /* mul ftz rn f32 $r2 $r2 $r4 */
+ 0x10105c40,
+ 0x58000000, /* mul ftz rn f32 $r1 $r1 $r4 */
+ 0x10001c40,
+ 0x58000000, /* mul ftz rn f32 $r0 $r0 $r4 */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Source_A8[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x0000000a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f05c40,
+ 0xc07e0084, /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ 0x03f01c40,
+ 0xc07e0080, /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ 0xfc001e86,
+ 0x80120000, /* tex { _ _ _ $r0 } $t0 { $r0 $r1 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x0000dde4,
+ 0x28000000, /* mov b32 $r3 $r0 */
+ 0x00009de4,
+ 0x28000000, /* mov b32 $r2 $r0 */
+ 0x00005de4,
+ 0x28000000, /* mov b32 $r1 $r0 */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_Composite_A8[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff01c00,
+ 0xc07e007c, /* linterp f32 $r0 v[$r63+0x7c] */
+ 0x10001c00,
+ 0xc8000000, /* rcp f32 $r0 $r0 */
+ 0x03f0dc40,
+ 0xc07e0094, /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ 0x03f09c40,
+ 0xc07e0090, /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ 0xfc205e86,
+ 0x80120001, /* tex { _ _ _ $r1 } $t1 { $r2 $r3 } */
+ 0x03f0dc40,
+ 0xc07e0084, /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+ 0x03f09c40,
+ 0xc07e0080, /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+ 0xfc201e86,
+ 0x80120000, /* tex { _ _ _ $r0 } $t0 { $r2 $r3 } */
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x0400dc40,
+ 0x58000000, /* mul ftz rn f32 $r3 $r0 $r1 */
+ 0x0c009de4,
+ 0x28000000, /* mov b32 $r2 $r3 */
+ 0x0c005de4,
+ 0x28000000, /* mov b32 $r1 $r3 */
+ 0x0c001de4,
+ 0x28000000, /* mov b32 $r0 $r3 */
+ 0x00001de7,
+ 0x80000000, /* exit */
+};
+
+static uint32_t
+NVE0FP_NV12[] = {
+ 0x00021462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+ 0xfff09c00,
+ 0xc07e007c,
+ 0x10209c00,
+ 0xc8000000,
+ 0x0bf01c40,
+ 0xc07e0080,
+ 0x0bf05c40,
+ 0xc07e0084,
+ 0xfc001e86,
+ 0x80120000,
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x00015c40,
+ 0x58004000,
+ 0x1050dc20,
+ 0x50004000,
+ 0x20511c20,
+ 0x50004000,
+ 0x30515c20,
+ 0x50004000,
+ 0x0bf01c40,
+ 0xc07e0090,
+ 0x0bf05c40,
+ 0xc07e0094,
+ 0xfc001e86,
+ 0x80130001,
+ 0x00001de6,
+ 0xf0000000, /* texbar */
+ 0x4000dc40,
+ 0x30064000,
+ 0x50011c40,
+ 0x30084000,
+ 0x60015c40,
+ 0x300a4000,
+ 0x70101c40,
+ 0x30064000,
+ 0x90109c40,
+ 0x300a4000,
+ 0x80105c40,
+ 0x30084000,
+ 0x00001de7,
+ 0x80000000,
+};
+
+#endif