summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/Makefile.am | 2
-rw-r--r-- src/nouveau_class.h | 77
-rw-r--r-- src/nouveau_exa.c | 39
-rw-r--r-- src/nouveau_wfb.c | 3
-rw-r--r-- src/nv_accel_common.c | 46
-rw-r--r-- src/nv_dma.c | 17
-rw-r--r-- src/nv_driver.c | 2
-rw-r--r-- src/nv_proto.h | 25
-rw-r--r-- src/nv_type.h | 4
-rw-r--r-- src/nvc0_accel.c | 758
-rw-r--r-- src/nvc0_accel.h | 83
-rw-r--r-- src/nvc0_exa.c | 1239
12 files changed, 2246 insertions, 49 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 9340c45..8f1f704 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -58,6 +58,8 @@ nouveau_drv_la_SOURCES = \
nv50_exa.c \
nv50_xv.c \
nv50_texture.h \
+ nvc0_accel.c nvc0_accel.h \
+ nvc0_exa.c \
drmmode_display.c \
vl_hwmc.c \
vl_hwmc.h
diff --git a/src/nouveau_class.h b/src/nouveau_class.h
index 5ce978a..e0ef4b9 100644
--- a/src/nouveau_class.h
+++ b/src/nouveau_class.h
@@ -735,43 +735,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT 0x00009039
-
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_SERIALIZE 0x00000110
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN 0x00000204
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN 0x00000208
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN 0x0000020c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN 0x00000210
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z 0x00000214
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT 0x00000220
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT 0x00000224
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT 0x00000228
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT 0x0000022c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z 0x00000230
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x00000238
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_LOW 0x0000023c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC 0x00000300
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_PUSH (1 << 0)
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_IN (1 << 4)
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_OUT (1 << 8)
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_NOTIFY (1 << 13)
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_SHIFT 20
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_MASK 0x00f00000
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_DATA 0x00000304
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x0000030c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_LOW 0x00000310
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_HIGH 0x0000032c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_LOW 0x00000330
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000334
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X 0x00000344
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y 0x00000348
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X 0x0000034c
-#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y 0x00000350
+#define NVC0_M2MF 0x00009039
+
+#define NVC0_M2MF_NOP 0x00000100
+#define NVC0_M2MF_SERIALIZE 0x00000110
+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
+#define NVC0_M2MF_EXEC 0x00000300
+#define NVC0_M2MF_EXEC_PUSH (1 << 0)
+#define NVC0_M2MF_EXEC_LINEAR_IN (1 << 4)
+#define NVC0_M2MF_EXEC_LINEAR_OUT (1 << 8)
+#define NVC0_M2MF_EXEC_NOTIFY (1 << 13)
+#define NVC0_M2MF_EXEC_INC_SHIFT 20
+#define NVC0_M2MF_EXEC_INC_MASK 0x00f00000
+#define NVC0_M2MF_DATA 0x00000304
+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
+#define NVC0_M2MF_PITCH_IN 0x00000314
+#define NVC0_M2MF_PITCH_OUT 0x00000318
+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
+#define NVC0_M2MF_LINE_COUNT 0x00000320
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
+#define NVC0_M2MF_NOTIFY 0x00000334
+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
#define NV01_MEMORY_LOCAL_BANKED 0x0000003d
@@ -9122,6 +9122,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0TCL_NOTIFY 0x0000010c
#define NVC0TCL_SERIALIZE 0x00000110
#define NVC0TCL_EARLY_FRAGMENT_TESTS 0x00000210
+#define NVC0TCL_CODE_FLUSH 0x0000021c
#define NVC0TCL_TESS_MODE 0x00000320
#define NVC0TCL_TESS_MODE_PRIM_SHIFT 0
#define NVC0TCL_TESS_MODE_PRIM_MASK 0x0000000f
@@ -9556,6 +9557,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
+#define NVC0TCL_BLEND_ENABLE__SIZE 0x00000008
#define NVC0TCL_STENCIL_ENABLE 0x00001380
#define NVC0TCL_STENCIL_FRONT_OP_FAIL 0x00001384
#define NVC0TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 4618994..d900e99 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -356,7 +356,7 @@ nv50_style_tiled_pixmap(PixmapPtr ppix)
ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
NVPtr pNv = NVPTR(pScrn);
- return pNv->Architecture == NV_ARCH_50 &&
+ return pNv->Architecture >= NV_ARCH_50 &&
(nouveau_pixmap_bo(ppix)->tile_flags &
NOUVEAU_BO_TILE_LAYOUT_MASK);
}
@@ -377,6 +377,10 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h,
offset = (y * src_pitch) + (x * cpp);
if (pNv->GART) {
+ if ((pNv->Architecture >= NV_ARCH_C0) &&
+ NVC0AccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch))
+ return TRUE;
+ else
if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch))
return TRUE;
}
@@ -413,17 +417,29 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h,
exaMarkSync(pdpix->drawable.pScreen);
return TRUE;
}
- } else {
+ } else
+ if (pNv->Architecture < NV_ARCH_C0) {
if (NV50EXAUploadSIFC(src, src_pitch, pdpix,
x, y, w, h, cpp)) {
exaMarkSync(pdpix->drawable.pScreen);
return TRUE;
}
+ } else {
+ if (NVC0EXAUploadSIFC(src, src_pitch, pdpix,
+ x, y, w, h, cpp)) {
+ exaMarkSync(pdpix->drawable.pScreen);
+ return TRUE;
+ }
}
}
/* try gart-based transfer */
if (pNv->GART) {
+ if ((pNv->Architecture >= NV_ARCH_C0) &&
+ NVC0AccelUploadM2MF(pdpix, x, y, w, h, src, src_pitch)) {
+ exaMarkSync(pdpix->drawable.pScreen);
+ return TRUE;
+ } else
if (NVAccelUploadM2MF(pdpix, x, y, w, h, src, src_pitch)) {
exaMarkSync(pdpix->drawable.pScreen);
return TRUE;
@@ -464,6 +480,8 @@ nouveau_exa_init(ScreenPtr pScreen)
return FALSE;
}
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "nouveau EXA init\n");
+
exa->exa_major = EXA_VERSION_MAJOR;
exa->exa_minor = EXA_VERSION_MINOR;
exa->flags = EXA_OFFSCREEN_PIXMAPS;
@@ -545,12 +563,29 @@ nouveau_exa_init(ScreenPtr pScreen)
exa->Composite = NV50EXAComposite;
exa->DoneComposite = NV50EXADoneComposite;
break;
+ case NV_ARCH_C0:
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "EXA func pointers for NVC0\n");
+ exa->PrepareCopy = NVC0EXAPrepareCopy;
+ exa->Copy = NVC0EXACopy;
+ exa->DoneCopy = NVC0EXADoneCopy;
+
+ exa->PrepareSolid = NVC0EXAPrepareSolid;
+ exa->Solid = NVC0EXASolid;
+ exa->DoneSolid = NVC0EXADoneSolid;
+
+ exa->CheckComposite = NVC0EXACheckComposite;
+ exa->PrepareComposite = NVC0EXAPrepareComposite;
+ exa->Composite = NVC0EXAComposite;
+ exa->DoneComposite = NVC0EXADoneComposite;
+ break;
default:
break;
}
if (!exaDriverInit(pScreen, exa))
return FALSE;
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "exaDriverInit successful\n");
pNv->EXADriverPtr = exa;
return TRUE;
diff --git a/src/nouveau_wfb.c b/src/nouveau_wfb.c
index 2425e31..feab54a 100644
--- a/src/nouveau_wfb.c
+++ b/src/nouveau_wfb.c
@@ -180,6 +180,9 @@ nouveau_wfb_setup_wrap(ReadMemoryProcPtr *pRead, WriteMemoryProcPtr *pWrite,
wfb->tile_height = bo->tile_mode + 2;
wfb->horiz_tiles = wfb->pitch / 64;
have_tiled = 1;
+
+ if (1 /* NV_ARCH_C0, FIXME */)
+ wfb->tile_height += 1;
}
out:
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index 1ade291..ea11cc6 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -40,7 +40,25 @@ nouveau_allocate_surface(ScrnInfoPtr scrn, int width, int height, int bpp,
*pitch = NOUVEAU_ALIGN(width * bpp, 512) / 8;
if (tiled) {
- if (pNv->Architecture >= NV_ARCH_50) {
+ if (pNv->Architecture >= NV_ARCH_C0) {
+ if (height > 64)
+ tile_mode = 4;
+ else if (height > 32)
+ tile_mode = 3;
+ else if (height > 16)
+ tile_mode = 2;
+ else if (height > 8)
+ tile_mode = 1;
+ else
+ tile_mode = 0;
+
+ if (usage_hint & NOUVEAU_CREATE_PIXMAP_ZETA)
+ tile_flags = 0x1100; /* S8Z24 */
+ else
+ tile_flags = 0xfe00;
+
+ height = NOUVEAU_ALIGN(height, 1 << (tile_mode + 3));
+ } else if (pNv->Architecture >= NV_ARCH_50) {
if (height > 32)
tile_mode = 4;
else if (height > 16)
@@ -592,6 +610,8 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
if (pNv->NoAccel)
return TRUE;
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "NVAccelCommonInit\n");
+
/* General engine objects */
INIT_CONTEXT_OBJECT(DmaNotifier0);
@@ -607,13 +627,23 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
INIT_CONTEXT_OBJECT(ScaledImage);
INIT_CONTEXT_OBJECT(ClipRectangle);
INIT_CONTEXT_OBJECT(ImageFromCpu);
- } else {
+ } else
+ if (pNv->Architecture < NV_ARCH_C0) {
INIT_CONTEXT_OBJECT(2D_NV50);
+ } else {
+ INIT_CONTEXT_OBJECT(2D_NVC0);
}
- INIT_CONTEXT_OBJECT(MemFormat);
+
+ if (pNv->Architecture < NV_ARCH_C0)
+ INIT_CONTEXT_OBJECT(MemFormat);
+ else
+ INIT_CONTEXT_OBJECT(M2MF_NVC0);
/* 3D init */
switch (pNv->Architecture) {
+ case NV_ARCH_C0:
+ INIT_CONTEXT_OBJECT(NVC0TCL);
+ break;
case NV_ARCH_50:
INIT_CONTEXT_OBJECT(NV50TCL);
break;
@@ -656,11 +686,15 @@ void NVAccelFree(ScrnInfoPtr pScrn)
nouveau_grobj_free(&pNv->NvClipRectangle);
nouveau_grobj_free(&pNv->NvImageFromCpu);
} else
+ if (pNv->Architecture < NV_ARCH_C0)
nouveau_grobj_free(&pNv->Nv2D);
- nouveau_grobj_free(&pNv->NvMemFormat);
- nouveau_grobj_free(&pNv->NvSW);
- nouveau_grobj_free(&pNv->Nv3D);
+ if (pNv->Architecture < NV_ARCH_C0) {
+ nouveau_grobj_free(&pNv->NvMemFormat);
+
+ nouveau_grobj_free(&pNv->NvSW);
+ nouveau_grobj_free(&pNv->Nv3D);
+ }
nouveau_bo_ref(NULL, &pNv->tesla_scratch);
nouveau_bo_ref(NULL, &pNv->shader_mem);
diff --git a/src/nv_dma.c b/src/nv_dma.c
index 409f42c..612215c 100644
--- a/src/nv_dma.c
+++ b/src/nv_dma.c
@@ -54,12 +54,21 @@ void NVSync(ScrnInfoPtr pScrn)
if (pNv->NoAccel)
return;
+ return; /* XXX */
+
/* Wait for nvchannel to go completely idle */
nouveau_notifier_reset(pNv->notify0, 0);
- BEGIN_RING(chan, gr, 0x104, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, gr, 0x100, 1);
- OUT_RING (chan, 0);
+ if (pNv->Architecture >= NV_ARCH_C0) {
+ BEGIN_RING_NVC0(chan, NvSub2D, 0x0104, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING_NVC0(chan, NvSub2D, 0x0100, 1);
+ OUT_RING (chan, 0);
+ } else {
+ BEGIN_RING(chan, gr, 0x104, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, gr, 0x100, 1);
+ OUT_RING (chan, 0);
+ }
FIRE_RING (chan);
if (nouveau_notifier_wait_status(pNv->notify0, 0,
NV_NOTIFY_STATE_STATUS_COMPLETED, 2.0))
diff --git a/src/nv_driver.c b/src/nv_driver.c
index f351fac..b7edf53 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -110,6 +110,8 @@ static struct NvFamily NVKnownFamilies[] =
{ "GeForce 6", "NV4x" },
{ "GeForce 7", "G7x" },
{ "GeForce 8", "G8x" },
+ { "GeForce GTX 200", "NVA0" },
+ { "GeForce GTX 400", "NVC0" },
{ NULL, NULL}
};
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 888e74e..61a7b5b 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -135,6 +135,11 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
void NV50SyncToVBlank(PixmapPtr ppix, BoxPtr box);
Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
+/* in nvc0_accel.c */
+Bool NVAccelInitNVC0TCL(ScrnInfoPtr pScrn);
+Bool NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn);
+Bool NVAccelInit2D_NVC0(ScrnInfoPtr pScrn);
+
/* in nv50_exa.c */
Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel);
void NV50EXASolid(PixmapPtr, int, int, int, int);
@@ -150,6 +155,26 @@ void NV50EXADoneComposite(PixmapPtr);
Bool NV50EXAUploadSIFC(const char *src, int src_pitch,
PixmapPtr pdPix, int x, int y, int w, int h, int cpp);
+/* in nvc0_exa.c */
+Bool NVC0AccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h,
+ const char *src, int src_pitch);
+Bool NVC0AccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
+ char *dst, unsigned dst_pitch);
+
+Bool NVC0EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel);
+void NVC0EXASolid(PixmapPtr, int, int, int, int);
+void NVC0EXADoneSolid(PixmapPtr);
+Bool NVC0EXAPrepareCopy(PixmapPtr, PixmapPtr, int, int, int, Pixel);
+void NVC0EXACopy(PixmapPtr, int, int, int, int, int, int);
+void NVC0EXADoneCopy(PixmapPtr);
+Bool NVC0EXACheckComposite(int, PicturePtr, PicturePtr, PicturePtr);
+Bool NVC0EXAPrepareComposite(int, PicturePtr, PicturePtr, PicturePtr,
+ PixmapPtr, PixmapPtr, PixmapPtr);
+void NVC0EXAComposite(PixmapPtr, int, int, int, int, int, int, int, int);
+void NVC0EXADoneComposite(PixmapPtr);
+Bool NVC0EXAUploadSIFC(const char *src, int src_pitch,
+ PixmapPtr pdPix, int x, int y, int w, int h, int cpp);
+
/* nv50_xv.c */
int nv50_xv_image_put(ScrnInfoPtr, struct nouveau_bo *, int, int, int, int,
BoxPtr, int, int, int, int, uint16_t, uint16_t,
diff --git a/src/nv_type.h b/src/nv_type.h
index 4204556..a06859d 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -26,6 +26,10 @@
#define NV_ARCH_50 0x50
#define NV_ARCH_C0 0xc0
+#define NvSubM2MF 3
+#define NvSub2D 4
+#define NvSub3D 5
+
/* NV50 */
typedef struct _NVRec *NVPtr;
typedef struct _NVRec {
diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c
new file mode 100644
index 0000000..f2fe8ff
--- /dev/null
+++ b/src/nvc0_accel.c
@@ -0,0 +1,758 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv_include.h"
+#include "nvc0_accel.h"
+
+#define NOUVEAU_BO(a, b, m) (NOUVEAU_BO_##a | NOUVEAU_BO_##b | NOUVEAU_BO_##m)
+/* Set up M2MF on NVC0: send class id 0x9039 via method 0x0000 on subchannel NvSubM2MF. Returns TRUE on every visible path. */
+Bool
+NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
+{
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_channel *chan = pNv->chan;
+ /* No grobj is allocated in this function: the constant -2 is stored as a non-NULL placeholder pointer. */
+ pNv->NvMemFormat = (struct nouveau_grobj *)-2;
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0_M2MF (9039)\n");
+
+ BEGIN_RING(chan, NvSubM2MF, 0x0000, 1); /* one data word to method 0x0000; presumably binds the class to the subchannel - confirm */
+ OUT_RING (chan, 0x9039); /* same value as NVC0_M2MF in nouveau_class.h */
+
+ /* XXX: Stupid interface, I want the notifier address ! */
+
+ FIRE_RING (chan); /* presumably submits the queued words to the channel - confirm against the macro */
+
+ return TRUE;
+}
+/* Set up the 2D engine on NVC0: send class id 0x902d on subchannel NvSub2D, then program default 2D state. Returns TRUE on every visible path. */
+Bool
+NVAccelInit2D_NVC0(ScrnInfoPtr pScrn)
+{
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_channel *chan = pNv->chan;
+ /* No grobj is allocated in this function: the constant -2 is stored as a non-NULL placeholder pointer. */
+ pNv->Nv2D = (struct nouveau_grobj *)-2;
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0_2D (902d)\n");
+
+ BEGIN_RING(chan, NvSub2D, 0x0000, 1); /* one data word to method 0x0000; presumably binds the class to the subchannel - confirm */
+ OUT_RING (chan, 0x902d);
+ /* Default state, written with the NV50_2D_* method names: clip enable = 1, colour key enable = 0, ROP = 0x55, operation = SRCCOPY. */
+ BEGIN_RING(chan, NvSub2D, NV50_2D_CLIP_ENABLE, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_COLOR_KEY_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub2D, 0x0884, 1); /* unnamed method; its meaning is not visible in this file */
+ OUT_RING (chan, 0x3f);
+ BEGIN_RING(chan, NvSub2D, 0x0888, 1); /* unnamed method; its meaning is not visible in this file */
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_ROP, 1);
+ OUT_RING (chan, 0x55);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1);
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
+ /* Four consecutive words from BLIT_DU_DX_FRACT: 0,1,0,1 - presumably fract/int pairs giving a 1:1 scale on both axes; TODO confirm. */
+ BEGIN_RING(chan, NvSub2D, NV50_2D_BLIT_DU_DX_FRACT, 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_SHAPE, 2); /* two consecutive words: shape value, then a format value */
+ OUT_RING (chan, 4);
+ OUT_RING (chan, NVC0TCL_RT_FORMAT_R5G6B5_UNORM); /* NOTE(review): a 3D-class (NVC0TCL) constant used on the 2D subchannel - confirm the encodings match */
+ BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_FORMAT, 2);
+ OUT_RING (chan, 2);
+ OUT_RING (chan, 1);
+
+ FIRE_RING (chan); /* presumably submits the queued words to the channel - confirm against the macro */
+ /* NOTE(review): the cached value differs from the 0x55 ROP programmed above; presumably a sentinel that forces the next ROP setup to re-emit - confirm against users of currentRop. */
+ pNv->currentRop = 0xfffffffa;
+ return TRUE;
+}
+
+Bool
+NVAccelInitNVC0TCL(ScrnInfoPtr pScrn)
+{
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_channel *chan = pNv->chan;
+ struct nouveau_bo *bo = pNv->tesla_scratch;
+ uint32_t tclClass;
+ int i;
+
+ switch (pNv->dev->chipset) {
+ case 0xc0:
+ tclClass = 0x9097;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (!pNv->Nv3D) {
+ int ret;
+
+ ret = nouveau_notifier_alloc(chan, NvVBlankSem, 1,
+ &pNv->vblank_sem);
+ if (ret)
+ return FALSE;
+
+ ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM,
+ (128 << 10), 0x20000,
+ &pNv->tesla_scratch);
+ bo = pNv->tesla_scratch;
+ if (!ret)
+ ret = nouveau_bo_pin(bo, NOUVEAU_BO_VRAM);
+ if (ret) {
+ nouveau_notifier_free(&pNv->vblank_sem);
+ return FALSE;
+ }
+ }
+ pNv->Nv3D = (struct nouveau_grobj *)-2;
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "init NVC0TCL (%x)\n", tclClass);
+
+ if (MARK_RING(chan, 512, 32))
+ return FALSE;
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_NOTIFY_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSub3D, 0x0000, 1);
+ OUT_RING (chan, tclClass);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_NOTIFY_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, NTFY_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_COLOR_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_ZETA_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_MULTISAMPLE_MODE, 1);
+ OUT_RING (chan, NVC0TCL_MULTISAMPLE_MODE_1X);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_COND_MODE, 1);
+ OUT_RING (chan, NVC0TCL_COND_MODE_ALWAYS);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_RT_CONTROL, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_ZETA_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_CLIP_RECTS_EN, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CLIPID_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_TWO_SIDE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, 0x0fac, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_COLOR_MASK(0), 8);
+ OUT_RING (chan, 0x1111);
+ for (i = 1; i < 8; ++i)
+ OUT_RING(chan, 0);
+
+ FIRE_RING (chan);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SCREEN_SCISSOR_HORIZ, 2);
+ OUT_RING (chan, (8192 << 16) | 0);
+ OUT_RING (chan, (8192 << 16) | 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_Y_ORIGIN_BOTTOM, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_WINDOW_OFFSET_X, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, 0x1590, 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_LINKED_TSC, 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_TRANSFORM_EN, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_DEPTH_RANGE_NEAR(0), 2);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 1.0f);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TEX_LIMITS(4), 1);
+ OUT_RING (chan, 0x54);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_ENABLE(0), 8);
+ OUT_RING (chan, 1);
+ for (i = 1; i < 8; ++i)
+ OUT_RING(chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_INDEPENDENT, 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSub3D, 0x17bc, 3);
+ OUT_RELOCh(chan, bo, MISC_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, MISC_OFST, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, NvSub3D, 0x3420, 2);
+ OUT_RING (chan, (bo->offset + CB_OFFSET) >> 8);
+ OUT_RING (chan, 16384);
+
+ FIRE_RING (chan);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_ADDRESS_HIGH, 2);
+ OUT_RELOCh(chan, bo, CODE_OFFSET, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, CODE_OFFSET, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PVP_PASS, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PVP_PASS, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 7 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 7 * 2 + 20);
+ OUT_RING (chan, 0x00020461);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0xff000);
+ OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x000] */
+ OUT_RING (chan, 0x0001033f); /* VP_ATTR_EN[0x080] */
+ OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x100] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x200] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000); /* VP_ATTR_EN[0x300] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0033f000); /* VP_EXPORT_EN[0x040] */
+ OUT_RING (chan, 0x00000000); /* VP_EXPORT_EN[0x0c0] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000); /* VP_EXPORT_EN[0x2c0] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c66);
+ OUT_RING (chan, 0x06000080); /* vfetch { $r0,1,2,3 } b128 a[0x80] */
+ OUT_RING (chan, 0xfff11c26);
+ OUT_RING (chan, 0x06000090); /* vfetch { $r4,5 } b64 a[0x90] */
+ OUT_RING (chan, 0xfff19c26);
+ OUT_RING (chan, 0x060000a0); /* vfetch { $r6,7 } b64 a[0xa0] */
+ OUT_RING (chan, 0x03f01c66);
+ OUT_RING (chan, 0x0a7e0070); /* export v[0x70] { $r0 $r1 $r2 $r3 } */
+ OUT_RING (chan, 0x13f01c26);
+ OUT_RING (chan, 0x0a7e0080); /* export v[0x80] { $r4 $r5 } */
+ OUT_RING (chan, 0x1bf01c26);
+ OUT_RING (chan, 0x0a7e0090); /* export v[0x90] { $r6 $r7 } */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(1), 2);
+ OUT_RING (chan, 0x11);
+ OUT_RING (chan, PVP_PASS);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_GPR_ALLOC(1), 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, NvSub3D, 0x163c, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, 0x2600, 1);
+ OUT_RING (chan, 1);
+
+ FIRE_RING (chan); usleep(500);
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_S, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_S, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 6 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 6 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x0000000a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f05c40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f01c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc001e86);
+ OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_C, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_C, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 13 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x00000a0a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ OUT_RING (chan, 0xfc211e86);
+ OUT_RING (chan, 0x80120001); /* tex { _,_,_,$r4 } $t1 { $r2,3 } */
+ OUT_RING (chan, 0x03f05c40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f01c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc001e86);
+ OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ OUT_RING (chan, 0x1030dc40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r4 */
+ OUT_RING (chan, 0x10209c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r4 */
+ OUT_RING (chan, 0x10105c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r4 */
+ OUT_RING (chan, 0x10001c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_CCA, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_CCA, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 13 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20);
+ OUT_RING (chan, 0x00021462); /* 0x0000c000 = USES_KIL, MULTI_COLORS */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000); /* FRAG_COORD_UMASK = 0x8 */
+ OUT_RING (chan, 0x00000a0a); /* FP_INTERP[0x080], 0022 0022 */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x0c0], 0 = OFF */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x100], 1 = FLAT */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x140], 2 = PERSPECTIVE */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x180], 3 = LINEAR */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x1c0] */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x200] */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x240] */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x280] */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x2c0] */
+ OUT_RING (chan, 0x00000000); /* FP_INTERP[0x300] */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f); /* FP_RESULT_MASK (0x8000 Face ?) */
+ OUT_RING (chan, 0x00000000); /* 0x2 = FragDepth, 0x1 = SampleMask */
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ OUT_RING (chan, 0xfc211e86);
+ OUT_RING (chan, 0x8013c001); /* tex { $r4,5,6,7 } $t1 { $r2,3 } */
+ OUT_RING (chan, 0x03f05c40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f01c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc001e86);
+ OUT_RING (chan, 0x8013c000); /* tex { $r0,1,2,3 } $t0 { $r0,1 } */
+ OUT_RING (chan, 0x1c30dc40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r7 */
+ OUT_RING (chan, 0x18209c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r6 */
+ OUT_RING (chan, 0x14105c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r5 */
+ OUT_RING (chan, 0x10001c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_CCASA, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_CCASA, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 13 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x00000a0a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc211e86);
+ OUT_RING (chan, 0x80120000); /* tex { _,_,_,$r4 } $t0 { $r2,3 } */
+ OUT_RING (chan, 0x03f05c40);
+ OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r1 $r0 v[$r63+0x94] */
+ OUT_RING (chan, 0x03f01c40);
+ OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r0 $r0 v[$r63+0x90] */
+ OUT_RING (chan, 0xfc001e86);
+ OUT_RING (chan, 0x8013c001); /* tex { $r0,1,2,3 } $t1 { $r0,1 } */
+ OUT_RING (chan, 0x1030dc40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r3 $r4 */
+ OUT_RING (chan, 0x10209c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r2 $r2 $r4 */
+ OUT_RING (chan, 0x10105c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r1 $r1 $r4 */
+ OUT_RING (chan, 0x10001c40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r0 $r0 $r4 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_S_A8, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_S_A8, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 9 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 9 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x0000000a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f05c40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r1 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f01c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r0 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc001e86);
+ OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r0 } $t0 { $r0 $r1 } */
+ OUT_RING (chan, 0x0000dde4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r3 $r0 */
+ OUT_RING (chan, 0x00009de4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r2 $r0 */
+ OUT_RING (chan, 0x00005de4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r1 $r0 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_C_A8, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_C_A8, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 13 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 13 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x00000a0a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ OUT_RING (chan, 0xfc205e86);
+ OUT_RING (chan, 0x80120001); /* tex { _ _ _ $r1 } $t1 { $r2 $r3 } */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc201e86);
+ OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r0 } $t0 { $r2 $r3 } */
+ OUT_RING (chan, 0x0400dc40);
+ OUT_RING (chan, 0x58000000); /* mul ftz rn f32 $r3 $r0 $r1 */
+ OUT_RING (chan, 0x0c009de4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r2 $r3 */
+ OUT_RING (chan, 0x0c005de4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r1 $r3 */
+ OUT_RING (chan, 0x0c001de4);
+ OUT_RING (chan, 0x28000000); /* mov b32 $r0 $r3 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ FIRE_RING (chan);
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, bo, PFP_NV12, NOUVEAU_BO(VRAM, VRAM, WR)) ||
+ OUT_RELOCl(chan, bo, PFP_NV12, NOUVEAU_BO(VRAM, VRAM, WR))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 16 * 8 + 20 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 16 * 2 + 20);
+ OUT_RING (chan, 0x00021462);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x80000000);
+ OUT_RING (chan, 0x00000a0a);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x0000000f);
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0xfff01c00);
+ OUT_RING (chan, 0xc07e007c); /* linterp f32 $r0 v[$r63+0x7c] */
+ OUT_RING (chan, 0x10001c00);
+ OUT_RING (chan, 0xc8000000); /* rcp f32 $r0 $r0 */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0084); /* pinterp f32 $r3 $r0 v[$r63+0x84] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0080); /* pinterp f32 $r2 $r0 v[$r63+0x80] */
+ OUT_RING (chan, 0xfc205e86);
+ OUT_RING (chan, 0x80120000); /* tex { _ _ _ $r1 } $t0 { $r2 $r3 } */
+ OUT_RING (chan, 0x03f0dc40);
+ OUT_RING (chan, 0xc07e0094); /* pinterp f32 $r3 $r0 v[$r63+0x94] */
+ OUT_RING (chan, 0x03f09c40);
+ OUT_RING (chan, 0xc07e0090); /* pinterp f32 $r2 $r0 v[$r63+0x90] */
+ OUT_RING (chan, 0xfc211e86);
+ OUT_RING (chan, 0x80130001); /* tex { _ _ $r4 $r5 } $t1 { $r2 $r3 } */
+ OUT_RING (chan, 0x28101c42);
+ OUT_RING (chan, 0x30fc7757); /* mul ftz f32 $r0 $r1 0.616543 */
+ OUT_RING (chan, 0x08109c42);
+ OUT_RING (chan, 0x32fe8493); /* mul ftz f32 $r2 $r1 -1.258934 */
+ OUT_RING (chan, 0xec10dc42);
+ OUT_RING (chan, 0x32fe0704); /* mul ftz f32 $r3 $r1 -1.013709 */
+ OUT_RING (chan, 0x00405c40);
+ OUT_RING (chan, 0x30004000); /* add ftz f32 $r1 mul $r4 c0[0] $r0 */
+ OUT_RING (chan, 0x10409c40);
+ OUT_RING (chan, 0x30044000); /* add ftz f32 $r2 mul $r4 c0[0x4] $r2 */
+ OUT_RING (chan, 0x30501c40);
+ OUT_RING (chan, 0x30064000); /* add ftz f32 $r0 mul $r5 c0[0xc] $r3 */
+ OUT_RING (chan, 0x20505c40);
+ OUT_RING (chan, 0x30024000); /* add ftz f32 $r1 mul $r5 c0[0x8] $r1 */
+ OUT_RING (chan, 0x00001de7);
+ OUT_RING (chan, 0x80000000); /* exit */
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_SIZE, 3);
+ OUT_RING (chan, 256);
+ if (OUT_RELOCh(chan, bo, CB_OFFSET, NOUVEAU_BO(VRAM, VRAM, RD)) ||
+ OUT_RELOCl(chan, bo, CB_OFFSET, NOUVEAU_BO(VRAM, VRAM, RD))) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_POS, 5);
+ OUT_RING (chan, 0);
+ OUT_RINGf (chan, -0.391730f);
+ OUT_RINGf (chan, 2.017000f);
+ OUT_RINGf (chan, -0.812900f);
+ OUT_RINGf (chan, 1.595800f);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_FLUSH, 1);
+ OUT_RING (chan, 0x1111);
+
+ FIRE_RING (chan);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(5), 2);
+ OUT_RING (chan, 0x51);
+ OUT_RING (chan, PFP_S);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_GPR_ALLOC(5), 1);
+ OUT_RING (chan, 8);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CB_BIND(4), 1);
+ OUT_RING (chan, 0x01);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_EARLY_FRAGMENT_TESTS, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, 0x0360, 2);
+ OUT_RING (chan, 0x20164010);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, NvSub3D, 0x196c, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, 0x1664, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_FRAG_COLOR_CLAMP_EN, 1);
+ OUT_RING (chan, 0x11111111);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_RASTERIZE_ENABLE, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(4), 1);
+ OUT_RING (chan, 0x40);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_GP_BUILTIN_RESULT_EN, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(3), 1);
+ OUT_RING (chan, 0x30);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(2), 1);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_SELECT(0), 1);
+ OUT_RING (chan, 0x00);
+
+ BEGIN_RING(chan, NvSub3D, 0x1604, 1);
+ OUT_RING (chan, 4);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_POINT_SPRITE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_ENABLE(0), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VIEWPORT_HORIZ(0), 2);
+ OUT_RING (chan, (8192 << 16) | 0);
+ OUT_RING (chan, (8192 << 16) | 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_HORIZ(0), 2);
+ OUT_RING (chan, (8192 << 16) | 0);
+ OUT_RING (chan, (8192 << 16) | 0);
+
+ FIRE_RING (chan); usleep(50);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "NVC0TCL has been initialized\n");
+
+ return TRUE;
+}
+
diff --git a/src/nvc0_accel.h b/src/nvc0_accel.h
new file mode 100644
index 0000000..1d72695
--- /dev/null
+++ b/src/nvc0_accel.h
@@ -0,0 +1,83 @@
+#ifndef __NVC0_ACCEL_H__
+#define __NVC0_ACCEL_H__
+
+/* Within NVC0 code the generic ring macros resolve to the NVC0 variants. */
+#define BEGIN_RING(c, g, m, s) BEGIN_RING_NVC0(c, g, m, s)
+#define BEGIN_RING_NI(c, g, m, s) BEGIN_RING_NI_NVC0(c, g, m, s)
+
+/* scratch buffer offsets */
+#define CODE_OFFSET 0x00000000 /* Code */
+#define TIC_OFFSET 0x00002000 /* Texture Image Control */
+#define TSC_OFFSET 0x00003000 /* Texture Sampler Control */
+
+/* NOTE(review): presumably notifier and miscellaneous scratch offsets;
+ * their users are not visible here - confirm. */
+#define NTFY_OFST 0x08000
+#define MISC_OFST 0x10000
+
+/* fragment programs */
+#define PFP_S 0x0000 /* (src) */
+#define PFP_C 0x0100 /* (src IN mask) */
+#define PFP_CCA 0x0200 /* (src IN mask) component-alpha */
+#define PFP_CCASA 0x0300 /* (src IN mask) component-alpha src-alpha */
+#define PFP_S_A8 0x0400 /* (src) a8 rt */
+#define PFP_C_A8 0x0500 /* (src IN mask) a8 rt - same for CA and CA_SA */
+#define PFP_NV12 0x0600 /* NV12 YUV->RGB */
+
+/* vertex programs */
+#define PVP_PASS 0x0700 /* vertex pass-through shader */
+
+/* shader constants */
+#define CB_OFFSET 0x1000
+
+/* Build the header word for NVC0TCL_VTX_ATTR_DEFINE from attribute index a,
+ * component count c, type suffix t (pasted onto
+ * NVC0TCL_VTX_ATTR_DEFINE_TYPE_) and size field s. */
+#define VTX_ATTR(a, c, t, s) \
+ ((NVC0TCL_VTX_ATTR_DEFINE_TYPE_##t) | \
+ ((a) << NVC0TCL_VTX_ATTR_DEFINE_ATTR_SHIFT) | \
+ ((c) << NVC0TCL_VTX_ATTR_DEFINE_COMP_SHIFT) | \
+ ((s) << NVC0TCL_VTX_ATTR_DEFINE_SIZE_SHIFT))
+
+/*
+ * Emit one immediate-mode vertex with a single texture coordinate pair.
+ *
+ * Attribute 1 receives (sx, sy) as two floats; attribute 0 receives the
+ * destination position with dx in the low 16 bits and dy in the high 16 bits
+ * of one USCALED word. The disabled #else branch is an alternative that
+ * sends the position as two floats instead.
+ */
+static __inline__ void
+VTX1s(NVPtr pNv, float sx, float sy, unsigned dx, unsigned dy)
+{
+ struct nouveau_channel *chan = pNv->chan;
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3);
+ OUT_RING (chan, VTX_ATTR(1, 2, FLOAT, 4));
+ OUT_RINGf (chan, sx);
+ OUT_RINGf (chan, sy);
+#if 1
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 2);
+ OUT_RING (chan, VTX_ATTR(0, 2, USCALED, 2));
+ OUT_RING (chan, (dy << 16) | dx); /* dx is not masked; assumes dx < 65536 - TODO confirm */
+#else
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3);
+ OUT_RING (chan, VTX_ATTR(0, 2, FLOAT, 4));
+ OUT_RINGf (chan, (float)dx);
+ OUT_RINGf (chan, (float)dy);
+#endif
+}
+
+/*
+ * Emit one immediate-mode vertex with two texture coordinate pairs.
+ *
+ * Attribute 1 receives (s1x, s1y), attribute 2 receives (s2x, s2y), both as
+ * floats; attribute 0 receives the destination position with dx in the low
+ * 16 bits and dy in the high 16 bits of one USCALED word. The disabled #else
+ * branch is an alternative that sends the position as two floats instead.
+ */
+static __inline__ void
+VTX2s(NVPtr pNv, float s1x, float s1y, float s2x, float s2y,
+ unsigned dx, unsigned dy)
+{
+ struct nouveau_channel *chan = pNv->chan;
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3);
+ OUT_RING (chan, VTX_ATTR(1, 2, FLOAT, 4));
+ OUT_RINGf (chan, s1x);
+ OUT_RINGf (chan, s1y);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3);
+ OUT_RING (chan, VTX_ATTR(2, 2, FLOAT, 4));
+ OUT_RINGf (chan, s2x);
+ OUT_RINGf (chan, s2y);
+#if 1
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 2);
+ OUT_RING (chan, VTX_ATTR(0, 2, USCALED, 2));
+ OUT_RING (chan, (dy << 16) | dx); /* dx is not masked; assumes dx < 65536 - TODO confirm */
+#else
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VTX_ATTR_DEFINE, 3);
+ OUT_RING (chan, VTX_ATTR(0, 2, FLOAT, 4));
+ OUT_RINGf (chan, (float)dx);
+ OUT_RINGf (chan, (float)dy);
+#endif
+}
+
+#endif
diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
new file mode 100644
index 0000000..91c0743
--- /dev/null
+++ b/src/nvc0_exa.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright 2007 NVIDIA, Corporation
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv_include.h"
+#include "nv_rop.h"
+#include "nvc0_accel.h"
+#include "nv50_texture.h"
+
+/* OR together three NOUVEAU_BO_* flags given by suffix,
+ * e.g. NOUVEAU_BO(VRAM, GART, RD). */
+#define NOUVEAU_BO(a, b, c) (NOUVEAU_BO_##a | NOUVEAU_BO_##b | NOUVEAU_BO_##c)
+
+/*
+ * Read a w x h pixel rectangle at (x, y) of pixmap pspix back into the
+ * caller's buffer dst, whose rows are dst_pitch bytes apart.
+ *
+ * The rectangle is copied by the M2MF engine into the pNv->GART bounce
+ * buffer in chunks of at most 128 KiB worth of lines, and each chunk is then
+ * copied from the mapped bounce buffer into dst.
+ *
+ * Returns TRUE on success; FALSE if ring space could not be reserved, a
+ * relocation could not be emitted, or the bounce buffer could not be mapped.
+ */
+Bool
+NVC0AccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
+		      char *dst, unsigned dst_pitch)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pspix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_channel *chan = pNv->chan;
+	struct nouveau_bo *bo = nouveau_pixmap_bo(pspix);
+	const int cpp = pspix->drawable.bitsPerPixel / 8;
+	const int line_len = w * cpp;
+	/* Number of whole lines that fit into 128 KiB of bounce buffer. */
+	const int line_limit = (128 << 10) / line_len;
+	unsigned src_offset = 0, src_pitch = 0, tiled = 1;
+
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		   "NVC0AccelDownloadM2MF: %i.%i %ix%i\n",
+		   x, y, w, h);
+
+	if (!nv50_style_tiled_pixmap(pspix)) {
+		/* Linear source: address the rectangle by byte offset. */
+		tiled = 0;
+		src_pitch = exaGetPixmapPitch(pspix);
+		src_offset = (y * src_pitch) + (x * cpp);
+	} else {
+		/* Tiled source: describe the surface once; the position is
+		 * programmed per chunk below. */
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_TILING_MODE_IN, 5);
+		OUT_RING  (chan, bo->tile_mode << 4);
+		OUT_RING  (chan, pspix->drawable.width * cpp);
+		OUT_RING  (chan, pspix->drawable.height);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	}
+
+	while (h) {
+		const char *src;
+		int line_count, i;
+
+		/* GART size >= 128 KiB assumed */
+		line_count = h;
+		if (line_count > line_limit)
+			line_count = line_limit;
+
+		/* Bail out instead of writing into a ring that has no room. */
+		if (MARK_RING(chan, 16, 4))
+			return FALSE;
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+		if (OUT_RELOCh(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, WR)) ||
+		    OUT_RELOCl(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, WR))) {
+			MARK_UNDO(chan);
+			return FALSE;
+		}
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_IN_HIGH, 6);
+		if (OUT_RELOCh(chan, bo, src_offset, NOUVEAU_BO(VRAM, GART, RD)) ||
+		    OUT_RELOCl(chan, bo, src_offset, NOUVEAU_BO(VRAM, GART, RD))) {
+			MARK_UNDO(chan);
+			return FALSE;
+		}
+		OUT_RING  (chan, src_pitch);
+		OUT_RING  (chan, line_len);
+		OUT_RING  (chan, line_len);
+		OUT_RING  (chan, line_count);
+
+		if (tiled) {
+			BEGIN_RING(chan, NvSubM2MF,
+				   NVC0_M2MF_TILING_POSITION_IN_X, 2);
+			OUT_RING  (chan, x * cpp);
+			OUT_RING  (chan, y);
+		}
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+		OUT_RING  (chan, 0x100000 | (tiled << 8));
+
+		if (nouveau_bo_map(pNv->GART, NOUVEAU_BO_RD)) {
+			MARK_UNDO(chan);
+			return FALSE;
+		}
+		src = pNv->GART->map;
+
+		if (dst_pitch == line_len) {
+			/* Destination rows are contiguous: one copy, then
+			 * advance dst so the next chunk lands after this one
+			 * instead of overwriting it. */
+			memcpy(dst, src, dst_pitch * line_count);
+			dst += dst_pitch * line_count;
+		} else {
+			for (i = 0; i < line_count; ++i) {
+				memcpy(dst, src, line_len);
+				src += line_len;
+				dst += dst_pitch;
+			}
+		}
+		nouveau_bo_unmap(pNv->GART);
+
+		if (!tiled)
+			src_offset += line_count * src_pitch;
+		h -= line_count;
+		y += line_count;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Upload a w x h pixel rectangle from the caller's buffer src (rows
+ * src_pitch bytes apart) to position (x, y) of pixmap pdpix.
+ *
+ * Data is staged through the pNv->GART bounce buffer in chunks of at most
+ * 128 KiB worth of lines; each chunk is then copied into the pixmap by the
+ * M2MF engine and the ring is fired before the bounce buffer is reused.
+ *
+ * Returns TRUE on success; FALSE if the bounce buffer could not be mapped,
+ * ring space could not be reserved, or a relocation could not be emitted.
+ */
+Bool
+NVC0AccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h,
+		    const char *src, int src_pitch)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pdpix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_channel *chan = pNv->chan;
+	struct nouveau_bo *bo = nouveau_pixmap_bo(pdpix);
+	int cpp = pdpix->drawable.bitsPerPixel / 8;
+	int line_len = w * cpp;
+	/* Number of whole lines that fit into 128 KiB of bounce buffer. */
+	int line_limit = (128 << 10) / line_len;
+	unsigned dst_offset = 0, dst_pitch = 0, tiled = 1;
+
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		   "NVC0AccelUploadM2MF: %i.%i %ix%i\n",
+		   x, y, w, h);
+
+	if (!nv50_style_tiled_pixmap(pdpix)) {
+		/* Linear destination: address the rectangle by byte offset. */
+		tiled = 0;
+		dst_pitch = exaGetPixmapPitch(pdpix);
+		dst_offset = (y * dst_pitch) + (x * cpp);
+	} else {
+		/* Tiled destination: describe the surface once; the position
+		 * is programmed per chunk below. */
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_TILING_MODE_OUT, 5);
+		OUT_RING  (chan, bo->tile_mode << 4);
+		OUT_RING  (chan, pdpix->drawable.width * cpp);
+		OUT_RING  (chan, pdpix->drawable.height);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	}
+
+	while (h) {
+		char *dst;
+		int i, line_count;
+
+		line_count = h;
+		if (line_count > line_limit)
+			line_count = line_limit;
+
+		/* Stage this chunk, tightly packed, in the bounce buffer. */
+		if (nouveau_bo_map(pNv->GART, NOUVEAU_BO_WR))
+			return FALSE;
+		dst = pNv->GART->map;
+
+		if (src_pitch == line_len) {
+			memcpy(dst, src, src_pitch * line_count);
+			src += src_pitch * line_count;
+		} else {
+			for (i = 0; i < line_count; i++) {
+				memcpy(dst, src, line_len);
+				src += src_pitch;
+				dst += line_len;
+			}
+		}
+		nouveau_bo_unmap(pNv->GART);
+
+		if (MARK_RING(chan, 16, 4))
+			return FALSE;
+
+		/* A failed relocation must not leave a half-built command in
+		 * the ring: roll back to the mark and report failure. */
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_IN_HIGH, 2);
+		if (OUT_RELOCh(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, RD)) ||
+		    OUT_RELOCl(chan, pNv->GART, 0, NOUVEAU_BO(GART, GART, RD))) {
+			MARK_UNDO(chan);
+			return FALSE;
+		}
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+		if (OUT_RELOCh(chan, bo, dst_offset, NOUVEAU_BO(VRAM, GART, WR)) ||
+		    OUT_RELOCl(chan, bo, dst_offset, NOUVEAU_BO(VRAM, GART, WR))) {
+			MARK_UNDO(chan);
+			return FALSE;
+		}
+
+		if (tiled) {
+			BEGIN_RING(chan, NvSubM2MF,
+				   NVC0_M2MF_TILING_POSITION_OUT_X, 2);
+			OUT_RING  (chan, x * cpp);
+			OUT_RING  (chan, y);
+		}
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_PITCH_IN, 4);
+		OUT_RING  (chan, line_len);
+		OUT_RING  (chan, dst_pitch);
+		OUT_RING  (chan, line_len);
+		OUT_RING  (chan, line_count);
+
+		BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+		OUT_RING  (chan, 0x100000 | (tiled << 4));
+		FIRE_RING (chan);
+
+		if (!tiled)
+			dst_offset += line_count * dst_pitch;
+		h -= line_count;
+		y += line_count;
+	}
+
+	return TRUE;
+}
+
+
+/*
+ * Per-operation state, held in the single file-scope instance exa_state.
+ * NOTE(review): presumably filled in while preparing a composite and read
+ * while emitting vertices; the users are outside this view - confirm.
+ */
+struct nvc0_exa_state {
+ /* One entry per texture unit (two units). */
+ struct {
+ PictTransformPtr transform;
+ float width;
+ float height;
+ } unit[2];
+
+ Bool have_mask;
+};
+
+static struct nvc0_exa_state exa_state;
+
+/*
+ * Declare the locals used throughout this file, derived from pixmap p:
+ * pScrn, pNv, chan and state (pointing at exa_state). The (void) casts keep
+ * chan and state from triggering unused-variable warnings in functions that
+ * do not touch them.
+ */
+#define NVC0EXA_LOCALS(p) \
+ ScrnInfoPtr pScrn = xf86Screens[(p)->drawable.pScreen->myNum]; \
+ NVPtr pNv = NVPTR(pScrn); \
+ struct nouveau_channel *chan = pNv->chan; (void)chan; \
+ struct nvc0_exa_state *state = &exa_state; (void)state
+
+/* Shorthand for a blend factor constant, e.g. BF(ONE). */
+#define BF(f) NVC0TCL_BLEND_FUNC_SRC_RGB_##f
+
+/*
+ * Blend setup for one Render operator: the source and destination blend
+ * factors, plus flags for whether the factors reference source alpha and/or
+ * destination alpha.
+ */
+struct nvc0_blend_op {
+ unsigned src_alpha;
+ unsigned dst_alpha;
+ unsigned src_blend;
+ unsigned dst_blend;
+};
+
+/* Indexed by Render operator; entry order follows the names in the
+ * per-row comments (Clear .. Add). */
+static struct nvc0_blend_op
+NVC0EXABlendOp[] = {
+/* Clear */ { 0, 0, BF( ZERO), BF( ZERO) },
+/* Src */ { 0, 0, BF( ONE), BF( ZERO) },
+/* Dst */ { 0, 0, BF( ZERO), BF( ONE) },
+/* Over */ { 1, 0, BF( ONE), BF(ONE_MINUS_SRC_ALPHA) },
+/* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ONE) },
+/* In */ { 0, 1, BF( DST_ALPHA), BF( ZERO) },
+/* InReverse */ { 1, 0, BF( ZERO), BF( SRC_ALPHA) },
+/* Out */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ZERO) },
+/* OutReverse */ { 1, 0, BF( ZERO), BF(ONE_MINUS_SRC_ALPHA) },
+/* Atop */ { 1, 1, BF( DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
+/* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF( SRC_ALPHA) },
+/* Xor */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
+/* Add */ { 0, 0, BF( ONE), BF( ONE) },
+};
+
+/*
+ * Translate the pixmap's bits-per-pixel into a 2D engine surface format.
+ *
+ * On success stores the format in *fmt and returns TRUE. For an unsupported
+ * depth, *fmt is left untouched, a fallback is reported and FALSE returned.
+ */
+static Bool
+NVC0EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt)
+{
+	NVC0EXA_LOCALS(ppix);
+	const int bpp = ppix->drawable.bitsPerPixel;
+	uint32_t hw_fmt;
+
+	if (bpp == 8) {
+		hw_fmt = NV50_2D_SRC_FORMAT_R8_UNORM;
+	} else if (bpp == 15) {
+		hw_fmt = NV50_2D_SRC_FORMAT_X1R5G5B5_UNORM;
+	} else if (bpp == 16) {
+		hw_fmt = NV50_2D_SRC_FORMAT_R5G6B5_UNORM;
+	} else if (bpp == 24) {
+		hw_fmt = NV50_2D_SRC_FORMAT_X8R8G8B8_UNORM;
+	} else if (bpp == 30) {
+		hw_fmt = NV50_2D_SRC_FORMAT_A2B10G10R10_UNORM;
+	} else if (bpp == 32) {
+		hw_fmt = NV50_2D_SRC_FORMAT_A8R8G8B8_UNORM;
+	} else {
+		NOUVEAU_FALLBACK("Unknown surface format for bpp=%d\n", bpp);
+		return FALSE;
+	}
+
+	*fmt = hw_fmt;
+	return TRUE;
+}
+
+/*
+ * Program the 2D engine clip rectangle: logs the request, then writes
+ * x, y, w, h to the four methods starting at NV50_2D_CLIP_X.
+ */
+static void NVC0EXASetClip(PixmapPtr ppix, int x, int y, int w, int h)
+{
+ NVC0EXA_LOCALS(ppix);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASetClip: %i.%i %ix%i\n",
+ x, y, w, h);
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_CLIP_X, 4);
+ OUT_RING (chan, x);
+ OUT_RING (chan, y);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+}
+
+/*
+ * Bind pixmap ppix as the 2D engine source (is_src != 0) or destination
+ * (is_src == 0) surface.
+ *
+ * Emits the surface format and layout (linear with pitch, or tiled with the
+ * bo's tile mode), then the dimensions and the buffer address as VRAM
+ * relocations (read for a source, write for a destination). For a
+ * destination the clip rectangle is reset to the whole pixmap.
+ *
+ * Returns FALSE if the depth has no 2D format or a relocation fails; the
+ * ring is not rolled back here, so callers issue MARK_UNDO on failure.
+ */
+static Bool
+NVC0EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
+{
+ NVC0EXA_LOCALS(ppix);
+ struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+ int mthd = is_src ? NV50_2D_SRC_FORMAT : NV50_2D_DST_FORMAT;
+ uint32_t fmt, bo_flags;
+
+ if (!NVC0EXA2DSurfaceFormat(ppix, &fmt))
+ return FALSE;
+
+ bo_flags = NOUVEAU_BO_VRAM;
+ bo_flags |= is_src ? NOUVEAU_BO_RD : NOUVEAU_BO_WR;
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "EXAAcquSurf2D: is_src=%i, tile_flags=%x, size=%ix%i\n",
+ is_src, bo->tile_flags,
+ ppix->drawable.width, ppix->drawable.height);
+
+ if (!nv50_style_tiled_pixmap(ppix)) {
+ /* Linear surface: format, a flag word of 1, then the pitch. */
+ BEGIN_RING(chan, NvSub2D, mthd, 2);
+ OUT_RING (chan, fmt);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSub2D, mthd + 0x14, 1);
+ OUT_RING (chan, (uint32_t)exaGetPixmapPitch(ppix));
+ } else {
+ /* Tiled surface: format, a flag word of 0, then the tile mode. */
+ BEGIN_RING(chan, NvSub2D, mthd, 5);
+ OUT_RING (chan, fmt);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, bo->tile_mode << 4);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ }
+
+ /* Width, height, then address high/low. */
+ BEGIN_RING(chan, NvSub2D, mthd + 0x18, 4);
+ OUT_RING (chan, ppix->drawable.width);
+ OUT_RING (chan, ppix->drawable.height);
+ if (OUT_RELOCh(chan, bo, 0, bo_flags) ||
+ OUT_RELOCl(chan, bo, 0, bo_flags))
+ return FALSE;
+
+ if (is_src == 0)
+ NVC0EXASetClip(ppix, 0, 0, ppix->drawable.width, ppix->drawable.height);
+
+ return TRUE;
+}
+
+/*
+ * Program the 2D engine pattern: logs the request, then writes col0, col1,
+ * pat0, pat1 to the four methods starting at NV50_2D_PATTERN_COLOR(0).
+ */
+static void
+NVC0EXASetPattern(PixmapPtr pdpix, int col0, int col1, int pat0, int pat1)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "EXASetPattern: %i %i, %i %i\n",
+ col0, col1, pat0, pat1);
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_COLOR(0), 4);
+ OUT_RING (chan, col0);
+ OUT_RING (chan, col1);
+ OUT_RING (chan, pat0);
+ OUT_RING (chan, pat1);
+}
+
+/*
+ * Configure the 2D engine operation for raster op alu and planemask.
+ *
+ * For GXcopy with a solid planemask only NV50_2D_OPERATION (SRCCOPY) is
+ * emitted and the function returns early, leaving pNv->currentRop untouched.
+ * Otherwise SRCCOPY_PREMULT is selected, the pattern format is set from the
+ * pixmap depth, the pattern is loaded with the planemask when it is not
+ * solid, and NV50_2D_ROP is emitted only when the cached pNv->currentRop
+ * differs from the requested one.
+ */
+static void
+NVC0EXASetROP(PixmapPtr pdpix, int alu, Pixel planemask)
+{
+ NVC0EXA_LOCALS(pdpix);
+ int rop;
+
+ /* Pick the ROP code variant up front; it is only emitted further down. */
+ if (planemask != ~0)
+ rop = NVROP[alu].copy_planemask;
+ else
+ rop = NVROP[alu].copy;
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASetROP\n");
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1);
+ if (alu == GXcopy && EXA_PM_IS_SOLID(&pdpix->drawable, planemask)) {
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
+ return;
+ } else {
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY_PREMULT);
+ }
+
+ /* Pattern format code depends on depth; 24/32 bpp and anything
+ * unrecognised share code 2. */
+ BEGIN_RING(chan, NvSub2D, NV50_2D_PATTERN_FORMAT, 2);
+ switch (pdpix->drawable.bitsPerPixel) {
+ case 8: OUT_RING (chan, 3); break;
+ case 15: OUT_RING (chan, 1); break;
+ case 16: OUT_RING (chan, 0); break;
+ case 24:
+ case 32:
+ default:
+ OUT_RING (chan, 2);
+ break;
+ }
+ OUT_RING (chan, 1);
+
+ /* There are 16 ALUs.
+ * 0-15: copy
+ * 16-31: copy_planemask
+ */
+
+ if (!EXA_PM_IS_SOLID(&pdpix->drawable, planemask)) {
+ alu += 16;
+ NVC0EXASetPattern(pdpix, 0, planemask, ~0, ~0);
+ } else {
+ /* Previous op used a planemask pattern: reset it to all ones. */
+ if (pNv->currentRop > 15)
+ NVC0EXASetPattern(pdpix, ~0, ~0, ~0, ~0);
+ }
+
+ if (pNv->currentRop != alu) {
+ BEGIN_RING(chan, NvSub2D, NV50_2D_ROP, 1);
+ OUT_RING (chan, rop);
+ pNv->currentRop = alu;
+ }
+}
+
+/*
+ * Channel flush_notify callback installed by NVC0EXAPrepareSolid: re-runs
+ * NVC0EXAPrepareSolid with the parameters saved in pNv (pdpix, alu,
+ * planemask, fg_colour). The return value is ignored.
+ */
+static void
+NVC0EXAStateSolidResubmit(struct nouveau_channel *chan)
+{
+ ScrnInfoPtr pScrn = chan->user_private;
+ NVPtr pNv = NVPTR(pScrn);
+
+ NVC0EXAPrepareSolid(pNv->pdpix, pNv->alu, pNv->planemask,
+ pNv->fg_colour);
+}
+
+/*
+ * Set up the 2D engine for solid fills on pdpix with raster op alu,
+ * planemask and foreground colour fg.
+ *
+ * Binds the destination surface, programs the ROP, selects rectangle
+ * drawing with the surface format and colour, then saves the parameters in
+ * pNv and installs NVC0EXAStateSolidResubmit as the channel's flush_notify
+ * callback. Falls back (via NOUVEAU_FALLBACK) when the format is
+ * unsupported, ring space cannot be reserved or the destination cannot be
+ * bound; returns TRUE otherwise.
+ */
+Bool
+NVC0EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg)
+{
+ NVC0EXA_LOCALS(pdpix);
+ uint32_t fmt;
+
+ if (!NVC0EXA2DSurfaceFormat(pdpix, &fmt))
+ NOUVEAU_FALLBACK("rect format\n");
+
+ if (MARK_RING(chan, 64, 4))
+ NOUVEAU_FALLBACK("ring space\n");
+
+ if (!NVC0EXAAcquireSurface2D(pdpix, 0)) {
+ MARK_UNDO(chan);
+ NOUVEAU_FALLBACK("dest pixmap\n");
+ }
+
+ NVC0EXASetROP(pdpix, alu, planemask);
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_SHAPE, 3);
+ OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES);
+ OUT_RING (chan, fmt);
+ OUT_RING (chan, fg);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAPrepareSolid: fmt %x\n", fmt);
+
+ /* Remember the parameters so the resubmit callback can replay them. */
+ pNv->pdpix = pdpix;
+ pNv->alu = alu;
+ pNv->planemask = planemask;
+ pNv->fg_colour = fg;
+ chan->flush_notify = NVC0EXAStateSolidResubmit;
+ return TRUE;
+}
+
+/*
+ * Draw one solid rectangle with corners (x1, y1) and (x2, y2) using the
+ * state set up by NVC0EXAPrepareSolid. The ring is fired immediately when
+ * the rectangle covers at least 512 pixels.
+ */
+void
+NVC0EXASolid(PixmapPtr pdpix, int x1, int y1, int x2, int y2)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXASolid: %i %i %i %i\n", x1, y1, x2, y2);
+
+ /* One header word plus four coordinates. */
+ WAIT_RING (chan, 5);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_DRAW_POINT32_X(0), 4);
+ OUT_RING (chan, x1);
+ OUT_RING (chan, y1);
+ OUT_RING (chan, x2);
+ OUT_RING (chan, y2);
+
+ if ((x2 - x1) * (y2 - y1) >= 512)
+ FIRE_RING (chan);
+}
+
+/* Finish a solid-fill sequence by removing the channel's flush_notify
+ * callback. */
+void
+NVC0EXADoneSolid(PixmapPtr pdpix)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ chan->flush_notify = NULL;
+}
+
+/*
+ * Channel flush_notify callback installed by NVC0EXAPrepareCopy: re-runs
+ * NVC0EXAPrepareCopy with the pixmaps, alu and planemask saved in pNv
+ * (dx and dy are passed as 0). The return value is ignored.
+ */
+static void
+NVC0EXAStateCopyResubmit(struct nouveau_channel *chan)
+{
+ ScrnInfoPtr pScrn = chan->user_private;
+ NVPtr pNv = NVPTR(pScrn);
+
+ NVC0EXAPrepareCopy(pNv->pspix, pNv->pdpix, 0, 0, pNv->alu,
+ pNv->planemask);
+}
+
+/*
+ * Set up the 2D engine for blits from pspix to pdpix with raster op alu and
+ * planemask. dx and dy are not used by this function.
+ *
+ * Binds the source and destination surfaces, programs the ROP, then saves
+ * the parameters in pNv and installs NVC0EXAStateCopyResubmit as the
+ * channel's flush_notify callback. Falls back (via NOUVEAU_FALLBACK) when
+ * ring space cannot be reserved or either surface cannot be bound; returns
+ * TRUE otherwise.
+ */
+Bool
+NVC0EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
+		   int alu, Pixel planemask)
+{
+	NVC0EXA_LOCALS(pdpix);
+
+	if (MARK_RING(chan, 64, 4))
+		NOUVEAU_FALLBACK("ring space\n");
+
+	if (!NVC0EXAAcquireSurface2D(pspix, 1)) {
+		MARK_UNDO(chan);
+		NOUVEAU_FALLBACK("src pixmap\n");
+	}
+
+	if (!NVC0EXAAcquireSurface2D(pdpix, 0)) {
+		MARK_UNDO(chan);
+		NOUVEAU_FALLBACK("dest pixmap\n");
+	}
+
+	/* Terminate the message with a newline like every other log call,
+	 * so the next log entry does not run on from this one. */
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAPrepareCopy\n");
+
+	NVC0EXASetROP(pdpix, alu, planemask);
+
+	/* Remember the parameters so the resubmit callback can replay them. */
+	pNv->pspix = pspix;
+	pNv->pdpix = pdpix;
+	pNv->alu = alu;
+	pNv->planemask = planemask;
+	chan->flush_notify = NVC0EXAStateCopyResubmit;
+	return TRUE;
+}
+
+/*
+ * Blit a width x height rectangle from (srcX, srcY) of the source surface
+ * to (dstX, dstY) of the destination, using the state set up by
+ * NVC0EXAPrepareCopy. Scale factors are programmed as 1 (fraction 0,
+ * integer 1). The ring is fired immediately when the rectangle covers at
+ * least 512 pixels.
+ */
+void
+NVC0EXACopy(PixmapPtr pdpix, int srcX , int srcY,
+ int dstX , int dstY,
+ int width, int height)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXACopy: %i.%i -> %i.%i, %ix%i\n",
+ srcX, srcY, dstX, dstY, width, height);
+
+ /* 2 + 2 + 13 words: serialize, method 0x088c, and the blit itself. */
+ WAIT_RING (chan, 17);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_SERIALIZE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub2D, 0x088c, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_BLIT_DST_X, 12);
+ OUT_RING (chan, dstX);
+ OUT_RING (chan, dstY);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RING (chan, 0); /* DU,V_DX,Y_FRACT,INT */
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0); /* BLIT_SRC_X,Y_FRACT,INT */
+ OUT_RING (chan, srcX);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, srcY);
+
+ if (width * height >= 512)
+ FIRE_RING (chan);
+}
+
+/* Finish a copy sequence by removing the channel's flush_notify callback. */
+void
+NVC0EXADoneCopy(PixmapPtr pdpix)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ chan->flush_notify = NULL;
+}
+
+/*
+ * Channel flush_notify callback installed by NVC0EXAUploadSIFC: re-binds
+ * the destination surface saved in pNv->pdpix after the ring was flushed.
+ *
+ * If ring space cannot be reserved nothing is emitted. If re-binding the
+ * surface fails, the partially emitted commands are rolled back.
+ */
+static void
+NVC0EXAStateSIFCResubmit(struct nouveau_channel *chan)
+{
+	ScrnInfoPtr pScrn = chan->user_private;
+	NVPtr pNv = NVPTR(pScrn);
+
+	if (MARK_RING(pNv->chan, 32, 2))
+		return;
+
+	/* NVC0EXAAcquireSurface2D returns TRUE on success, so only undo on
+	 * failure; undoing on success would discard the state just emitted. */
+	if (!NVC0EXAAcquireSurface2D(pNv->pdpix, 0))
+		MARK_UNDO(pNv->chan);
+}
+
+/*
+ * Upload a w x h rectangle of cpp-byte pixels from src (rows src_pitch
+ * bytes apart) to (x, y) of pdpix by pushing the pixel data inline through
+ * the 2D engine's SIFC methods.
+ *
+ * Each line is rounded up to whole dwords and sent in bursts of at most
+ * 1792 dwords; the clip rectangle is set to the target area first. While
+ * data is being pushed, NVC0EXAStateSIFCResubmit is installed as the
+ * channel's flush_notify callback; it is removed again before returning.
+ *
+ * Returns FALSE (or falls back) when the format is unsupported, ring space
+ * cannot be reserved or the destination cannot be bound; TRUE otherwise.
+ */
+Bool
+NVC0EXAUploadSIFC(const char *src, int src_pitch,
+ PixmapPtr pdpix, int x, int y, int w, int h, int cpp)
+{
+ NVC0EXA_LOCALS(pdpix);
+ int line_dwords = (w * cpp + 3) / 4; /* bytes per line, rounded up to dwords */
+ uint32_t sifc_fmt;
+
+ if (!NVC0EXA2DSurfaceFormat(pdpix, &sifc_fmt))
+ NOUVEAU_FALLBACK("hostdata format\n");
+
+ if (MARK_RING(chan, 64, 2))
+ return FALSE;
+
+ if (!NVC0EXAAcquireSurface2D(pdpix, 0)) {
+ MARK_UNDO(chan);
+ NOUVEAU_FALLBACK("dest pixmap\n");
+ }
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXAUploadSIFC\n");
+
+ /* If the pitch isn't aligned to a dword you can
+ * get corruption at the end of a line.
+ */
+ NVC0EXASetClip(pdpix, x, y, w, h);
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_OPERATION, 1);
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_SIFC_BITMAP_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sifc_fmt);
+ BEGIN_RING(chan, NvSub2D, NV50_2D_SIFC_WIDTH, 10);
+ OUT_RING (chan, (line_dwords * 4) / cpp); /* width in pixels of the padded line */
+ OUT_RING (chan, h);
+ OUT_RING (chan, 0); /* SIFC_DX,Y_DU,V_FRACT,INT */
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0); /* SIFC_DST_X,Y_FRACT,INT */
+ OUT_RING (chan, x);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, y);
+
+ pNv->pdpix = pdpix;
+ chan->flush_notify = NVC0EXAStateSIFCResubmit;
+
+ while (h--) {
+ const char *ptr = src;
+ int count = line_dwords;
+
+ /* Push this line in bursts of at most 1792 dwords. */
+ while (count) {
+ int size = count > 1792 ? 1792 : count;
+
+ WAIT_RING (chan, size + 1);
+ BEGIN_RING_NI(chan, NvSub2D, NV50_2D_SIFC_DATA, size);
+ OUT_RINGp (chan, ptr, size);
+
+ ptr += size * 4;
+ count -= size;
+ }
+
+ src += src_pitch;
+ }
+
+ chan->flush_notify = NULL;
+ return TRUE;
+}
+
+/*
+ * Decide whether picture ppict can be used as a 3D render target: its
+ * drawable must be no larger than 8192x8192 and its format must be one of
+ * the listed PICT_* formats. Anything else is reported as a fallback.
+ */
+static Bool
+NVC0EXACheckRenderTarget(PicturePtr ppict)
+{
+	const int rt_w = ppict->pDrawable->width;
+	const int rt_h = ppict->pDrawable->height;
+
+	if (rt_w > 8192 || rt_h > 8192)
+		NOUVEAU_FALLBACK("render target dimensions exceeded %dx%d\n",
+				 rt_w, rt_h);
+
+	switch (ppict->format) {
+	case PICT_a8r8g8b8:
+	case PICT_x8r8g8b8:
+	case PICT_r5g6b5:
+	case PICT_a8:
+	case PICT_x1r5g5b5:
+	case PICT_a1r5g5b5:
+	case PICT_x8b8g8r8:
+	case PICT_a2b10g10r10:
+	case PICT_x2b10g10r10:
+	case PICT_a2r10g10b10:
+	case PICT_x2r10g10b10:
+		/* Supported format. */
+		return TRUE;
+	default:
+		break;
+	}
+
+	NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format);
+
+	return TRUE;
+}
+
+/*
+ * Bind pixmap ppix as 3D render target 0, using the format of picture
+ * ppict.
+ *
+ * Only tiled pixmaps are accepted; a linear pixmap triggers a fallback.
+ * Emits the buffer address (VRAM, write), the dimensions, the render-target
+ * format, the bo's tile mode and two trailing words (1, 0).
+ *
+ * Returns FALSE if a relocation fails, falls back on an unsupported format,
+ * and returns TRUE otherwise.
+ */
+static Bool
+NVC0EXARenderTarget(PixmapPtr ppix, PicturePtr ppict)
+{
+ NVC0EXA_LOCALS(ppix);
+ struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+ unsigned format;
+
+ /*XXX: Scanout buffer not tiled, someone needs to figure it out */
+ if (!nv50_style_tiled_pixmap(ppix))
+ NOUVEAU_FALLBACK("pixmap is scanout buffer\n");
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXARenderTarget\n");
+
+ switch (ppict->format) {
+ case PICT_a8r8g8b8: format = NVC0TCL_RT_FORMAT_A8R8G8B8_UNORM; break;
+ case PICT_x8r8g8b8: format = NVC0TCL_RT_FORMAT_X8R8G8B8_UNORM; break;
+ case PICT_r5g6b5: format = NVC0TCL_RT_FORMAT_R5G6B5_UNORM; break;
+ case PICT_a8: format = NVC0TCL_RT_FORMAT_A8_UNORM; break;
+ case PICT_x1r5g5b5: format = NVC0TCL_RT_FORMAT_X1R5G5B5_UNORM; break;
+ case PICT_a1r5g5b5: format = NVC0TCL_RT_FORMAT_A1R5G5B5_UNORM; break;
+ case PICT_x8b8g8r8: format = NVC0TCL_RT_FORMAT_X8B8G8R8_UNORM; break;
+ /* The x2 variants reuse the matching a2 render-target format. */
+ case PICT_a2b10g10r10:
+ case PICT_x2b10g10r10:
+ format = NVC0TCL_RT_FORMAT_A2B10G10R10_UNORM;
+ break;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ format = NVC0TCL_RT_FORMAT_A2R10G10B10_UNORM;
+ break;
+ default:
+ NOUVEAU_FALLBACK("invalid picture format\n");
+ }
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_RT_ADDRESS_HIGH(0), 8);
+ if (OUT_RELOCh(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR) ||
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR))
+ return FALSE;
+ OUT_RING (chan, ppix->drawable.width);
+ OUT_RING (chan, ppix->drawable.height);
+ OUT_RING (chan, format);
+ OUT_RING (chan, bo->tile_mode << 4);
+ OUT_RING (chan, 0x00000001);
+ OUT_RING (chan, 0x00000000);
+
+ return TRUE;
+}
+
+/*
+ * Decide whether a picture can be used as a texture source (or mask).
+ *
+ * ppict:  picture to be sampled.
+ * pdpict: destination picture; only its format's alpha width is consulted.
+ * op:     Render operator, used to index NVC0EXABlendOp[].
+ *
+ * Returns TRUE when the picture is backed by a drawable no larger than
+ * 8192x8192, uses one of the formats and filters listed below, and does not
+ * hit the XRGB/REPEAT_NONE case described at the end.  Every rejection goes
+ * through NOUVEAU_FALLBACK.
+ */
+static Bool
+NVC0EXACheckTexture(PicturePtr ppict, PicturePtr pdpict, int op)
+{
+ if (!ppict->pDrawable)
+ NOUVEAU_FALLBACK("Solid and gradient pictures unsupported.\n");
+
+ if (!(ppict->pDrawable->width <= 8192 &&
+ ppict->pDrawable->height <= 8192))
+ NOUVEAU_FALLBACK("texture dimensions exceeded %dx%d\n",
+ ppict->pDrawable->width,
+ ppict->pDrawable->height);
+
+ switch (ppict->format) {
+ /* 32 bpp, 8 bits per channel */
+ case PICT_a8r8g8b8: case PICT_a8b8g8r8:
+ case PICT_x8r8g8b8: case PICT_x8b8g8r8:
+ case PICT_b8g8r8a8: case PICT_b8g8r8x8:
+ /* 32 bpp, 10 bits per colour channel */
+ case PICT_a2b10g10r10: case PICT_x2b10g10r10:
+ case PICT_a2r10g10b10: case PICT_x2r10g10b10:
+ /* 16 bpp */
+ case PICT_r5g6b5: case PICT_b5g6r5:
+ case PICT_x1r5g5b5: case PICT_x1b5g5r5:
+ case PICT_a1r5g5b5: case PICT_a1b5g5r5:
+ case PICT_x4r4g4b4: case PICT_x4b4g4r4:
+ case PICT_a4r4g4b4: case PICT_a4b4g4r4:
+ /* alpha only */
+ case PICT_a8:
+ break;
+ default:
+ NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format);
+ }
+
+ if (ppict->filter != PictFilterNearest &&
+ ppict->filter != PictFilterBilinear)
+ NOUVEAU_FALLBACK("picture filter %d\n", ppict->filter);
+
+ /* OpenGL and Render disagree on what should be sampled outside an XRGB
+  * texture (with no repeating).  OpenGL has a hardcoded alpha value of
+  * 1.0, while Render expects 0.0.  We assume that clipping is done for
+  * untransformed sources, so only transformed, non-repeating sources
+  * are rejected, and only when the operator uses source alpha and the
+  * destination actually has an alpha channel.
+  */
+ if (NVC0EXABlendOp[op].src_alpha && !ppict->repeat && ppict->transform) {
+ if (PICT_FORMAT_A(ppict->format) == 0 &&
+ PICT_FORMAT_A(pdpict->format) != 0)
+ NOUVEAU_FALLBACK("REPEAT_NONE unsupported for XRGB source\n");
+ }
+
+ return TRUE;
+}
+
+/*
+ * Build the first word of a texture image control (TIC) entry: all four
+ * channels typed UNORM, the four component mappings X1..X4 (pasted onto
+ * NV50TIC_0_0_MAP) and the storage format FMT (pasted onto
+ * NV50TIC_0_0_FMT_).  Only used by the format switch in NVC0EXATexture,
+ * which #undef's it afterwards.
+ */
+#define _(X1, X2, X3, X4, FMT) \
+ (NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_TYPEG_UNORM | \
+ NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_TYPEA_UNORM | \
+ NV50TIC_0_0_MAP##X1 | NV50TIC_0_0_MAP##X2 | \
+ NV50TIC_0_0_MAP##X3 | NV50TIC_0_0_MAP##X4 | \
+ NV50TIC_0_0_FMT_##FMT)
+
+/*
+ * Upload the texture image (TIC) and sampler (TSC) entries for one texture
+ * unit and record the unit's size and transform for NVC0EXAComposite.
+ *
+ * ppix:  pixmap to sample from; must be nv50-style tiled.
+ * ppict: picture describing format, repeat mode, filter and transform.
+ * unit:  texture unit index; selects the 32-byte slot at TIC_OFFSET /
+ *        TSC_OFFSET + unit * 32 inside pNv->tesla_scratch.
+ *
+ * Each entry is eight 32-bit words written through M2MF inline data.
+ * Returns TRUE on success.  Bails out through NOUVEAU_FALLBACK for untiled
+ * pixmaps or unknown formats and returns FALSE when a relocation cannot be
+ * emitted; the caller is expected to MARK_UNDO in those cases because part
+ * of the command stream may already have been written.
+ *
+ * No per-call logging here: this runs for every composite setup.
+ */
+static Bool
+NVC0EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit)
+{
+ NVC0EXA_LOCALS(ppix);
+ struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+ const unsigned tcb_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ uint32_t mode;
+
+ /* XXX: maybe add support for linear textures at some point */
+ if (!nv50_style_tiled_pixmap(ppix))
+ NOUVEAU_FALLBACK("pixmap is scanout buffer\n");
+
+ /* Tell the 3D engine where the TIC table lives. */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TIC_ADDRESS_HIGH, 3);
+ if (OUT_RELOCh(chan, pNv->tesla_scratch, TIC_OFFSET, tcb_flags) ||
+ OUT_RELOCl(chan, pNv->tesla_scratch, TIC_OFFSET, tcb_flags))
+ return FALSE;
+ OUT_RING (chan, 15);
+
+ /* M2MF inline upload: one line of 8 * 4 bytes into this unit's slot. */
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, pNv->tesla_scratch,
+ TIC_OFFSET + unit * 32, tcb_flags) ||
+ OUT_RELOCl(chan, pNv->tesla_scratch,
+ TIC_OFFSET + unit * 32, tcb_flags))
+ return FALSE;
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 8 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 8);
+
+ /* TIC word 0: channel types, component swizzle and storage format.
+  * x-formats map alpha to ONE; a8 routes its single channel to alpha
+  * and zeroes the colour channels.
+  */
+ switch (ppict->format) {
+ case PICT_a8r8g8b8:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 8_8_8_8));
+ break;
+ case PICT_a8b8g8r8:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 8_8_8_8));
+ break;
+ case PICT_x8r8g8b8:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 8_8_8_8));
+ break;
+ case PICT_x8b8g8r8:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 8_8_8_8));
+ break;
+ case PICT_r5g6b5:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 5_6_5));
+ break;
+ case PICT_a8:
+ OUT_RING(chan, _(A_C0, B_ZERO, G_ZERO, R_ZERO, 8));
+ break;
+ case PICT_x1r5g5b5:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 1_5_5_5));
+ break;
+ case PICT_x1b5g5r5:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 1_5_5_5));
+ break;
+ case PICT_a1r5g5b5:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 1_5_5_5));
+ break;
+ case PICT_a1b5g5r5:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 1_5_5_5));
+ break;
+ case PICT_b5g6r5:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 5_6_5));
+ break;
+ case PICT_b8g8r8x8:
+ OUT_RING(chan, _(A_ONE, R_C1, G_C2, B_C3, 8_8_8_8));
+ break;
+ case PICT_b8g8r8a8:
+ OUT_RING(chan, _(A_C0, R_C1, G_C2, B_C3, 8_8_8_8));
+ break;
+ case PICT_a2b10g10r10:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 2_10_10_10));
+ break;
+ case PICT_x2b10g10r10:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 2_10_10_10));
+ break;
+ case PICT_x2r10g10b10:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 2_10_10_10));
+ break;
+ case PICT_a2r10g10b10:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 2_10_10_10));
+ break;
+ case PICT_x4r4g4b4:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_ONE, 4_4_4_4));
+ break;
+ case PICT_x4b4g4r4:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_ONE, 4_4_4_4));
+ break;
+ case PICT_a4r4g4b4:
+ OUT_RING(chan, _(B_C0, G_C1, R_C2, A_C3, 4_4_4_4));
+ break;
+ case PICT_a4b4g4r4:
+ OUT_RING(chan, _(R_C0, G_C1, B_C2, A_C3, 4_4_4_4));
+ break;
+ default:
+ NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n");
+ }
+#undef _
+
+ /* TIC words 1-7: address low, address high merged with the tiling
+  * mode bits, then size and constant words.
+  */
+ mode = 0xd0005000 | (bo->tile_mode << 22);
+ if (OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD) ||
+ OUT_RELOCd(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, mode, mode))
+ return FALSE;
+ OUT_RING (chan, 0x00300000);
+ /* 1U, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
+ OUT_RING (chan, (1U << 31) | ppix->drawable.width);
+ OUT_RING (chan, (1 << 16) | ppix->drawable.height);
+ OUT_RING (chan, 0x03000000);
+ OUT_RING (chan, 0x00000000);
+
+ /* Same sequence for the sampler (TSC) table. */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TSC_ADDRESS_HIGH, 3);
+ if (OUT_RELOCh(chan, pNv->tesla_scratch, TSC_OFFSET, tcb_flags) ||
+ OUT_RELOCl(chan, pNv->tesla_scratch, TSC_OFFSET, tcb_flags))
+ return FALSE;
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_OFFSET_OUT_HIGH, 2);
+ if (OUT_RELOCh(chan, pNv->tesla_scratch,
+ TSC_OFFSET + unit * 32, tcb_flags) ||
+ OUT_RELOCl(chan, pNv->tesla_scratch,
+ TSC_OFFSET + unit * 32, tcb_flags))
+ return FALSE;
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_LINE_LENGTH_IN, 2);
+ OUT_RING (chan, 8 * 4);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, NvSubM2MF, NVC0_M2MF_EXEC, 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, NvSubM2MF, NVC0_M2MF_DATA, 8);
+
+ /* TSC word 0: wrap mode on all three axes.  Non-repeating pictures
+  * clamp to the border; an unrecognised repeatType is treated like
+  * RepeatNormal.
+  */
+ if (ppict->repeat) {
+ switch (ppict->repeatType) {
+ case RepeatPad:
+ OUT_RING (chan, 0x00024000 |
+ NV50TSC_1_0_WRAPS_CLAMP |
+ NV50TSC_1_0_WRAPT_CLAMP |
+ NV50TSC_1_0_WRAPR_CLAMP);
+ break;
+ case RepeatReflect:
+ OUT_RING (chan, 0x00024000 |
+ NV50TSC_1_0_WRAPS_MIRROR_REPEAT |
+ NV50TSC_1_0_WRAPT_MIRROR_REPEAT |
+ NV50TSC_1_0_WRAPR_MIRROR_REPEAT);
+ break;
+ case RepeatNormal:
+ default:
+ OUT_RING (chan, 0x00024000 |
+ NV50TSC_1_0_WRAPS_REPEAT |
+ NV50TSC_1_0_WRAPT_REPEAT |
+ NV50TSC_1_0_WRAPR_REPEAT);
+ break;
+ }
+ } else {
+ OUT_RING (chan, 0x00024000 |
+ NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER |
+ NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER |
+ NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER);
+ }
+ /* TSC word 1: bilinear or nearest filtering, never mipmapped. */
+ if (ppict->filter == PictFilterBilinear) {
+ OUT_RING (chan,
+ NV50TSC_1_1_MAGF_LINEAR |
+ NV50TSC_1_1_MINF_LINEAR | NV50TSC_1_1_MIPF_NONE);
+ } else {
+ OUT_RING (chan,
+ NV50TSC_1_1_MAGF_NEAREST |
+ NV50TSC_1_1_MINF_NEAREST | NV50TSC_1_1_MIPF_NONE);
+ }
+ /* TSC words 2-7: two zero words followed by four 0.0f floats. */
+ OUT_RING (chan, 0x00000000);
+ OUT_RING (chan, 0x00000000);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 0.0f);
+
+ /* Remembered so NVC0EXAComposite can transform and normalise the
+  * texture coordinates of this unit.
+  */
+ state->unit[unit].width = ppix->drawable.width;
+ state->unit[unit].height = ppix->drawable.height;
+ state->unit[unit].transform = ppict->transform;
+ return TRUE;
+}
+
+/*
+ * Check that a Render operator is one this driver can blend.
+ *
+ * op is used elsewhere in this file as an index into NVC0EXABlendOp[], so
+ * both bounds are checked here: anything below 0 or above PictOpAdd is
+ * rejected through NOUVEAU_FALLBACK.  Returns TRUE otherwise.
+ */
+static Bool
+NVC0EXACheckBlend(int op)
+{
+ if (op < 0 || op > PictOpAdd)
+ NOUVEAU_FALLBACK("unsupported blend op %d\n", op);
+ return TRUE;
+}
+
+/*
+ * Program the blend state for a Render operator.
+ *
+ * ppix:            pixmap used only to obtain the channel via
+ *                  NVC0EXA_LOCALS.
+ * ppict:           destination picture; its format decides whether a
+ *                  destination alpha channel exists.
+ * op:              index into NVC0EXABlendOp[]; the caller must already
+ *                  have validated it.
+ * component_alpha: non-zero when the mask supplies per-channel alpha.
+ *
+ * The table's source/destination factors are adjusted for two cases before
+ * being emitted, and blending is switched off entirely when the result is
+ * ONE/ZERO.  No per-call logging here: this runs for every composite setup.
+ */
+static void
+NVC0EXABlend(PixmapPtr ppix, PicturePtr ppict, int op, int component_alpha)
+{
+ NVC0EXA_LOCALS(ppix);
+ struct nvc0_blend_op *b = &NVC0EXABlendOp[op];
+ unsigned sblend = b->src_blend;
+ unsigned dblend = b->dst_blend;
+ int blend_on;
+
+ /* Destination without an alpha channel: substitute constants for the
+  * factors that would read destination alpha (DST_ALPHA -> ONE,
+  * ONE_MINUS_DST_ALPHA -> ZERO).
+  */
+ if (b->dst_alpha) {
+ if (!PICT_FORMAT_A(ppict->format)) {
+ if (sblend == BF(DST_ALPHA))
+ sblend = BF(ONE);
+ else
+ if (sblend == BF(ONE_MINUS_DST_ALPHA))
+ sblend = BF(ZERO);
+ }
+ }
+
+ /* Component alpha: the destination factor uses the per-channel
+  * source colour instead of source alpha.
+  */
+ if (b->src_alpha && component_alpha) {
+ if (dblend == BF(SRC_ALPHA))
+ dblend = BF(SRC_COLOR);
+ else
+ if (dblend == BF(ONE_MINUS_SRC_ALPHA))
+ dblend = BF(ONE_MINUS_SRC_COLOR);
+ }
+
+ /* ONE/ZERO is a plain copy, so blending can simply be disabled. */
+ blend_on = !(sblend == BF(ONE) && dblend == BF(ZERO));
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_ENABLE(0), 1);
+ OUT_RING (chan, blend_on ? 1 : 0);
+ if (!blend_on)
+ return;
+
+ /* Same ADD equation and same factors for the RGB and alpha paths;
+  * the destination alpha factor is a separate method.
+  */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_EQUATION_RGB, 5);
+ OUT_RING (chan, NVC0TCL_BLEND_EQUATION_RGB_FUNC_ADD);
+ OUT_RING (chan, sblend);
+ OUT_RING (chan, dblend);
+ OUT_RING (chan, NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_ADD);
+ OUT_RING (chan, sblend);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BLEND_FUNC_DST_ALPHA, 1);
+ OUT_RING (chan, dblend);
+}
+
+/*
+ * Composite pre-check: decide whether a composite can be accelerated.
+ *
+ * op:     Render operator.
+ * pspict: source picture.
+ * pmpict: optional mask picture (may be NULL).
+ * pdpict: destination picture.
+ *
+ * Returns TRUE when the operator, destination, source and (if present) mask
+ * all pass their individual checks; otherwise bails out through
+ * NOUVEAU_FALLBACK.  The operator is validated first because the later
+ * checks index NVC0EXABlendOp[op].
+ *
+ * No unconditional logging here: this runs for every composite request.
+ */
+Bool
+NVC0EXACheckComposite(int op,
+ PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict)
+{
+ if (!NVC0EXACheckBlend(op))
+ NOUVEAU_FALLBACK("blend not supported\n");
+
+ if (!NVC0EXACheckRenderTarget(pdpict))
+ NOUVEAU_FALLBACK("render target invalid\n");
+
+ if (!NVC0EXACheckTexture(pspict, pdpict, op))
+ NOUVEAU_FALLBACK("src picture invalid\n");
+
+ if (pmpict) {
+ /* Component alpha with an RGB mask is rejected when the
+  * operator needs source alpha and has a non-ZERO source
+  * blend factor.
+  */
+ if (pmpict->componentAlpha &&
+ PICT_FORMAT_RGB(pmpict->format) &&
+ NVC0EXABlendOp[op].src_alpha &&
+ NVC0EXABlendOp[op].src_blend != BF(ZERO))
+ NOUVEAU_FALLBACK("component-alpha not supported\n");
+
+ if (!NVC0EXACheckTexture(pmpict, pdpict, op))
+ NOUVEAU_FALLBACK("mask picture invalid\n");
+ }
+
+ return TRUE;
+}
+
+/*
+ * Channel flush_notify hook installed by NVC0EXAPrepareComposite.
+ *
+ * Replays the composite setup using the operator, pictures and pixmaps that
+ * NVC0EXAPrepareComposite cached in the driver private.  The result of the
+ * replay is not checked.
+ */
+static void
+NVC0EXAStateCompositeResubmit(struct nouveau_channel *chan)
+{
+ ScrnInfoPtr scrn = chan->user_private;
+ NVPtr nv = NVPTR(scrn);
+
+ NVC0EXAPrepareComposite(nv->alu,
+ nv->pspict, nv->pmpict, nv->pdpict,
+ nv->pspix, nv->pmpix, nv->pdpix);
+}
+
+/*
+ * Set up all 3D state for a composite operation.
+ *
+ * op:                     Render operator (index into NVC0EXABlendOp[]).
+ * pspict, pmpict, pdpict: source, optional mask (may be NULL) and
+ *                         destination pictures.
+ * pspix, pmpix, pdpix:    the pixmaps backing those pictures.
+ *
+ * Queues the render target, blend state, shader code address, texture
+ * unit(s) and fragment program selection, then caches the arguments in pNv
+ * and installs NVC0EXAStateCompositeResubmit as the channel's flush_notify
+ * hook so the state can be replayed.
+ *
+ * Returns TRUE on success.  On failure everything queued since MARK_RING is
+ * rolled back with MARK_UNDO before bailing out.
+ *
+ * No per-call logging here: this runs for every composite setup.
+ */
+Bool
+NVC0EXAPrepareComposite(int op,
+ PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict,
+ PixmapPtr pspix, PixmapPtr pmpix, PixmapPtr pdpix)
+{
+ NVC0EXA_LOCALS(pspix);
+ const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ if (MARK_RING (chan, 128, 4 + 2 + 2 * 10))
+ NOUVEAU_FALLBACK("ring space\n");
+
+ /* fonts: !pmpict, op == 12 (Add, ONE/ONE) */
+
+ BEGIN_RING(chan, NvSub2D, NV50_2D_SERIALIZE, 1);
+ OUT_RING (chan, 0);
+
+ if (!NVC0EXARenderTarget(pdpix, pdpict)) {
+ MARK_UNDO(chan);
+ NOUVEAU_FALLBACK("render target invalid\n");
+ }
+
+ /* Component alpha only counts when the mask carries RGB data. */
+ NVC0EXABlend(pdpix, pdpict, op, pmpict && pmpict->componentAlpha &&
+ PICT_FORMAT_RGB(pmpict->format));
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_CODE_ADDRESS_HIGH, 2);
+ if (OUT_RELOCh(chan, pNv->tesla_scratch, CODE_OFFSET, shd_flags) ||
+ OUT_RELOCl(chan, pNv->tesla_scratch, CODE_OFFSET, shd_flags)) {
+ MARK_UNDO(chan);
+ return FALSE;
+ }
+
+ /* Source picture goes to texture unit 0. */
+ if (!NVC0EXATexture(pspix, pspict, 0)) {
+ MARK_UNDO(chan);
+ NOUVEAU_FALLBACK("src picture invalid\n");
+ }
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1);
+ OUT_RING (chan, (0 << 9) | (0 << 1) | NVC0TCL_BIND_TIC_ACTIVE);
+
+ if (pmpict) {
+ /* Mask picture goes to texture unit 1. */
+ if (!NVC0EXATexture(pmpix, pmpict, 1)) {
+ MARK_UNDO(chan);
+ NOUVEAU_FALLBACK("mask picture invalid\n");
+ }
+ state->have_mask = TRUE;
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1);
+ OUT_RING (chan, (1 << 9) | (1 << 1) | NVC0TCL_BIND_TIC_ACTIVE);
+
+ /* Fragment program for the masked case: a dedicated one for
+  * a8 destinations, otherwise chosen by component alpha and by
+  * whether the operator needs source alpha.
+  */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_START_ID(5), 1);
+ if (pdpict->format == PICT_a8) {
+ OUT_RING (chan, PFP_C_A8);
+ } else {
+ if (pmpict->componentAlpha &&
+ PICT_FORMAT_RGB(pmpict->format)) {
+ if (NVC0EXABlendOp[op].src_alpha)
+ OUT_RING (chan, PFP_CCASA);
+ else
+ OUT_RING (chan, PFP_CCA);
+ } else {
+ OUT_RING (chan, PFP_C);
+ }
+ }
+ } else {
+ state->have_mask = FALSE;
+
+ /* No mask: slot 1 is written without the ACTIVE bit. */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_BIND_TIC(4), 1);
+ OUT_RING (chan, (1 << 1) | 0);
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SP_START_ID(5), 1);
+ if (pdpict->format == PICT_a8)
+ OUT_RING (chan, PFP_S_A8);
+ else
+ OUT_RING (chan, PFP_S);
+ }
+
+ /* Flush sampler/image state and the texture cache after the uploads
+  * made by NVC0EXATexture.
+  */
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TSC_FLUSH, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TIC_FLUSH, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (chan, 0);
+
+ /* Cache the arguments for NVC0EXAStateCompositeResubmit. */
+ pNv->alu = op;
+ pNv->pspict = pspict;
+ pNv->pmpict = pmpict;
+ pNv->pdpict = pdpict;
+ pNv->pspix = pspix;
+ pNv->pmpix = pmpix;
+ pNv->pdpix = pdpix;
+ chan->flush_notify = NVC0EXAStateCompositeResubmit;
+ return TRUE;
+}
+
+/*
+ * Convert an X fixed-point value to floating point: the integer part plus
+ * the fractional part scaled by 1/65536.  The argument is evaluated twice,
+ * and the 65536.0 literal makes the result a double.
+ */
+#define xFixedToFloat(v) \
+ ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
+
+/*
+ * Map an integer pixel coordinate to a normalised texture coordinate.
+ *
+ * t:      optional picture transform; NULL means identity.
+ * x, y:   pixel coordinate.
+ * sx, sy: divisors applied to the result (the callers pass the texture
+ *         width and height).
+ * x_ret, y_ret: receive the resulting coordinates.
+ *
+ * With a transform, the point is converted to fixed point, run through
+ * PictureTransformPoint (whose return value is not checked) and converted
+ * back before the division.
+ */
+static inline void
+NVC0EXATransform(PictTransformPtr t, int x, int y, float sx, float sy,
+ float *x_ret, float *y_ret)
+{
+ PictVector pt;
+
+ /* Identity: just normalise. */
+ if (!t) {
+ *x_ret = (float)x / sx;
+ *y_ret = (float)y / sy;
+ return;
+ }
+
+ pt.vector[0] = IntToxFixed(x);
+ pt.vector[1] = IntToxFixed(y);
+ pt.vector[2] = xFixed1;
+ PictureTransformPoint(t, &pt);
+
+ *x_ret = xFixedToFloat(pt.vector[0]) / sx;
+ *y_ret = xFixedToFloat(pt.vector[1]) / sy;
+}
+
+/*
+ * Draw one composited rectangle.
+ *
+ * pdpix:  destination pixmap (used to obtain the channel and state via
+ *         NVC0EXA_LOCALS).
+ * sx, sy: top-left corner in the source picture.
+ * mx, my: top-left corner in the mask picture (used only with a mask).
+ * dx, dy: top-left corner in the destination.
+ * w, h:   rectangle size.
+ *
+ * The rectangle is drawn as a single triangle with corners at
+ * (dx, dy + 2h), (dx, dy) and (dx + 2w, dy); the scissor set to
+ * [dx, dx + w] x [dy, dy + h] clips it to the rectangle.  Texture
+ * coordinates for the same three corners are produced by NVC0EXATransform
+ * using the per-unit size and transform stored by NVC0EXATexture.
+ *
+ * No logging here: this is called once per rectangle.
+ */
+void
+NVC0EXAComposite(PixmapPtr pdpix,
+ int sx, int sy, int mx, int my,
+ int dx, int dy, int w, int h)
+{
+ NVC0EXA_LOCALS(pdpix);
+ float sX0, sX1, sX2, sY0, sY1, sY2;
+
+ WAIT_RING (chan, 64);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_SCISSOR_HORIZ(0), 2);
+ OUT_RING (chan, ((dx + w) << 16) | dx);
+ OUT_RING (chan, ((dy + h) << 16) | dy);
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES);
+
+ /* Source coordinates for the three triangle corners. */
+ NVC0EXATransform(state->unit[0].transform, sx, sy + (h * 2),
+ state->unit[0].width, state->unit[0].height,
+ &sX0, &sY0);
+ NVC0EXATransform(state->unit[0].transform, sx, sy,
+ state->unit[0].width, state->unit[0].height,
+ &sX1, &sY1);
+ NVC0EXATransform(state->unit[0].transform, sx + (w * 2), sy,
+ state->unit[0].width, state->unit[0].height,
+ &sX2, &sY2);
+
+ if (state->have_mask) {
+ float mX0, mX1, mX2, mY0, mY1, mY2;
+
+ /* Mask coordinates for the same corners. */
+ NVC0EXATransform(state->unit[1].transform, mx, my + (h * 2),
+ state->unit[1].width, state->unit[1].height,
+ &mX0, &mY0);
+ NVC0EXATransform(state->unit[1].transform, mx, my,
+ state->unit[1].width, state->unit[1].height,
+ &mX1, &mY1);
+ NVC0EXATransform(state->unit[1].transform, mx + (w * 2), my,
+ state->unit[1].width, state->unit[1].height,
+ &mX2, &mY2);
+
+ VTX2s(pNv, sX0, sY0, mX0, mY0, dx, dy + (h * 2));
+ VTX2s(pNv, sX1, sY1, mX1, mY1, dx, dy);
+ VTX2s(pNv, sX2, sY2, mX2, mY2, dx + (w * 2), dy);
+ } else {
+ VTX1s(pNv, sX0, sY0, dx, dy + (h * 2));
+ VTX1s(pNv, sX1, sY1, dx, dy);
+ VTX1s(pNv, sX2, sY2, dx + (w * 2), dy);
+ }
+
+ BEGIN_RING(chan, NvSub3D, NVC0TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+}
+
+/*
+ * Finish a composite operation: clear the channel's flush_notify hook that
+ * NVC0EXAPrepareComposite installed, so composite state is no longer
+ * replayed through it.
+ */
+void
+NVC0EXADoneComposite(PixmapPtr pdpix)
+{
+ NVC0EXA_LOCALS(pdpix);
+
+ chan->flush_notify = NULL;
+}
+