summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2007-06-24 17:48:33 +1000
committerBen Skeggs <skeggsb@gmail.com>2007-06-24 17:56:22 +1000
commit824331e3fce36098e8dec31746204f7c13038abb (patch)
treeaa05134c43d450bbbeb838f57d383cd50a63e33f
parent2e2e95fc3b650ca633294bf4c11c9ba28f45b743 (diff)
downloadxorg-driver-xf86-video-nouveau-824331e3fce36098e8dec31746204f7c13038abb.tar.gz
Implement NV40 EXA composite hooks with 3D engine.
Named NV30EXA, but won't work on NV30 just yet. This should change at some point in the future. Will likely break non-voodoo'd NV4X cards (if the latest drm changes didn't already break them). Use EXANoComposite xorg.conf option to disable the hook if that's the case.
-rw-r--r--src/Makefile.am3
-rw-r--r--src/nv30_exa.c700
-rw-r--r--src/nv_accel_common.c9
-rw-r--r--src/nv_dma.h10
-rw-r--r--src/nv_exa.c24
-rw-r--r--src/nv_proto.h8
-rw-r--r--src/nv_shaders.h102
7 files changed, 842 insertions, 14 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 783a284..7e4f5b1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -53,7 +53,8 @@ nouveau_drv_la_SOURCES = \
nv_shadow.c \
nv_type.h \
nv_video.c \
- nv_xaa.c
+ nv_xaa.c \
+ nv30_exa.c
#riva128_la_LTLIBRARIES = riva128.la
#riva128_la_LDFLAGS = -module -avoid-version
diff --git a/src/nv30_exa.c b/src/nv30_exa.c
new file mode 100644
index 0000000..66901d3
--- /dev/null
+++ b/src/nv30_exa.c
@@ -0,0 +1,700 @@
+#include "nv_include.h"
+#include "nv_shaders.h"
+
+/*
+ * Method offsets for vertex attribute slot d: two floats (X then Y) or one
+ * packed integer pair.  The argument is parenthesised so that expressions
+ * such as (unit + 8) expand correctly.
+ */
+#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(d) (0x00001880 + (d) * 0x0008)
+#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_Y(d) (0x00001884 + (d) * 0x0008)
+#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(d) (0x00001900 + (d) * 0x0004)
+
+typedef struct nv_pict_surface_format { /* One entry of the render-target format table. */
+ int pict_fmt; /* RENDER picture format code; -1 marks the end of the table */
+ uint32_t card_fmt; /* value emitted as the surface-format word by NV30_SetupSurface */
+} nv_pict_surface_format_t;
+
+typedef struct nv_pict_texture_format { /* One entry of the texture format table. */
+ int pict_fmt; /* RENDER picture format code; -1 marks the end of the table */
+ uint32_t card_fmt; /* format id; NV30EXATexture shifts it into the texture format word */
+ uint32_t card_swz; /* swizzle word; NV30EXATexture emits it unchanged */
+} nv_pict_texture_format_t;
+
+typedef struct nv_pict_op { /* Blend setup for one RENDER operator. */
+ Bool src_alpha; /* op depends on source alpha; NV30_SetupBlend uses it for component-alpha masks */
+ Bool dst_alpha; /* op depends on destination alpha; NV30_SetupBlend uses it for alpha-less targets */
+ uint32_t src_card_op; /* blend factor applied to the source colour */
+ uint32_t dst_card_op; /* blend factor applied to the destination colour */
+} nv_pict_op_t;
+
+typedef struct nv30_exa_state { /* Values recorded while preparing a composite, read back by NV30EXAComposite. */
+ Bool have_mask; /* TRUE when PrepareComposite bound a mask picture to unit 1 */
+
+ struct { /* indexed by texture unit: 0 = source, 1 = mask */
+ PictTransformPtr transform; /* copied from the picture; NV30EXATransformCoord skips the transform when NULL */
+ float width; /* pixmap width; texture coordinates are divided by it */
+ float height; /* pixmap height; texture coordinates are divided by it */
+ } unit[2];
+} nv30_exa_state_t;
+static nv30_exa_state_t exa_state; /* one instance for the whole file, i.e. shared by all screens */
+#define NV30EXA_STATE nv30_exa_state_t *state = &exa_state /* declares a local `state` pointer; use among declarations */
+
+static nv_pict_surface_format_t /* Destination formats accepted for compositing, searched by NV30_GetPictSurfaceFormat. */
+NV30SurfaceFormat[] = {
+ { PICT_a8r8g8b8 , 0x148 },
+ { PICT_x8r8g8b8 , 0x145 },
+ { PICT_r5g6b5 , 0x143 },
+// { PICT_a8 , 0x149 },
+ { -1, ~0 } /* terminator: the lookup stops at pict_fmt == -1 */
+};
+
+/*
+ * Find the render-target description for a RENDER picture format.
+ *
+ * Walks NV30SurfaceFormat up to its -1 terminator and returns the matching
+ * entry, or NULL when the format is not listed.
+ */
+static nv_pict_surface_format_t *
+NV30_GetPictSurfaceFormat(int format)
+{
+	nv_pict_surface_format_t *entry;
+
+	for (entry = NV30SurfaceFormat; entry->pict_fmt != -1; entry++) {
+		if (entry->pict_fmt == format)
+			return entry;
+	}
+
+	return NULL;
+}
+
+
+/* should be in nouveau_reg.h at some point.. */
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT 14 /* S0_* fields: one 2-bit selector per output channel */
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ZERO 0 /* NOTE(review): names suggest constant 0 / constant 1 / take the S1 result -- confirm */
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ONE 1
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_S1 2
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT 12
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT 10
+#define NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT 8
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT 6 /* S1_* fields: one 2-bit component selector per output channel */
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_X 3 /* selector values are shared by all four S1 fields (the table macro reuses the _X_ names) */
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Y 2
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Z 1
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_W 0
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT 4
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT 2
+#define NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT 0
+
+/*
+ * Table-building helper: expands to one nv_pict_texture_format_t initialiser.
+ *   r        RENDER format name without the PICT_ prefix
+ *   tf       card texture format id
+ *   ts0x..w  S0 selector per channel (ZERO, ONE or S1)
+ *   ts1x..w  S1 component selector per channel (X, Y, Z or W)
+ * The selector value names carry an _X_ infix for every channel; only the
+ * shift differs between channels.
+ */
+#define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+	{ \
+	PICT_##r, \
+	(tf), \
+	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0x << NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0y << NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0z << NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0w << NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1x << NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1y << NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1z << NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT)|\
+	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1w << NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)\
+	}
+/* Picture formats usable as textures; terminated by pict_fmt == -1. */
+static nv_pict_texture_format_t
+NV30TextureFormat[] = {
+	_(a8r8g8b8, 0x85, S1, S1, S1, S1, X, Y, Z, W),
+	_(x8r8g8b8, 0x85, S1, S1, S1, ONE, X, Y, Z, W),
+	_(x8b8g8r8, 0x85, S1, S1, S1, ONE, Z, Y, X, W),
+	_(a1r5g5b5, 0x82, S1, S1, S1, S1, X, Y, Z, W),
+	_(x1r5g5b5, 0x82, S1, S1, S1, ONE, X, Y, Z, W),
+	_( r5g6b5, 0x84, S1, S1, S1, S1, X, Y, Z, W),
+	_( a8, 0x81, ZERO, ZERO, ZERO, S1, X, X, X, X),
+	{ -1, ~0, ~0 }
+};
+/* The helper is only meant for the table above; do not leak a macro named `_`. */
+#undef _
+
+/*
+ * Find the texture description for a RENDER picture format.
+ *
+ * Walks NV30TextureFormat up to its -1 terminator and returns the matching
+ * entry, or NULL when the format cannot be sampled.
+ */
+static nv_pict_texture_format_t *
+NV30_GetPictTextureFormat(int format)
+{
+	nv_pict_texture_format_t *entry;
+
+	for (entry = NV30TextureFormat; entry->pict_fmt != -1; entry++) {
+		if (entry->pict_fmt == format)
+			return entry;
+	}
+
+	return NULL;
+}
+
+#define NV30_TCL_PRIMITIVE_3D_BF_ZERO 0x0000 /* Blend factor codes; the values coincide with the OpenGL blend-factor enums. */
+#define NV30_TCL_PRIMITIVE_3D_BF_ONE 0x0001
+#define NV30_TCL_PRIMITIVE_3D_BF_SRC_COLOR 0x0300
+#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_COLOR 0x0301
+#define NV30_TCL_PRIMITIVE_3D_BF_SRC_ALPHA 0x0302
+#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_ALPHA 0x0303
+#define NV30_TCL_PRIMITIVE_3D_BF_DST_ALPHA 0x0304
+#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_ALPHA 0x0305
+#define NV30_TCL_PRIMITIVE_3D_BF_DST_COLOR 0x0306
+#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_COLOR 0x0307
+#define NV30_TCL_PRIMITIVE_3D_BF_ALPHA_SATURATE 0x0308
+#define BF(bf) NV30_TCL_PRIMITIVE_3D_BF_##bf /* shorthand: BF(ONE) -> NV30_TCL_PRIMITIVE_3D_BF_ONE */
+
+static nv_pict_op_t /* Indexed directly by the RENDER operator number (see NV30_GetPictOpRec). */
+NV30PictOp[] = { /* columns: src_alpha, dst_alpha, source factor, destination factor */
+/* Clear */ { 0, 0, BF( ZERO), BF( ZERO) },
+/* Src */ { 0, 0, BF( ONE), BF( ZERO) },
+/* Dst */ { 0, 0, BF( ZERO), BF( ONE) },
+/* Over */ { 1, 0, BF( ONE), BF(ONE_MINUS_SRC_ALPHA) },
+/* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ONE) },
+/* In */ { 0, 1, BF( DST_ALPHA), BF( ZERO) },
+/* InReverse */ { 1, 0, BF( ZERO), BF( SRC_ALPHA) },
+/* Out */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF( ZERO) },
+/* OutReverse */ { 1, 0, BF( ZERO), BF(ONE_MINUS_SRC_ALPHA) },
+/* Atop */ { 1, 1, BF( DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
+/* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF( SRC_ALPHA) },
+/* Xor */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
+/* Add */ { 0, 0, BF( ONE), BF( ONE) } /* last entry: operators from PictOpSaturate up are rejected by the lookup */
+};
+
+/*
+ * Return the blend description for RENDER operator `op`, or NULL when the
+ * operator is not covered by NV30PictOp.
+ *
+ * The table is indexed directly by `op`, so both ends of the range are
+ * checked: negative values and anything from PictOpSaturate upwards.
+ */
+static nv_pict_op_t *
+NV30_GetPictOpRec(int op)
+{
+	if (op < 0 || op >= PictOpSaturate)
+		return NULL;
+	return &NV30PictOp[op];
+}
+
+#if 0 /* change to 1 to log the reason for each rejected composite */
+#define FALLBACK(fmt,args...) do { /* logs, then returns FALSE from the *calling* function */ \
+ ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args); \
+ return FALSE; \
+} while(0)
+#else /* quiet variant: format string and arguments are discarded unevaluated */
+#define FALLBACK(fmt,args...) do { /* returns FALSE from the *calling* function */ \
+ return FALSE; \
+} while(0)
+#endif
+
+/*
+ * Upload a vertex program if it has not been uploaded yet, then select it.
+ *
+ * pScrn   screen whose DMA channel receives the commands
+ * shader  program to activate; hw_id is filled in on first upload
+ *
+ * Programs are placed one after another: next_hw_id advances by one for
+ * every four-word instruction uploaded.
+ *
+ * NOTE(review): hw_id == 0 doubles as "not uploaded", yet the first program
+ * is assigned id 0, so that program is uploaded a second time (at the next
+ * free id) on the following call.
+ */
+static void
+NV30_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
+{
+	static int next_hw_id = 0;
+	NVPtr pNv = NVPTR(pScrn);
+	int word;
+
+	if (shader->hw_id == 0) {
+		shader->hw_id = next_hw_id;
+
+		NVDmaStart(pNv, NvSub3D,
+			   NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_FROM_ID, 1);
+		NVDmaNext (pNv, (shader->hw_id));
+
+		/* One upload packet per instruction (four words each). */
+		for (word = 0; word < shader->size; word += 4) {
+			NVDmaStart(pNv, NvSub3D,
+				   NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4);
+			NVDmaNext (pNv, shader->data[word + 0]);
+			NVDmaNext (pNv, shader->data[word + 1]);
+			NVDmaNext (pNv, shader->data[word + 2]);
+			NVDmaNext (pNv, shader->data[word + 3]);
+			next_hw_id++;
+		}
+	}
+
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1);
+	NVDmaNext (pNv, (shader->hw_id));
+
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2);
+	NVDmaNext (pNv, shader->card_priv.NV30VP.vp_in_reg);
+	NVDmaNext (pNv, shader->card_priv.NV30VP.vp_out_reg);
+}
+
+/*
+ * Copy a fragment program into video memory on first use, then select it.
+ *
+ * pScrn   screen whose DMA channel receives the commands
+ * shader  program to activate; hw_id receives its video-memory address
+ *
+ * All programs share one 0x1000-byte buffer allocated on the first call;
+ * each program starts on a 64-byte boundary.  If the allocation fails an
+ * error is logged and nothing is emitted.
+ *
+ * NOTE(review): nothing checks that the programs still fit in the buffer.
+ * NOTE(review): the architecture test uses NV_30 while other code in this
+ * driver compares Architecture against NV_ARCH_* values -- confirm.
+ */
+static void
+NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
+{
+	static NVAllocRec *fp_mem = NULL;
+	static int next_hw_id_offset = 0;
+	NVPtr pNv = NVPTR(pScrn);
+
+	if (fp_mem == NULL) {
+		fp_mem = NVAllocateMemory(pNv, NOUVEAU_MEM_FB, 0x1000);
+		if (fp_mem == NULL) {
+			xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+				   "Couldn't alloc fragprog buffer!\n");
+			return;
+		}
+	}
+
+	if (shader->hw_id == 0) {
+		memcpy(fp_mem->map + next_hw_id_offset, shader->data,
+		       shader->size * sizeof(uint32_t));
+
+		shader->hw_id = fp_mem->offset;
+		shader->hw_id += next_hw_id_offset;
+
+		/* Advance past this program and round up to 64 bytes. */
+		next_hw_id_offset += (shader->size * sizeof(uint32_t));
+		next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
+	}
+
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1);
+	NVDmaNext (pNv, shader->hw_id | 1);
+
+	if (pNv->Architecture != NV_30) {
+		/* Register count travels in the top byte of method 0x1d60. */
+		NVDmaStart(pNv, NvSub3D, 0x1d60, 1);
+		NVDmaNext (pNv, (0<<7) /* !USES_KIL */ |
+				(shader->card_priv.NV30FP.num_regs << 24));
+	} else {
+		/* Register count goes to the separate method 0x1450. */
+		NVDmaStart(pNv, NvSub3D, 0x1d60, 1);
+		NVDmaNext (pNv, 0); /* USES_KIL (1<<7) == 0 */
+		NVDmaStart(pNv, NvSub3D, 0x1450, 1);
+		NVDmaNext (pNv, shader->card_priv.NV30FP.num_regs << 16);
+	}
+}
+
+/*
+ * Program the blend unit for one composite operation.
+ *
+ * blend            factors for the RENDER operator
+ * dest_has_alpha   non-zero when the destination format stores alpha
+ * component_alpha  non-zero when the mask is a component-alpha RGB mask
+ *
+ * The source factor is rewritten for alpha-less destinations, and the
+ * destination factor is switched to its per-channel (colour) variant for
+ * component-alpha masks.  Blending is turned off for plain ONE/ZERO.
+ */
+static void
+NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend, Bool dest_has_alpha,
+		Bool component_alpha)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	uint32_t src_factor = blend->src_card_op;
+	uint32_t dst_factor = blend->dst_card_op;
+
+	if (blend->dst_alpha && !dest_has_alpha) {
+		switch (src_factor) {
+		case BF(DST_ALPHA):
+			src_factor = BF(ONE);
+			break;
+		case BF(ONE_MINUS_DST_ALPHA):
+			src_factor = BF(ZERO);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (blend->src_alpha && component_alpha) {
+		switch (dst_factor) {
+		case BF(SRC_ALPHA):
+			dst_factor = BF(SRC_COLOR);
+			break;
+		case BF(ONE_MINUS_SRC_ALPHA):
+			dst_factor = BF(ONE_MINUS_SRC_COLOR);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (src_factor != BF(ONE) || dst_factor != BF(ZERO)) {
+		NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 5);
+		NVDmaNext (pNv, 1);
+		/* The same factor is used for the colour and alpha halves. */
+		NVDmaNext (pNv, (src_factor << 16) | src_factor);
+		NVDmaNext (pNv, (dst_factor << 16) | dst_factor);
+		NVDmaNext (pNv, 0x00000000); /* Blend colour */
+		NVDmaNext (pNv, (0x8006 << 16) | 0x8006); /* FUNC_ADD, FUNC_ADD */
+	} else {
+		/* dst = src: no blending needed. */
+		NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
+		NVDmaNext (pNv, 0);
+	}
+}
+
+/*
+ * Bind a pixmap to texture unit `unit` and record what NV30EXAComposite
+ * needs to generate texture coordinates for it.
+ *
+ * pPix   pixmap holding the texels
+ * pPict  picture describing format, repeat, filter and transform
+ * unit   texture unit index (0 = source, 1 = mask)
+ *
+ * Returns FALSE, without emitting anything, when the picture format has no
+ * entry in the texture format table; TRUE otherwise.
+ */
+static Bool
+NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	nv_pict_texture_format_t *tex_fmt;
+	uint32_t wrap, filter;
+	NV30EXA_STATE;
+
+	tex_fmt = NV30_GetPictTextureFormat(pPict->format);
+	if (tex_fmt == NULL)
+		return FALSE;
+
+	/* Wrap mode 1 for RepeatNormal, 3 for everything else. */
+	wrap = (pPict->repeat && pPict->repeatType == RepeatNormal) ? 1 : 3;
+	/* Filter 2 for bilinear, 1 for everything else. */
+	filter = (pPict->filter == PictFilterBilinear) ? 2 : 1;
+
+	NVDmaStart(pNv, NvSub3D,
+		   NV30_TCL_PRIMITIVE_3D_TX_ADDRESS_UNIT(unit), 8);
+	NVDmaNext (pNv, NVAccelGetPixmapOffset(pPix));
+	NVDmaNext (pNv, (2 << 4) /* 2D */ |
+			(tex_fmt->card_fmt << 8) |
+			(1 << 13) /* NPOT */ |
+			(1<<16) /* 1 mipmap level */ |
+			(1<<0) /* NvDmaFB */ |
+			(1<<3) /* border disable? */);
+	NVDmaNext (pNv, (wrap << 0) /* S */ |
+			(wrap << 8) /* T */ |
+			(wrap << 16) /* R */);
+	NVDmaNext (pNv, 0x80000000);
+	NVDmaNext (pNv, tex_fmt->card_swz);
+	NVDmaNext (pNv, (filter << 16) /* min */ |
+			(filter << 24) /* mag */ |
+			0x3fd6 /* engine lock */);
+	NVDmaNext (pNv, (pPix->drawable.width << 16) | pPix->drawable.height);
+	NVDmaNext (pNv, 0); /* border ARGB */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_TX_DEPTH_UNIT(unit), 1);
+	NVDmaNext (pNv, (1 << 20) /* depth */ |
+			(uint32_t)exaGetPixmapPitch(pPix));
+
+	/* Remembered for coordinate normalisation at Composite time. */
+	state->unit[unit].transform = pPict->transform;
+	state->unit[unit].width = (float)pPix->drawable.width;
+	state->unit[unit].height = (float)pPix->drawable.height;
+
+	return TRUE;
+}
+
+/*
+ * Point the 3D engine's colour buffer at a destination pixmap.
+ *
+ * Emits the surface format, pitch and offset.  Returns FALSE (after
+ * logging) when `format` is not a supported render-target format.
+ */
+static Bool
+NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	nv_pict_surface_format_t *surf_fmt = NV30_GetPictSurfaceFormat(format);
+
+	if (surf_fmt == NULL) {
+		ErrorF("AIII no format\n");
+		return FALSE;
+	}
+
+	NVDmaStart(pNv, NvSub3D, 0x208, 3);
+	NVDmaNext (pNv, surf_fmt->card_fmt);
+	NVDmaNext (pNv, (uint32_t)exaGetPixmapPitch(pPix));
+	NVDmaNext (pNv, NVAccelGetPixmapOffset(pPix));
+
+	return TRUE;
+}
+
+/*
+ * Decide whether a picture can be used as a texture by the composite path.
+ *
+ * Returns TRUE only when the picture
+ *   - is backed by a drawable no larger than 4096x4096,
+ *   - has a format listed in the texture format table,
+ *   - uses the nearest or bilinear filter, and
+ *   - either does not repeat or uses RepeatNormal/RepeatNone, the only
+ *     modes NV30EXATexture programs correctly.
+ * Any other case returns FALSE so that EXA falls back to software.
+ */
+static Bool
+NV30EXACheckCompositeTexture(PicturePtr pPict)
+{
+	nv_pict_texture_format_t *fmt;
+	int w, h;
+
+	/* Pictures without a drawable have no pixmap to sample from. */
+	if (!pPict->pDrawable)
+		FALLBACK("picture has no drawable\n");
+
+	w = pPict->pDrawable->width;
+	h = pPict->pDrawable->height;
+	if ((w > 4096) || (h > 4096))
+		FALLBACK("picture too large, %dx%d\n", w, h);
+
+	fmt = NV30_GetPictTextureFormat(pPict->format);
+	if (!fmt)
+		FALLBACK("picture format 0x%08x not supported\n",
+			 pPict->format);
+
+	if (pPict->filter != PictFilterNearest &&
+	    pPict->filter != PictFilterBilinear)
+		FALLBACK("filter 0x%x not supported\n", pPict->filter);
+
+	/*
+	 * The mode lives in repeatType; `repeat` is only the on/off flag, so
+	 * comparing it with RepeatNormal could never reject anything.
+	 */
+	if (pPict->repeat && (pPict->repeatType != RepeatNormal &&
+			      pPict->repeatType != RepeatNone))
+		FALLBACK("repeat 0x%x not supported\n", pPict->repeatType);
+
+	return TRUE;
+}
+
+/*
+ * EXA CheckComposite hook: report whether the 3D engine path can handle
+ * this operator and picture combination.
+ *
+ * op      RENDER operator
+ * psPict  source picture
+ * pmPict  mask picture, or NULL
+ * pdPict  destination picture
+ *
+ * Returns FALSE for an unsupported operator, destination format, source or
+ * mask, and for component-alpha RGB masks combined with an operator that
+ * needs source alpha together with a non-zero source factor.
+ */
+Bool
+NV30EXACheckComposite(int op, PicturePtr psPict,
+		      PicturePtr pmPict,
+		      PicturePtr pdPict)
+{
+	nv_pict_op_t *opr = NV30_GetPictOpRec(op);
+
+	if (opr == NULL)
+		FALLBACK("unsupported blend op 0x%x\n", op);
+
+	if (NV30_GetPictSurfaceFormat(pdPict->format) == NULL)
+		FALLBACK("dst picture format 0x%08x not supported\n",
+			 pdPict->format);
+
+	if (!NV30EXACheckCompositeTexture(psPict))
+		FALLBACK("src picture\n");
+
+	/* Nothing more to verify without a mask. */
+	if (pmPict == NULL)
+		return TRUE;
+
+	if (pmPict->componentAlpha &&
+	    PICT_FORMAT_RGB(pmPict->format) &&
+	    opr->src_alpha && opr->src_card_op != BF(ZERO))
+		FALLBACK("mask CA + SA\n");
+
+	if (!NV30EXACheckCompositeTexture(pmPict))
+		FALLBACK("mask picture\n");
+
+	return TRUE;
+}
+
+/*
+ * EXA PrepareComposite hook: program blend state, render target, textures
+ * and shaders, then open a quad primitive for NV30EXAComposite to fill.
+ *
+ * op                  RENDER operator
+ * psPict/pmPict/pdPict  source, mask (may be NULL) and destination pictures
+ * psPix/pmPix/pdPix     the pixmaps backing those pictures
+ *
+ * Returns TRUE when the engine is ready for Composite calls.  Returns FALSE
+ * when the operator, the destination format or a texture format turns out
+ * to be unsupported; in that case no primitive has been opened.
+ */
+Bool
+NV30EXAPrepareComposite(int op, PicturePtr psPict,
+			PicturePtr pmPict,
+			PicturePtr pdPict,
+			PixmapPtr psPix,
+			PixmapPtr pmPix,
+			PixmapPtr pdPix)
+{
+	ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	nv_pict_op_t *blend;
+	Bool mask_ca;
+	NV30EXA_STATE;
+
+	/* The lookup yields NULL for operators outside the table. */
+	blend = NV30_GetPictOpRec(op);
+	if (!blend)
+		return FALSE;
+
+	mask_ca = (pmPict && pmPict->componentAlpha &&
+		   PICT_FORMAT_RGB(pmPict->format));
+
+	NV30_SetupBlend(pScrn, blend, PICT_FORMAT_A(pdPict->format), mask_ca);
+
+	/* Both helpers report unsupported formats; do not draw in that case. */
+	if (!NV30_SetupSurface(pScrn, pdPix, pdPict->format))
+		return FALSE;
+	if (!NV30EXATexture(pScrn, psPix, psPict, 0))
+		return FALSE;
+
+	NV30_LoadVtxProg(pScrn, &nv40_vp_exa_render);
+	if (pmPict) {
+		if (!NV30EXATexture(pScrn, pmPix, pmPict, 1))
+			return FALSE;
+
+		if (mask_ca) {
+			if (blend->src_alpha)
+				NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask_sa_ca);
+			else
+				NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask_ca);
+		} else {
+			NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask);
+		}
+
+		state->have_mask = TRUE;
+	} else {
+		NV30_LoadFragProg(pScrn, &nv30_fp_pass_tex0);
+
+		state->have_mask = FALSE;
+	}
+
+	/* Appears to be some kind of cache flush, needed here at least
+	 * sometimes.. funky text rendering otherwise :)
+	 */
+	NVDmaStart(pNv, NvSub3D, 0x1fd8, 1);
+	NVDmaNext (pNv, 2);
+	NVDmaStart(pNv, NvSub3D, 0x1fd8, 1);
+	NVDmaNext (pNv, 1);
+
+	/* Left open on purpose; NV30EXADoneComposite closes it. */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BEGIN_END, 1);
+	NVDmaNext (pNv, 8); /* GL_QUADS */
+
+	return TRUE;
+}
+
+/* Convert a 16.16 fixed-point value to floating point. */
+#define xFixedToFloat(v) \
+	((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
+
+/*
+ * Map a pixel position to a normalised texture coordinate.
+ *
+ * t             picture transform to apply first, or NULL for none
+ * x, y          position in picture space
+ * sx, sy        texture width and height used as divisors
+ * x_ret, y_ret  receive the resulting coordinates
+ *
+ * NOTE(review): the return value of PictureTransformPoint is not checked.
+ */
+static void
+NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
+		      float *x_ret, float *y_ret)
+{
+	PictVector pt;
+
+	if (!t) {
+		*x_ret = (float)x / sx;
+		*y_ret = (float)y / sy;
+		return;
+	}
+
+	pt.vector[0] = IntToxFixed(x);
+	pt.vector[1] = IntToxFixed(y);
+	pt.vector[2] = xFixed1;
+	PictureTransformPoint(t, &pt);
+	*x_ret = xFixedToFloat(pt.vector[0]) / sx;
+	*y_ret = xFixedToFloat(pt.vector[1]) / sy;
+}
+
+#define CV_OUTm(sx,sy,mx,my,dx,dy) do { /* emit one vertex with source and mask texcoords; needs a local pNv */ \
+ NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(8), 4); /* attributes 8 and 9 in one packet */ \
+ NVDmaFloat(pNv, (sx)); NVDmaFloat(pNv, (sy)); \
+ NVDmaFloat(pNv, (mx)); NVDmaFloat(pNv, (my)); \
+ NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(0), 1); /* attribute 0: destination position */ \
+ NVDmaNext (pNv, ((dy)<<16)|(dx)); /* y in the high half, x in the low half */ \
+} while(0)
+#define CV_OUT(sx,sy,dx,dy) do { /* emit one vertex with source texcoords only; needs a local pNv */ \
+ NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(8), 2); \
+ NVDmaFloat(pNv, (sx)); NVDmaFloat(pNv, (sy)); \
+ NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(0), 1); /* attribute 0: destination position */ \
+ NVDmaNext (pNv, ((dy)<<16)|(dx)); /* y in the high half, x in the low half */ \
+} while(0)
+
+/*
+ * EXA Composite hook: draw one width x height rectangle as a quad.
+ *
+ * pdPix         destination pixmap (used to find the screen)
+ * srcX, srcY    top-left corner in the source picture
+ * maskX, maskY  top-left corner in the mask picture (ignored without mask)
+ * dstX, dstY    top-left corner in the destination
+ * width, height rectangle size in pixels
+ *
+ * Each of the four corners is passed through the picture transform on its
+ * own.  Deriving the other two corners from the transformed top-left and
+ * bottom-right points is only valid for scale/translate transforms and
+ * gives wrong texture coordinates for rotation, shear or projection.
+ */
+void
+NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
+				  int maskX, int maskY,
+				  int dstX , int dstY,
+				  int width, int height)
+{
+	/* Corner order of the quad: TL, TR, BR, BL (as rectangle multiples). */
+	static const int cornerX[4] = { 0, 1, 1, 0 };
+	static const int cornerY[4] = { 0, 0, 1, 1 };
+	ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	float sX[4], sY[4];
+	float mX[4], mY[4];
+	int i;
+	NV30EXA_STATE;
+
+	/* Compute every texture coordinate before emitting any vertex. */
+	for (i = 0; i < 4; i++) {
+		int offX = cornerX[i] * width;
+		int offY = cornerY[i] * height;
+
+		NV30EXATransformCoord(state->unit[0].transform,
+				      srcX + offX, srcY + offY,
+				      state->unit[0].width,
+				      state->unit[0].height, &sX[i], &sY[i]);
+		if (state->have_mask)
+			NV30EXATransformCoord(state->unit[1].transform,
+					      maskX + offX, maskY + offY,
+					      state->unit[1].width,
+					      state->unit[1].height,
+					      &mX[i], &mY[i]);
+	}
+
+	for (i = 0; i < 4; i++) {
+		int dX = dstX + cornerX[i] * width;
+		int dY = dstY + cornerY[i] * height;
+
+		if (state->have_mask)
+			CV_OUTm(sX[i], sY[i], mX[i], mY[i], dX, dY);
+		else
+			CV_OUT(sX[i], sY[i], dX, dY);
+	}
+
+	NVDmaKickoff(pNv);
+}
+
+/*
+ * EXA DoneComposite hook: close the primitive that
+ * NV30EXAPrepareComposite left open.
+ */
+void
+NV30EXADoneComposite(PixmapPtr pdPix)
+{
+	int scrnIndex = pdPix->drawable.pScreen->myNum;
+	ScrnInfoPtr pScrn = xf86Screens[scrnIndex];
+	NVPtr pNv = NVPTR(pScrn);
+
+	/* BEGIN_END with 0 ends the current primitive. */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BEGIN_END, 1);
+	NVDmaNext (pNv, 0);
+}
+
+/*
+ * Create (once) the 3D object for NV4x chipsets, bind it to NvSub3D and
+ * load the state the EXA composite hooks rely on.
+ *
+ * Returns TRUE on success, and also when the chipset id read from the card
+ * is not in the 0x4X range (nothing is done then).  Returns FALSE when the
+ * NV4x variant is in neither class mask or the object cannot be created.
+ *
+ * The statement order below is the order in which commands reach the
+ * card; keep it when editing.
+ */
+Bool
+NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	static int have_object = FALSE;
+	uint32_t class = 0, chipset;
+	int i;
+
+#undef NV40_TCL_PRIMITIVE_3D
+#define NV40_TCL_PRIMITIVE_3D 0x4097
+#define NV40_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK 0x00000baf
+#define NV44_TCL_PRIMITIVE_3D 0x4497
+#define NV44_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK 0x00005450
+
+	chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
+	if ((chipset & 0xf0) != 0x40)
+		return TRUE;
+	/* From here on only the low nibble (the X in NV4X) is kept. */
+	chipset &= 0xf;
+
+	/* Each mask has one bit per NV4X variant served by that class. */
+	if (NV40_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK & (1<<chipset))
+		class = NV40_TCL_PRIMITIVE_3D;
+	else
+	if (NV44_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK & (1<<chipset))
+		class = NV44_TCL_PRIMITIVE_3D;
+	else {
+		/* Put the family nibble back so the log shows e.g. "NV4a". */
+		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+			   "NV30EXA: Unknown chipset NV%02x\n",
+			   0x40 | chipset);
+		return FALSE;
+	}
+
+	/* The object is created on the first call only. */
+	if (!have_object) {
+		if (!NVDmaCreateContextObject(pNv, Nv3D, class))
+			return FALSE;
+		have_object = TRUE;
+	}
+
+	NVDmaSetObjectOnSubchannel(pNv, NvSub3D, Nv3D);
+
+	NVDmaStart(pNv, NvSub3D, 0x180, 1);
+	NVDmaNext (pNv, NvDmaNotifier0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT1, 2);
+	NVDmaNext (pNv, NvDmaFB);
+	NVDmaNext (pNv, NvDmaFB);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT8, 1);
+	NVDmaNext (pNv, NvDmaFB);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT4, 2);
+	NVDmaNext (pNv, NvDmaFB);
+	NVDmaNext (pNv, NvDmaFB);
+
+	/* voodoo */
+	NVDmaStart(pNv, NvSub3D, 0x1ea4, 3);
+	NVDmaNext(pNv, 0x00000010);
+	NVDmaNext(pNv, 0x01000100);
+	NVDmaNext(pNv, 0xff800006);
+	NVDmaStart(pNv, NvSub3D, 0x1fc4, 1);
+	NVDmaNext(pNv, 0x06144321);
+	NVDmaStart(pNv, NvSub3D, 0x1fc8, 2);
+	NVDmaNext(pNv, 0xedcba987);
+	NVDmaNext(pNv, 0x00000021);
+	NVDmaStart(pNv, NvSub3D, 0x1fd0, 1);
+	NVDmaNext(pNv, 0x00171615);
+	NVDmaStart(pNv, NvSub3D, 0x1fd4, 1);
+	NVDmaNext(pNv, 0x001b1a19);
+	NVDmaStart(pNv, NvSub3D, 0x1ef8, 1);
+	NVDmaNext(pNv, 0x0020ffff);
+	NVDmaStart(pNv, NvSub3D, 0x1d64, 1);
+	NVDmaNext(pNv, 0x00d30000);
+	NVDmaStart(pNv, NvSub3D, 0x1e94, 1);
+	NVDmaNext(pNv, 0x00000001);
+
+	/* identity viewport transform */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_XFRM_OX, 8);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaFloat(pNv, 1.0);
+	NVDmaFloat(pNv, 1.0);
+	NVDmaFloat(pNv, 1.0);
+	NVDmaFloat(pNv, 0.0);
+
+	/* default 3D state */
+	/*XXX: replace with the same state that the DRI emits on startup */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_FRONT_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_BACK_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 2);
+	NVDmaNext (pNv, 0); /* wr disable */
+	NVDmaNext (pNv, 0); /* test disable */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
+	NVDmaNext (pNv, 0x01010101); /* TR,TR,TR,TR */
+	NVDmaStart(pNv, NvSub3D, NV40_TCL_PRIMITIVE_3D_COLOR_MASK_BUFFER123, 1);
+	NVDmaNext (pNv, 0x0000fff0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D,
+		   NV30_TCL_PRIMITIVE_3D_LOGIC_OP_ENABLE, 2);
+	NVDmaNext (pNv, 0);
+	NVDmaNext (pNv, 0x1503);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_DITHER_ENABLE, 1);
+	NVDmaNext (pNv, 0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SHADE_MODEL, 1);
+	NVDmaNext (pNv, 0x1d01); /* GL_SMOOTH */
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR,2);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaFloat(pNv, 0.0);
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
+	NVDmaNext (pNv, 0x1b02); /* FRONT = GL_FILL */
+	NVDmaNext (pNv, 0x1b02); /* BACK = GL_FILL */
+	NVDmaStart(pNv, NvSub3D,
+		   NV30_TCL_PRIMITIVE_3D_POLYGON_STIPPLE_PATTERN(0), 0x20);
+	for (i=0;i<0x20;i++)
+		NVDmaNext(pNv, 0xFFFFFFFF);
+	/* Start with every texture unit disabled. */
+	for (i=0;i<16;i++) {
+		NVDmaStart(pNv, NvSub3D,
+			   NV30_TCL_PRIMITIVE_3D_TX_ENABLE_UNIT(i), 1);
+		NVDmaNext(pNv, 0);
+	}
+
+	NVDmaStart(pNv, NvSub3D, 0x1d78, 1);
+	NVDmaNext (pNv, 0x110);
+
+	NVDmaStart(pNv, NvSub3D, 0x0220, 1);
+	NVDmaNext (pNv, 1);
+	NVDmaStart(pNv, NvSub3D,
+		   NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_DIM0, 2);
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SCISSOR_WIDTH_XPOS, 2);
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_DIMS_0, 2);
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaNext (pNv, (4096 << 16));
+	NVDmaStart(pNv, NvSub3D,
+		   NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_OFS0, 2);
+	NVDmaNext (pNv, (4095 << 16));
+	NVDmaNext (pNv, (4095 << 16));
+
+	return TRUE;
+}
+
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index e1c0f8e..51bb5a7 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -429,6 +429,15 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
/* EXA-only */
INIT_CONTEXT_OBJECT(MemFormat);
+ /* 3D init */
+ switch (pNv->Architecture) {
+ case NV_ARCH_40:
+ INIT_CONTEXT_OBJECT(NV40TCL);
+ break;
+ default:
+ break;
+ }
+
return TRUE;
}
diff --git a/src/nv_dma.h b/src/nv_dma.h
index d09b35f..16be1fd 100644
--- a/src/nv_dma.h
+++ b/src/nv_dma.h
@@ -93,9 +93,13 @@ enum DMASubchannel {
(pNv)->dmaBase[(pNv)->dmaCurrent++] = (data); \
} while(0)
-#define NVDmaFloat(pNv, data) do { \
- float f = (data); \
- NVDmaNext((pNv), *(CARD32*)&f); \
+#define NVDmaFloat(pNv, data) do { \
+ union { \
+ float v; \
+ uint32_t u; \
+ } c; \
+ c.v = (data); \
+ NVDmaNext((pNv), c.u); \
} while(0)
#define NVDmaStart(pNv, subchannel, tag, size) do { \
diff --git a/src/nv_exa.c b/src/nv_exa.c
index 3096e29..7e5c929 100644
--- a/src/nv_exa.c
+++ b/src/nv_exa.c
@@ -565,17 +565,21 @@ Bool NVExaInit(ScreenPtr pScreen)
pNv->EXADriverPtr->Solid = NVExaSolid;
pNv->EXADriverPtr->DoneSolid = NVExaDoneSolid;
- /*darktama: Hard-disabled these for now, I get lockups often when
- * starting e17 with them enabled.
- *marcheu: Doesn't crash for me... was it related to the setup being
- * called twice before ?
- */
- if (pNv->BlendingPossible) {
- /* install composite hooks */
- pNv->EXADriverPtr->CheckComposite = NVCheckComposite;
+ switch (pNv->Architecture) {
+ case NV_ARCH_40:
+ pNv->EXADriverPtr->CheckComposite = NV30EXACheckComposite;
+ pNv->EXADriverPtr->PrepareComposite = NV30EXAPrepareComposite;
+ pNv->EXADriverPtr->Composite = NV30EXAComposite;
+ pNv->EXADriverPtr->DoneComposite = NV30EXADoneComposite;
+ break;
+ default:
+ if (!pNv->BlendingPossible)
+ break;
+ pNv->EXADriverPtr->CheckComposite = NVCheckComposite;
pNv->EXADriverPtr->PrepareComposite = NVPrepareComposite;
- pNv->EXADriverPtr->Composite = NVComposite;
- pNv->EXADriverPtr->DoneComposite = NVDoneComposite;
+ pNv->EXADriverPtr->Composite = NVComposite;
+ pNv->EXADriverPtr->DoneComposite = NVDoneComposite;
+ break;
}
/* If we're going to try and use 3D, let the card-specific function
diff --git a/src/nv_proto.h b/src/nv_proto.h
index fcf487f..83748c4 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -100,5 +100,13 @@ void NVPointerMoved(int index, int x, int y);
/* in nv_bios.c */
unsigned int NVParseBios(ScrnInfoPtr pScrn);
+/* in nv30_exa.c */
+Bool NVAccelInitNV40TCL(ScrnInfoPtr pScrn);
+Bool NV30EXACheckComposite(int, PicturePtr, PicturePtr, PicturePtr);
+Bool NV30EXAPrepareComposite(int, PicturePtr, PicturePtr, PicturePtr,
+ PixmapPtr, PixmapPtr, PixmapPtr);
+void NV30EXAComposite(PixmapPtr, int, int, int, int, int, int, int, int);
+void NV30EXADoneComposite(PixmapPtr);
+
#endif /* __NV_PROTO_H__ */
diff --git a/src/nv_shaders.h b/src/nv_shaders.h
new file mode 100644
index 0000000..d1a769f
--- /dev/null
+++ b/src/nv_shaders.h
@@ -0,0 +1,102 @@
+#ifndef __NV_SHADERS_H__
+#define __NV_SHADERS_H__
+
+#define NV_SHADER_MAX_PROGRAM_LENGTH 256 /* capacity of nv_shader.data[], in 32-bit words */
+
+typedef struct nv_shader { /* A pre-assembled vertex or fragment program plus its upload bookkeeping. */
+ uint32_t hw_id; /* 0 until uploaded; the loaders in nv30_exa.c then store the program's id/address here */
+ uint32_t size; /* number of 32-bit words used in data[] */
+ union { /* which member applies depends on the program type */
+ struct {
+ uint32_t vp_in_reg; /* first word sent to VP_IN_REG by the vertex loader */
+ uint32_t vp_out_reg; /* second word sent in the same packet */
+ } NV30VP;
+ struct {
+ uint32_t num_regs; /* register count the fragment loader packs into a control word */
+ } NV30FP;
+ } card_priv;
+ uint32_t data[NV_SHADER_MAX_PROGRAM_LENGTH]; /* program words; four per instruction in the programs below */
+} nv_shader_t;
+
+/*******************************************************************************
+ * NV40/G70 vertex shaders
+ */
+
+static nv_shader_t nv40_vp_exa_render = { /* Vertex program for EXA composite: copies position and texcoords 0/1 to the outputs. */
+ .card_priv.NV30VP.vp_in_reg = 0x00000309,
+ .card_priv.NV30VP.vp_out_reg = 0x0000c001,
+ .size = (3*4), /* three instructions of four words */
+ .data = {
+ /* MOV result.position, vertex.position */
+ 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
+ /* MOV result.texcoord[0], vertex.texcoord[0] */
+ 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
+ /* MOV result.texcoord[1], vertex.texcoord[1] */
+ 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
+ }
+};
+
+/*******************************************************************************
+ * NV30/NV40/G70 fragment shaders
+ */
+
+static nv_shader_t nv30_fp_pass_col0 = { /* Fragment program: output the interpolated colour. Not referenced by nv30_exa.c. */
+ .card_priv.NV30FP.num_regs = 2,
+ .size = (1*4), /* one instruction of four words */
+ .data = {
+ /* MOV R0, fragment.color */
+ 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800
+ }
+};
+
+static nv_shader_t nv30_fp_pass_tex0 = { /* Fragment program: output the texture 0 sample; used for composites without a mask. */
+ .card_priv.NV30FP.num_regs = 2,
+ .size = (2*4), /* two instructions of four words */
+ .data = {
+ /* TEX R0, fragment.texcoord[0], texture[0], 2D */
+ 0x17009e00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
+ /* MOV R0, R0 */
+ 0x01401e81, 0x1c9dc800, 0x0001c800, 0x0001c800
+ }
+};
+
+static nv_shader_t nv30_fp_composite_mask = { /* Fragment program: source (texture 0) times mask alpha (texture 1); used for non-component-alpha masks. */
+ .card_priv.NV30FP.num_regs = 2,
+ .size = (3*4), /* three instructions of four words */
+ .data = {
+ /* TEXC0 R1.w , fragment.texcoord[1], texture[1], 2D */
+ 0x1702b102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
+ /* TEX R0 (NE0.wwww), fragment.texcoord[0], texture[0], 2D */
+ 0x17009e00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
+ /* MUL R0 , R0, R1.w */
+ 0x02001e81, 0x1c9dc800, 0x0001fe04, 0x0001c800
+ }
+};
+
+static nv_shader_t nv30_fp_composite_mask_sa_ca = { /* Fragment program: source alpha (texture 0) times mask colour (texture 1); used for component-alpha masks when the operator needs source alpha. */
+ .card_priv.NV30FP.num_regs = 2,
+ .size = (3*4), /* three instructions of four words */
+ .data = {
+ /* TEXC0 R1.w , fragment.texcoord[0], texture[0], 2D */
+ 0x17009102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
+ /* TEX R0 (NE0.wwww), fragment.texcoord[1], texture[1], 2D */
+ 0x1702be00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
+ /* MUL R0 , R1.wwww, R0 */
+ 0x02001e81, 0x1c9dfe04, 0x0001c800, 0x0001c800
+ }
+};
+
+static nv_shader_t nv30_fp_composite_mask_ca = { /* Fragment program: source (texture 0) times mask (texture 1), per channel; used for component-alpha masks otherwise. */
+ .card_priv.NV30FP.num_regs = 2,
+ .size = (3*4), /* three instructions of four words */
+ .data = {
+ /* TEXC0 R0 , fragment.texcoord[0], texture[0], 2D */
+ 0x17009f00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
+ /* TEX R1 (NE0.xyzw), fragment.texcoord[1], texture[1], 2D */
+ 0x1702be02, 0x1c95c801, 0x0001c800, 0x3fe1c800,
+ /* MUL R0 , R0, R1 */
+ 0x02001e81, 0x1c9dc800, 0x0001c804, 0x0001c800
+ }
+};
+
+#endif