From b48bcc094beecf521899dd63c8fdbccfd534e5cd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 19 Apr 2012 13:21:27 +1000 Subject: nv50/exa: perform texcoord transformations in vertex program Signed-off-by: Ben Skeggs --- src/nv10_exa.c | 3 -- src/nv30_exa.c | 3 -- src/nv40_exa.c | 3 -- src/nv50_accel.c | 91 ++++++++++++++++++++++++++++++++++++------------ src/nv50_accel.h | 26 ++++++-------- src/nv50_exa.c | 103 ++++++++++++++++--------------------------------------- src/nv50_xv.c | 25 +++++++++----- src/nv_type.h | 3 ++ src/nvc0_exa.c | 3 -- 9 files changed, 129 insertions(+), 131 deletions(-) diff --git a/src/nv10_exa.c b/src/nv10_exa.c index efbf4b5..16b6d1c 100644 --- a/src/nv10_exa.c +++ b/src/nv10_exa.c @@ -613,9 +613,6 @@ NV10EXAPrepareComposite(int op, f(p, __i, v, ## __VA_ARGS__); \ } while (0); -#define xFixedToFloat(v) \ - ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) - #define PUSH_DATAi(push, v, i) \ PUSH_DATAf(push, xFixedToFloat((v).vector[i])) diff --git a/src/nv30_exa.c b/src/nv30_exa.c index 0df2d0f..d571af3 100644 --- a/src/nv30_exa.c +++ b/src/nv30_exa.c @@ -481,9 +481,6 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict, return TRUE; } -#define xFixedToFloat(v) \ - ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) - static void NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy, float *x_ret, float *y_ret) diff --git a/src/nv40_exa.c b/src/nv40_exa.c index c4249a8..4c1f1be 100644 --- a/src/nv40_exa.c +++ b/src/nv40_exa.c @@ -429,9 +429,6 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict, return TRUE; } -#define xFixedToFloat(v) \ - ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) - static inline void NV40EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy, float *x_ret, float *y_ret) diff --git a/src/nv50_accel.c b/src/nv50_accel.c index 32166ec..086180f 100644 --- a/src/nv50_accel.c +++ b/src/nv50_accel.c @@ -215,19 +215,67 @@ NVAccelInitNV50TCL(ScrnInfoPtr pScrn) BEGIN_NV04(push, NV50_3D(TEX_LIMITS(2)), 1); PUSH_DATA (push, 0x54); - PUSH_DATAu(push, pNv->scratch, PVP_OFFSET, 3 * 2 * 2); + PUSH_DATAu(push, pNv->scratch, PVP_OFFSET, 30 * 2); PUSH_DATA (push, 0x10000001); PUSH_DATA (push, 0x0423c788); PUSH_DATA (push, 0x10000205); PUSH_DATA (push, 0x0423c788); - PUSH_DATA (push, 0x10000409); - PUSH_DATA (push, 0x0423c788); - PUSH_DATA (push, 0x1000060d); - PUSH_DATA (push, 0x0423c788); - PUSH_DATA (push, 0x10000811); - PUSH_DATA (push, 0x0423c788); - PUSH_DATA (push, 0x10000a15); - PUSH_DATA (push, 0x0423c789); + PUSH_DATA (push, 0xc0800401); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xc0830405); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xc0860409); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xe0810601); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xe0840605); + PUSH_DATA (push, 0x00204780); + PUSH_DATA (push, 0xe0870609); + PUSH_DATA (push, 0x00208780); + PUSH_DATA (push, 0xb1000001); + PUSH_DATA (push, 0x00008780); + PUSH_DATA (push, 0xb1000205); + PUSH_DATA (push, 0x00014780); + PUSH_DATA (push, 0xb1000409); + PUSH_DATA (push, 0x00020780); + PUSH_DATA (push, 0x90000409); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0020001); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0020205); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0890009); + PUSH_DATA (push, 0x00000788); + PUSH_DATA (push, 0xc08a020d); + PUSH_DATA (push, 0x00000788); + PUSH_DATA (push, 0xc08b0801); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xc08e0805); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xc0910809); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xe08c0a01); + PUSH_DATA (push, 0x00200780); + PUSH_DATA (push, 0xe08f0a05); + PUSH_DATA (push, 0x00204780); + PUSH_DATA (push, 0xe0920a09); + PUSH_DATA (push, 0x00208780); + PUSH_DATA (push, 0xb1000001); + PUSH_DATA (push, 0x00034780); + PUSH_DATA (push, 0xb1000205); + PUSH_DATA (push, 0x00040780); + PUSH_DATA (push, 0xb1000409); + PUSH_DATA (push, 0x0004c780); + PUSH_DATA (push, 0x90000409); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0020001); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0020205); + PUSH_DATA (push, 0x00000780); + PUSH_DATA (push, 0xc0940011); + PUSH_DATA (push, 0x00000788); + PUSH_DATA (push, 0xc0950215); + PUSH_DATA (push, 0x00000789); /* fetch only VTX_ATTR[0,8,9].xy */ BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2); @@ -235,17 +283,18 @@ NVAccelInitNV50TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 0x00000033); BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1); PUSH_DATA (push, 6); - if (pNv->Nv3D->oclass != 0x8597) { - BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 2); - PUSH_DATA (push, 8); - PUSH_DATA (push, 0); /* NV50_3D_VP_REG_ALLOC_TEMP */ - } else { - BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1); - PUSH_DATA (push, 8); - } + BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 2); + PUSH_DATA (push, 8); + PUSH_DATA (push, 4); /* NV50_3D_VP_REG_ALLOC_TEMP */ BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); PUSH_DATA (push, (pNv->scratch->offset + PVP_OFFSET) >> 32); PUSH_DATA (push, (pNv->scratch->offset + PVP_OFFSET)); + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); + PUSH_DATA (push, (CB_PVP << NV50_3D_CB_DEF_SET_BUFFER__SHIFT) | 0x4000); + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, 0x00000001 | (CB_PVP << 12)); BEGIN_NV04(push, NV50_3D(VP_START_ID), 1); PUSH_DATA (push, 0); @@ -346,8 +395,8 @@ NVAccelInitNV50TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 0xb0810a0c); PUSH_DATA (push, 0xb0820a10); PUSH_DATA (push, 0xb0830a14); - PUSH_DATA (push, 0x82030400); - PUSH_DATA (push, 0x82040404); + PUSH_DATA (push, 0x82010400); + PUSH_DATA (push, 0x82020404); PUSH_DATA (push, 0xf0400201); PUSH_DATA (push, 0x0000c784); PUSH_DATA (push, 0xe084000c); @@ -377,6 +426,8 @@ NVAccelInitNV50TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, (pNv->scratch->offset + PFP_DATA) >> 32); PUSH_DATA (push, (pNv->scratch->offset + PFP_DATA)); PUSH_DATA (push, (CB_PFP << NV50_3D_CB_DEF_SET_BUFFER__SHIFT) | 0x4000); + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, 0x00000031 | (CB_PFP << 12)); BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1); PUSH_DATA (push, 1); @@ -395,8 +446,6 @@ NVAccelInitNV50TCL(ScrnInfoPtr pScrn) PUSH_DATA (push, 8192 << NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT); PUSH_DATA (push, 8192 << NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT); - BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); - PUSH_DATA (push, 0x00000031 | (CB_PFP << 12)); return TRUE; } diff --git a/src/nv50_accel.h b/src/nv50_accel.h index 0f77389..6fb3dbd 100644 --- a/src/nv50_accel.h +++ b/src/nv50_accel.h @@ -25,7 +25,8 @@ #define PFP_OFFSET 0x00001000 /* Fragment program */ #define TIC_OFFSET 0x00002000 /* Texture Image Control */ #define TSC_OFFSET 0x00003000 /* Texture Sampler Control */ -#define PFP_DATA 0x00004000 /* FP constbuf */ +#define PVP_DATA 0x00004000 /* VP constbuf */ +#define PFP_DATA 0x00005000 /* FP constbuf */ /* Fragment programs */ #define PFP_S 0x0000 /* (src) */ @@ -38,33 +39,28 @@ /* Constant buffer assignments */ #define CB_PSH 0 +#define CB_PVP 1 #define CB_PFP 2 static __inline__ void -VTX1s(NVPtr pNv, float sx, float sy, unsigned dx, unsigned dy) +PUSH_VTX1s(struct nouveau_pushbuf *push, float sx, float sy, int dx, int dy) { - struct nouveau_pushbuf *push = pNv->pushbuf; - BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(8)), 2); PUSH_DATAf(push, sx); PUSH_DATAf(push, sy); BEGIN_NV04(push, NV50_3D(VTX_ATTR_2I(0)), 1); - PUSH_DATA (push, (dy << 16) | dx); + PUSH_DATA (push, (dy << 16) | dx); } static __inline__ void -VTX2s(NVPtr pNv, float s1x, float s1y, float s2x, float s2y, - unsigned dx, unsigned dy) +PUSH_VTX2s(struct nouveau_pushbuf *push, + int x1, int y1, int x2, int y2, int dx, int dy) { - struct nouveau_pushbuf *push = pNv->pushbuf; - - BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(8)), 4); - PUSH_DATAf(push, s1x); - PUSH_DATAf(push, s1y); - PUSH_DATAf(push, s2x); - PUSH_DATAf(push, s2y); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2I(8)), 2); + PUSH_DATA (push, (y1 << 16) | x1); + PUSH_DATA (push, (y2 << 16) | x2); BEGIN_NV04(push, NV50_3D(VTX_ATTR_2I(0)), 1); - PUSH_DATA (push, (dy << 16) | dx); + PUSH_DATA (push, (dy << 16) | dx); } static __inline__ void diff --git a/src/nv50_exa.c b/src/nv50_exa.c index 4760319..14759c5 100644 --- a/src/nv50_exa.c +++ b/src/nv50_exa.c @@ -26,22 +26,10 @@ #include "nv50_accel.h" -struct nv50_exa_state { - Bool have_mask; - - struct { - PictTransformPtr transform; - float width; - float height; - } unit[2]; -}; -static struct nv50_exa_state exa_state; - #define NV50EXA_LOCALS(p) \ ScrnInfoPtr pScrn = xf86Screens[(p)->drawable.pScreen->myNum]; \ NVPtr pNv = NVPTR(pScrn); \ - struct nouveau_pushbuf *push = pNv->pushbuf; (void)push; \ - struct nv50_exa_state *state = &exa_state; (void)state + struct nouveau_pushbuf *push = pNv->pushbuf; (void)push; #define BF(f) NV50_BLEND_FACTOR_##f @@ -666,9 +654,31 @@ NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit) PUSH_DATA (push, 0x00000000); PUSH_DATA (push, 0x00000000); - state->unit[unit].width = ppix->drawable.width; - state->unit[unit].height = ppix->drawable.height; - state->unit[unit].transform = ppict->transform; + PUSH_DATAu(push, pNv->scratch, PVP_DATA + (unit * 11 * 4), 11); + if (ppict->transform) { + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[0][0])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[0][1])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[0][2])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[1][0])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[1][1])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[1][2])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[2][0])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[2][1])); + PUSH_DATAf(push, xFixedToFloat(ppict->transform->matrix[2][2])); + } else { + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + } + PUSH_DATAf(push, 1.0 / ppix->drawable.width); + PUSH_DATAf(push, 1.0 / ppix->drawable.height); + return TRUE; } @@ -778,7 +788,6 @@ NV50EXAPrepareComposite(int op, if (pmpict) { if (!NV50EXATexture(pmpix, pmpict, 1)) NOUVEAU_FALLBACK("mask picture invalid\n"); - state->have_mask = TRUE; BEGIN_NV04(push, NV50_3D(FP_START_ID), 1); if (pdpict->format == PICT_a8) { @@ -795,8 +804,6 @@ NV50EXAPrepareComposite(int op, } } } else { - state->have_mask = FALSE; - BEGIN_NV04(push, NV50_3D(FP_START_ID), 1); if (pdpict->format == PICT_a8) PUSH_DATA (push, PFP_S_A8); @@ -828,33 +835,11 @@ NV50EXAPrepareComposite(int op, return TRUE; } -#define xFixedToFloat(v) \ - ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) -static inline void -NV50EXATransform(PictTransformPtr t, int x, int y, float sx, float sy, - float *x_ret, float *y_ret) -{ - if (t) { - PictVector v; - - v.vector[0] = IntToxFixed(x); - v.vector[1] = IntToxFixed(y); - v.vector[2] = xFixed1; - PictureTransformPoint(t, &v); - *x_ret = xFixedToFloat(v.vector[0]) / sx; - *y_ret = xFixedToFloat(v.vector[1]) / sy; - } else { - *x_ret = (float)x / sx; - *y_ret = (float)y / sy; - } -} - void NV50EXAComposite(PixmapPtr pdpix, int sx, int sy, int mx, int my, int dx, int dy, int w, int h) { NV50EXA_LOCALS(pdpix); - float sX0, sX1, sX2, sY0, sY1, sY2; if (!PUSH_SPACE(push, 64)) return; @@ -864,39 +849,9 @@ NV50EXAComposite(PixmapPtr pdpix, int sx, int sy, int mx, int my, PUSH_DATA (push, (dy + h) << 16 | dy); BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); - - NV50EXATransform(state->unit[0].transform, sx, sy + (h * 2), - state->unit[0].width, state->unit[0].height, - &sX0, &sY0); - NV50EXATransform(state->unit[0].transform, sx, sy, - state->unit[0].width, state->unit[0].height, - &sX1, &sY1); - NV50EXATransform(state->unit[0].transform, sx + (w * 2), sy, - state->unit[0].width, state->unit[0].height, - &sX2, &sY2); - - if (state->have_mask) { - float mX0, mX1, mX2, mY0, mY1, mY2; - - NV50EXATransform(state->unit[1].transform, mx, my + (h * 2), - state->unit[1].width, state->unit[1].height, - &mX0, &mY0); - NV50EXATransform(state->unit[1].transform, mx, my, - state->unit[1].width, state->unit[1].height, - &mX1, &mY1); - NV50EXATransform(state->unit[1].transform, mx + (w * 2), my, - state->unit[1].width, state->unit[1].height, - &mX2, &mY2); - - VTX2s(pNv, sX0, sY0, mX0, mY0, dx, dy + (h * 2)); - VTX2s(pNv, sX1, sY1, mX1, mY1, dx, dy); - VTX2s(pNv, sX2, sY2, mX2, mY2, dx + (w * 2), dy); - } else { - VTX1s(pNv, sX0, sY0, dx, dy + (h * 2)); - VTX1s(pNv, sX1, sY1, dx, dy); - VTX1s(pNv, sX2, sY2, dx + (w * 2), dy); - } - + PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); + PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); + PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); } diff --git a/src/nv50_xv.c b/src/nv50_xv.c index 1c6e5be..8ab7202 100644 --- a/src/nv50_xv.c +++ b/src/nv50_xv.c @@ -214,6 +214,19 @@ nv50_xv_image_put(ScrnInfoPtr pScrn, BEGIN_NV04(push, NV50_3D(BIND_TIC(2)), 1); PUSH_DATA (push, 0x203); + PUSH_DATAu(push, pNv->scratch, PVP_DATA, 11); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0 / width); + PUSH_DATAf(push, 1.0 / height); + if (pPriv->SyncToVBlank) NV50SyncToVBlank(ppix, dstBox); @@ -235,11 +248,6 @@ nv50_xv_image_put(ScrnInfoPtr pScrn, int sy1=pbox->y1; int sy2=pbox->y2; - tx1 = tx1 / width; - tx2 = tx2 / width; - ty1 = ty1 / height; - ty2 = ty2 / height; - if (nouveau_pushbuf_space(push, 64, 0, 0) || nouveau_pushbuf_refn (push, refs, 3)) return BadImplementation; @@ -251,12 +259,11 @@ nv50_xv_image_put(ScrnInfoPtr pScrn, BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); PUSH_DATA (push, sx2 << NV50_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1); PUSH_DATA (push, sy2 << NV50_3D_SCISSOR_VERT_MAX__SHIFT | sy1 ); - BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); - VTX2s(pNv, tx1, ty1, tx1, ty1, sx1, sy1); - VTX2s(pNv, tx2+(tx2-tx1), ty1, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); - VTX2s(pNv, tx1, ty2+(ty2-ty1), tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); + PUSH_VTX1s(push, tx1, ty1, sx1, sy1); + PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); + PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); diff --git a/src/nv_type.h b/src/nv_type.h index bd00df3..49150ba 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -206,4 +206,7 @@ static inline int nv_cursor_width(NVPtr pNv) return pNv->dev->chipset >= 0x10 ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE; } +#define xFixedToFloat(v) \ + ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) + #endif /* __NV_STRUCT_H__ */ diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c index 7ec45b3..9149cdd 100644 --- a/src/nvc0_exa.c +++ b/src/nvc0_exa.c @@ -832,9 +832,6 @@ NVC0EXAPrepareComposite(int op, return TRUE; } -#define xFixedToFloat(v) \ - ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0)) - static inline void NVC0EXATransform(PictTransformPtr t, int x, int y, float sx, float sy, float *x_ret, float *y_ret) -- cgit v1.2.1