diff options
Diffstat (limited to 'cogl/driver/drm/brw/brw_wm.c')
-rw-r--r-- | cogl/driver/drm/brw/brw_wm.c | 681 |
1 files changed, 681 insertions, 0 deletions
diff --git a/cogl/driver/drm/brw/brw_wm.c b/cogl/driver/drm/brw/brw_wm.c new file mode 100644 index 00000000..f54e55ef --- /dev/null +++ b/cogl/driver/drm/brw/brw_wm.c @@ -0,0 +1,681 @@ +#include "brw.h" + +#define X16 8 +#define Y16 10 + +static void brw_wm_xy(struct brw_compile *p, int dw) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = __retype_uw(r1); + struct brw_reg x_uw, y_uw; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (dw == 16) { + x_uw = brw_uw16_grf(30, 0); + y_uw = brw_uw16_grf(28, 0); + } else { + x_uw = brw_uw8_grf(30, 0); + y_uw = brw_uw8_grf(28, 0); + } + + brw_ADD(p, + x_uw, + __stride(__suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + brw_ADD(p, + y_uw, + __stride(__suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); + brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); +} + +static void brw_wm_affine_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 60 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 60 ? 4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 60) { + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + msg += dw/8; + + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 4), + brw_vec8_grf(2, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + } +} + +static inline unsigned simd(int dw) +{ + return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; +} + +static inline struct brw_reg sample_result(int dw, int result) +{ + return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static int brw_wm_sample(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + bool header; + int len; + + len = dw == 16 ? 4 : 2; + if (p->gen >= 60) { + header = false; + src0 = brw_message_reg(++msg); + } else { + header = true; + src0 = brw_vec8_grf(0, 0); + } + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_XYZW, 0, + 2*len, len+header, header, simd(dw)); + return result; +} + +static int brw_wm_sample__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + int mlen, rlen; + + if (dw == 8) { + /* SIMD8 sample return is not masked */ + mlen = 3; + rlen = 4; + } else { + mlen = 5; + rlen = 2; + } + + if (p->gen >= 60) + src0 = brw_message_reg(msg); + else + src0 = brw_vec8_grf(0, 0); + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_W, 0, + rlen, mlen, true, simd(dw)); + + if (dw == 8) + result += 3; + + return result; +} + +static int brw_wm_affine(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_affine__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +static inline struct brw_reg null_result(int dw) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static void brw_fb_write(struct brw_compile *p, int dw) +{ + struct brw_instruction *insn; + unsigned msg_control, msg_type, msg_len; + struct brw_reg src0; + bool header; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + msg_len = 8; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + msg_len = 4; + } + + if (p->gen < 60) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + + msg_len += 2; + } + + /* The execution mask is ignored for render target writes. */ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 60) { + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = brw_message_reg(2); + header = false; + } else { + insn->header.destreg__conditionalmod = 0; + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = __retype_uw(brw_vec8_grf(0, 0)); + header = true; + } + + brw_set_dest(p, insn, null_result(dw)); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, insn, 0, + msg_control, msg_type, msg_len, + header, true, 0, true, false); +} + +static void brw_wm_write(struct brw_compile *p, int dw, int src) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + /* XXX pixel execution mask? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); + brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); + brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); + brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MOV(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MOV(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec8_grf(mask, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec8_grf(mask+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__opacity(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec1_grf(mask, 3)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec1_grf(mask, 3)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src + 0, 0), + brw_vec8_grf(mask + 0, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src + 1, 0), + brw_vec8_grf(mask + 1, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src + 2, 0), + brw_vec8_grf(mask + 2, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src + 3, 0), + brw_vec8_grf(mask + 3, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n + 1, 0), + brw_vec8_grf(mask + 2*n + 1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +bool +brw_wm_kernel__affine(struct brw_compile *p, int dispatch) +{ + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + +/* Projective variants */ + +static void brw_wm_projective_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 60 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 60 ? 4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 60) { + /* First compute 1/z */ + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv+1, 0), + brw_vec8_grf(2, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(4, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + /* First compute 1/z */ + brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + + /* Now compute the output s,t values */ + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + } +} + +static int brw_wm_projective(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_projective__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +bool +brw_wm_kernel__projective(struct brw_compile *p, int dispatch) +{ + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + +bool +brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} |