summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Amber <amber@igalia.com> 2023-02-28 14:14:35 +0100
committer: Marge Bot <emma+marge@anholt.net> 2023-05-17 00:27:27 +0000
commit: 7609f83c70234725b7d4f2a618f82c197e09e4c6 (patch)
tree: fe362c4c19e90f3e6a12825862522e01548dc025
parent: 2cc77088b96772cd55c724b26758df12937aede9 (diff)
download: mesa-7609f83c70234725b7d4f2a618f82c197e09e4c6.tar.gz
ir3, freedreno: implement GL_ARB_shader_draw_parameters
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21593>
-rw-r--r-- src/freedreno/ir3/ir3_compiler.c | 4
-rw-r--r-- src/freedreno/ir3/ir3_compiler.h | 3
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c | 6
-rw-r--r-- src/freedreno/ir3/ir3_context.h | 2
-rw-r--r-- src/freedreno/ir3/ir3_nir.c | 4
-rw-r--r-- src/freedreno/ir3/ir3_shader.h | 7
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc | 15
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_const.cc | 6
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_draw.cc | 44
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 1
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_const.h | 8
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_gallium.c | 4
13 files changed, 84 insertions, 22 deletions
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 4636a7398c7..b55ca64ab39 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -285,6 +285,10 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->nir_options.force_indirect_unrolling = nir_var_all;
}
+ if (options->lower_base_vertex) {
+ compiler->nir_options.lower_base_vertex = true;
+ }
+
/* 16-bit ALU op generation is mostly controlled by frontend compiler options, but
* this core NIR option enables some optimizations of 16-bit operations.
*/
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 4abd574ff80..daf966a993a 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -67,6 +67,9 @@ struct ir3_compiler_options {
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
bool storage_16bit;
+
+ /* If base_vertex should be lowered in nir */
+ bool lower_base_vertex;
};
struct ir3_compiler {
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index f8316f417a4..c457d4db575 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2204,6 +2204,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
dst[0] = ctx->basevertex;
break;
+ case nir_intrinsic_load_is_indexed_draw:
+ if (!ctx->is_indexed_draw) {
+ ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_IS_INDEXED_DRAW);
+ }
+ dst[0] = ctx->is_indexed_draw;
+ break;
case nir_intrinsic_load_draw_id:
if (!ctx->draw_id) {
ctx->draw_id = create_driver_param(ctx, IR3_DP_DRAWID);
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 0085d8abdbf..22c7b1b3cdf 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -85,7 +85,7 @@ struct ir3_context {
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
- *draw_id, *view_index;
+ *draw_id, *view_index, *is_indexed_draw;
/* For fragment shaders: */
struct ir3_instruction *samp_id, *samp_mask_in;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index b1cf668421e..da842e01c5c 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -864,6 +864,10 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_VTXID_BASE + 1);
break;
+ case nir_intrinsic_load_is_indexed_draw:
+ layout->num_driver_params =
+ MAX2(layout->num_driver_params, IR3_DP_IS_INDEXED_DRAW + 1);
+ break;
case nir_intrinsic_load_base_instance:
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_INSTID_BASE + 1);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 25125332e7c..6c921732c1a 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -69,11 +69,12 @@ enum ir3_driver_param {
IR3_DP_VTXID_BASE = 1,
IR3_DP_INSTID_BASE = 2,
IR3_DP_VTXCNT_MAX = 3,
+ IR3_DP_IS_INDEXED_DRAW = 4, /* Note: boolean, ie. 0 or ~0 */
/* user-clip-plane components, up to 8x vec4's: */
- IR3_DP_UCP0_X = 4,
+ IR3_DP_UCP0_X = 5,
/* .... */
- IR3_DP_UCP7_W = 35,
- IR3_DP_VS_COUNT = 36, /* must be aligned to vec4 */
+ IR3_DP_UCP7_W = 36,
+ IR3_DP_VS_COUNT = 40, /* must be aligned to vec4 */
/* TCS driver params: */
IR3_DP_HS_DEFAULT_OUTER_LEVEL_X = 0,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc
index bc13f7fdf27..9a3d92949fb 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc
@@ -186,12 +186,25 @@ fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
if (flags & (PIPE_BARRIER_TEXTURE |
PIPE_BARRIER_IMAGE |
- PIPE_BARRIER_INDIRECT_BUFFER |
PIPE_BARRIER_UPDATE_BUFFER |
PIPE_BARRIER_UPDATE_TEXTURE)) {
flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
}
+ if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
+ flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
+
+ /* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
+ * do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
+ * pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
+ * with these opcodes, but the alternative would add unnecessary WAIT_FOR_ME's
+ * before draw opcodes that don't need it.
+ */
+ if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
+ flushes |= FD6_WAIT_FOR_ME;
+ }
+ }
+
if (flags & PIPE_BARRIER_FRAMEBUFFER) {
fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
index ba0acf4205e..61d19428d83 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
@@ -300,12 +300,12 @@ fd6_build_driver_params(struct fd6_emit *emit)
if (emit->vs->need_driver_params) {
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
- emit->indirect, emit->draw);
+ emit->indirect, emit->draw, emit->draw_id);
}
if (emit->gs && emit->gs->need_driver_params) {
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
- emit->indirect, emit->draw);
+ emit->indirect, emit->draw, 0);
}
if (emit->hs && emit->hs->need_driver_params) {
@@ -314,7 +314,7 @@ fd6_build_driver_params(struct fd6_emit *emit)
if (emit->ds && emit->ds->need_driver_params) {
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
- emit->indirect, emit->draw);
+ emit->indirect, emit->draw, 0);
}
fd6_ctx->has_dp_state = true;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc
index 165d0fc0955..015cde6f3f2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc
@@ -74,26 +74,39 @@ draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
}
static void
-draw_emit_indirect(struct fd_ringbuffer *ring,
+draw_emit_indirect(struct fd_context *ctx,
+ struct fd_ringbuffer *ring,
struct CP_DRAW_INDX_OFFSET_0 *draw0,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- unsigned index_offset)
+ unsigned index_offset, uint32_t driver_param)
{
struct fd_resource *ind = fd_resource(indirect->buffer);
if (info->index_size) {
+ OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 9);
+ OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
+ OUT_RING(ring,
+ (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED)
+ | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
struct pipe_resource *idx = info->index.resource;
unsigned max_indices = (idx->width0 - index_offset) / info->index_size;
-
- OUT_PKT(ring, CP_DRAW_INDX_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- A5XX_CP_DRAW_INDX_INDIRECT_INDX_BASE(fd_resource(idx)->bo,
- index_offset),
- A5XX_CP_DRAW_INDX_INDIRECT_3(.max_indices = max_indices),
- A5XX_CP_DRAW_INDX_INDIRECT_INDIRECT(ind->bo, indirect->offset));
+ OUT_RING(ring, indirect->draw_count);
+ //index va
+ OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
+ //max indices
+ OUT_RING(ring, max_indices);
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ OUT_RING(ring, indirect->stride);
} else {
- OUT_PKT(ring, CP_DRAW_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
- A5XX_CP_DRAW_INDIRECT_INDIRECT(ind->bo, indirect->offset));
+ OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 6);
+ OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
+ OUT_RING(ring,
+ (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL)
+ | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
+ OUT_RING(ring, indirect->draw_count);
+ OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
+ OUT_RING(ring, indirect->stride);
}
}
@@ -228,6 +241,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
emit.state.num_groups = 0;
emit.streamout_mask = 0;
emit.prog = NULL;
+ emit.draw_id = 0;
if (!(ctx->prog.vs && ctx->prog.fs))
return;
@@ -365,7 +379,14 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
if (indirect->count_from_stream_output) {
draw_emit_xfb(ring, &draw0, info, indirect);
} else {
- draw_emit_indirect(ring, &draw0, info, indirect, index_offset);
+ const struct ir3_const_state *const_state = ir3_const_state(emit.vs);
+ uint32_t dst_offset_dp = const_state->offsets.driver_param;
+
+ /* If unused, pass 0 for DST_OFF: */
+ if (dst_offset_dp > emit.vs->constlen)
+ dst_offset_dp = 0;
+
+ draw_emit_indirect(ctx, ring, &draw0, info, indirect, index_offset, dst_offset_dp);
}
} else {
draw_emit(ring, &draw0, info, &draws[0], index_offset);
@@ -401,6 +422,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
if (emit.dirty_groups) {
emit.state.num_groups = 0;
emit.draw = &draws[i];
+ emit.draw_id = info->increment_draw_id ? i : 0;
fd6_emit_3d_state<CHIP>(ring, &emit);
}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index 67572374fcc..939f7bb3af2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -183,6 +183,7 @@ struct fd6_emit {
bool rasterflat : 1;
bool primitive_restart : 1;
uint8_t streamout_mask;
+ uint32_t draw_id;
/* cached to avoid repeated lookups: */
const struct fd6_program_state *prog;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 1dfb78284bf..f5081a3c10a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -213,6 +213,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_DRAW_PARAMETERS:
return is_a6xx(screen);
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h
index 21a4ab0232b..40e86544994 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_const.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h
@@ -454,17 +454,19 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count_bias *draw) assert_dt
+ const struct pipe_draw_start_count_bias *draw,
+ const uint32_t draw_id) assert_dt
{
assert(v->need_driver_params);
const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t offset = const_state->offsets.driver_param;
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
- [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
+ [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
[IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
[IR3_DP_INSTID_BASE] = info->start_instance,
[IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
+ [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
};
if (v->key.ucp_enables) {
struct pipe_clip_state *ucp = &ctx->ucp;
@@ -573,7 +575,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v,
/* emit driver params every time: */
if (info && v->need_driver_params) {
ring_wfi(ctx->batch, ring);
- ir3_emit_driver_params(v, ring, ctx, info, indirect, draw);
+ ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 2ff88272556..ab677e79a6b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -570,6 +570,10 @@ ir3_screen_init(struct pipe_screen *pscreen)
.bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET +
IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts,
};
+
+ if (screen->gen >= 6) {
+ options.lower_base_vertex = true;
+ }
screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options);
/* TODO do we want to limit things to # of fast cores, or just limit