diff options
author | Qiang Yu <yuq825@gmail.com> | 2023-04-04 09:42:24 +0800 |
---|---|---|
committer | Dylan Baker <dylan.c.baker@intel.com> | 2023-04-20 16:05:03 -0700 |
commit | 1d2c0192cfe4d58b5a7042ec91ac6567ffefa96f (patch) | |
tree | a0b65311e938546519f946ddca23047f58205923 | |
parent | 8405693c330069afbae75aa536aa3bd54e6125cd (diff) | |
download | mesa-1d2c0192cfe4d58b5a7042ec91ac6567ffefa96f.tar.gz |
radeonsi: fix max scrach lds size calculation when ngg
Fixes: 028d0590f85 ("radeonsi: replace llvm ngg vs/tes with nir lowering")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22304>
(cherry picked from commit fc3d8e1125524eb8f7dfdefd6ca2a716c68c0e7d)
Conflicts:
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 8 |
2 files changed, 5 insertions, 5 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 1657fa4899c..37c9178c4fa 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5924,7 +5924,7 @@
         "description": "radeonsi: fix max scrach lds size calculation when ngg",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "028d0590f85c418cf470510cafcbe8026c2c4208",
         "notes": null
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 2392e73bee2..b9c0c51799e 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -163,10 +163,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
    const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
    const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
 
-   /* All these are in dwords: */
-   /* GE can only use 8K dwords (32KB) of LDS per workgroup.
-    */
-   const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
+   /* All these are in dwords. The maximum is 16K dwords (64KB) of LDS per workgroup. */
+   const unsigned scratch_lds_size = gfx10_ngg_get_scratch_dw_size(shader);
+   /* Scrach is at last of LDS space and 2 dwords aligned, so it may cost more for alignment. */
+   const unsigned max_lds_size = 16 * 1024 - ALIGN(scratch_lds_size, 2);
    const unsigned target_lds_size = max_lds_size;
    unsigned esvert_lds_size = 0;
    unsigned gsprim_lds_size = 0;