diff options
author | Rob Clark <robdclark@chromium.org> | 2021-01-09 12:12:37 -0800 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-01-13 18:32:47 +0000 |
commit | 3e15ba5ccc4e7b8af80ea84a44906a2ffa895490 (patch) | |
tree | 74d467b070bef5726762e8b8c4fed4e90d485eda | |
parent | 11cba228fd5a7296088d42947e6ce014891f00fb (diff) | |
download | mesa-3e15ba5ccc4e7b8af80ea84a44906a2ffa895490.tar.gz |
freedreno/ir3: Better sstall estimation
1) Take into account repeat/nop cycles
2) Clear sfu_delay after an (ss) sync
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7997>
4 files changed, 85 insertions, 83 deletions
diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 8900e61ebe4..3c4e006b4a1 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -816,13 +816,13 @@ t4 write SP_VS_OBJ_START_LO (a81c) - used (full): 4-11 (cnt=8, max=11) - input (half): 8-19 (cnt=12, max=19) - input (full): 4-9 (cnt=6, max=9) - - max const: 5 - - output (half): 16-23 (cnt=8, max=23) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + + - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 00000000010541a4: 0000: 48a81c02 01054000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 } @@ -843,13 +843,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - used (full): 4-11 (cnt=8, max=11) - input (half): 8-19 (cnt=12, max=19) - input (full): 4-9 (cnt=6, max=9) - - max const: 5 - - output (half): 16-23 (cnt=8, max=23) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + + - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 00000000010541b0: 0000: 70328003 00620000 01054000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (8 dwords) { DST_OFF = 1 | STATE_TYPE = ST6_CONSTANTS | STATE_SRC = SS6_DIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 } @@ -908,13 +908,13 @@ t4 write SP_FS_OBJ_START_LO (a983) - used (full): 0 2-5 (cnt=5, max=5) - input (half): 0-1 (cnt=2, max=1) - input (full): 0 (cnt=1, max=0) - - max const: 0 - - output (half): 4-11 (cnt=8, max=11) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + + - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov + - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 000000000105422c: 0000: 40a98302 01054080 00000000 t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 1 } @@ -934,13 +934,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) - used (full): 0 2-5 (cnt=5, max=5) - input (half): 0-1 (cnt=2, max=1) - input (full): 0 (cnt=1, max=0) - - max const: 0 - - output (half): 4-11 (cnt=8, max=11) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + + - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov + - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 0000000001054238: 0000: 70348003 00720000 01054080 00000000 t4 write SP_CS_CONFIG (a9bb) SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -1502,13 +1502,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) - used (full): 4-11 (cnt=8, max=11) - input (half): 8-19 (cnt=12, max=19) - input (full): 4-9 (cnt=6, max=9) - - max const: 5 - - output (half): 16-23 (cnt=8, max=23) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + + - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) !+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000001 SP_VS_INSTRLEN: 1 + 00000000 SP_HS_WAVE_INPUT_SIZE: 0 @@ -1537,13 +1537,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) - used (full): 0 2-5 (cnt=5, max=5) - input (half): 0-1 (cnt=2, max=1) - input (full): 0 (cnt=1, max=0) - - max const: 0 - - output (half): 4-11 (cnt=8, max=11) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + + - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov + - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) !+ 00000100 SP_BLEND_CNTL: { UNK8 } + 00000000 SP_SRGB_CNTL: { 0 } !+ 0000000f SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 } diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 874f1085c78..a3cfde19efa 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -639,13 +639,13 @@ t4 write SP_VS_OBJ_START_LO (a81c) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 0000000001121038: 0000: 48a81c02 01011000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 } @@ -661,13 +661,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 0000000001121044: 0000: 70328003 00620000 01011000 00000000 t4 write VPC_VAR[0].DISABLE (9212) VPC_VAR[0].DISABLE: 0xffffffff @@ -1109,13 +1109,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) !+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000001 SP_VS_INSTRLEN: 1 + 00000000 SP_HS_WAVE_INPUT_SIZE: 0 @@ -1954,13 +1954,13 @@ t4 write SP_VS_OBJ_START_LO (a81c) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 0000000001120038: 0000: 48a81c02 01012000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 } @@ -1976,13 +1976,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 0000000001120044: 0000: 70328003 00620000 01012000 00000000 t4 write VPC_VAR[0].DISABLE (9212) VPC_VAR[0].DISABLE: 0xffffffff @@ -3497,13 +3497,13 @@ t4 write SP_FS_OBJ_START_LO (a983) - used (full): 0-73 (cnt=74, max=73) - input (half): 38-41 (cnt=4, max=41) - input (full): 19-20 (cnt=2, max=20) - - max const: 113 - - output (half): 8-15 (cnt=8, max=15) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + + - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov + - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 140 (ss), 0 (sy) + - shaderdb: 1326 sstall, 140 (ss), 0 (sy) 0000000001120158: 0000: 40a98302 01013000 00000000 t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 88 } @@ -4920,13 +4920,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) - used (full): 0-73 (cnt=74, max=73) - input (half): 38-41 (cnt=4, max=41) - input (full): 19-20 (cnt=2, max=20) - - max const: 113 - - output (half): 8-15 (cnt=8, max=15) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + + - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov + - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 140 (ss), 0 (sy) + - shaderdb: 1326 sstall, 140 (ss), 0 (sy) 0000000001120164: 0000: 70348003 16320000 01013000 00000000 t4 write VFD_CONTROL_1 (a001) VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x } @@ -5334,13 +5334,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) + 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000001 SP_VS_INSTRLEN: 1 + 00000000 SP_HS_WAVE_INPUT_SIZE: 0 @@ -6772,13 +6772,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - used (full): 0-73 (cnt=74, max=73) - input (half): 38-41 (cnt=4, max=41) - input (full): 19-20 (cnt=2, max=20) - - max const: 113 - - output (half): 8-15 (cnt=8, max=15) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + + - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov + - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 140 (ss), 0 (sy) + - shaderdb: 1326 sstall, 140 (ss), 0 (sy) !+ 00000100 SP_BLEND_CNTL: { UNK8 } + fcfcfc00 SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } !+ 00000001 SP_FS_OUTPUT_CNTL1: { MRT = 1 } diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log index efed6fcdcfd..c5ef966f48e 100644 --- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log +++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log @@ -429,13 +429,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - used (full): (cnt=0, max=0) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + + - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 109ce1f0: 0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000 * t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) @@ -455,13 +455,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): (cnt=0, max=0) - - max const: 3 - - output (half): (cnt=0, max=0) (estimated) - output (full): 0-3 (cnt=4, max=3) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full + + - shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 0 (ss), 0 (sy) + - shaderdb: 0 sstall, 0 (ss), 0 (sy) 109ce27c: 0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002 109ce29c: 0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000 * @@ -1043,13 +1043,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - used (full): 0-13 (cnt=14, max=13) - input (half): (cnt=0, max=0) - input (full): 2-5 (cnt=4, max=5) - - max const: 52 - - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full + + - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov + - shaderdb: 0 last-baryf, 0 half, 4 full, 14 constlen - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 1 (ss), 0 (sy) + - shaderdb: 10 sstall, 1 (ss), 0 (sy) 109ce66c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109ce68c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c 109ce6ac: 0040: 63828006 10010002 40700000 0001100f 63828009 00001005 63818000 00000010 @@ -1085,13 +1085,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - used (full): 0-3 (cnt=4, max=3) - input (half): (cnt=0, max=0) - input (full): 0-3 (cnt=4, max=3) - - max const: 0 - - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + + - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov + - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - - shaderdb: 1 (ss), 0 (sy) + - shaderdb: 65531 sstall, 1 (ss), 0 (sy) 109ce878: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109ce898: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 109ce8b8: 0040: 03000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 @@ -1675,13 +1675,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - used (full): 0-8 10-17 (cnt=17, max=17) - input (half): (cnt=0, max=0) - input (full): 2-8 (cnt=7, max=8) - - max const: 52 - - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + + - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov + - shaderdb: 0 last-baryf, 0 half, 5 full, 14 constlen - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - - shaderdb: 1 (ss), 0 (sy) + - shaderdb: 10 sstall, 1 (ss), 0 (sy) 109cee34: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109cee54: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c 109cee74: 0040: 6382800a 10010002 40700000 0001100f 6382800d 00001005 63818000 00000010 diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 0188da9657a..61d3c7bab5a 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1002,7 +1002,7 @@ ir3_collect_info(struct ir3_shader_variant *v) info->sizedwords = info->size / 4; foreach_block (block, &shader->block_list) { - unsigned sfu_delay = 0; + int sfu_delay = 0; foreach_instr (instr, &block->instr_list) { @@ -1050,6 +1050,7 @@ ir3_collect_info(struct ir3_shader_variant *v) if (instr->flags & IR3_INSTR_SS) { info->ss++; info->sstall += sfu_delay; + sfu_delay = 0; } if (instr->flags & IR3_INSTR_SY) @@ -1057,8 +1058,9 @@ ir3_collect_info(struct ir3_shader_variant *v) if (is_sfu(instr)) { sfu_delay = 10; - } else if (sfu_delay > 0) { - sfu_delay--; + } else { + int n = MIN2(sfu_delay, 1 + instr->repeat + instr->nop); + sfu_delay -= n; } } } |