summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rob Clark <robdclark@chromium.org> 2021-01-09 12:12:37 -0800
committer: Marge Bot <eric+marge@anholt.net> 2021-01-13 18:32:47 +0000
commit: 3e15ba5ccc4e7b8af80ea84a44906a2ffa895490 (patch)
tree: 74d467b070bef5726762e8b8c4fed4e90d485eda
parent: 11cba228fd5a7296088d42947e6ce014891f00fb (diff)
download: mesa-3e15ba5ccc4e7b8af80ea84a44906a2ffa895490.tar.gz
freedreno/ir3: Better sstall estimation
1) Take into account repeat/nop cycles
2) Clear sfu_delay after an (ss) sync

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7997>
-rw-r--r-- src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log | 48
-rw-r--r-- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 72
-rw-r--r-- src/freedreno/.gitlab-ci/reference/glxgears-a420.log | 40
-rw-r--r-- src/freedreno/ir3/ir3.c | 8
4 files changed, 85 insertions, 83 deletions
diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
index 8900e61ebe4..3c4e006b4a1 100644
--- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
+++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
@@ -816,13 +816,13 @@ t4 write SP_VS_OBJ_START_LO (a81c)
- used (full): 4-11 (cnt=8, max=11)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- - max const: 5
-
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+ - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
00000000010541a4: 0000: 48a81c02 01054000 00000000
t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -843,13 +843,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
- used (full): 4-11 (cnt=8, max=11)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- - max const: 5
-
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+ - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
00000000010541b0: 0000: 70328003 00620000 01054000 00000000
t7 opcode: CP_LOAD_STATE6_GEOM (32) (8 dwords)
{ DST_OFF = 1 | STATE_TYPE = ST6_CONSTANTS | STATE_SRC = SS6_DIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -908,13 +908,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
- used (full): 0 2-5 (cnt=5, max=5)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- - max const: 0
-
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+ - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+ - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
000000000105422c: 0000: 40a98302 01054080 00000000
t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 1 }
@@ -934,13 +934,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
- used (full): 0 2-5 (cnt=5, max=5)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- - max const: 0
-
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+ - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+ - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
0000000001054238: 0000: 70348003 00720000 01054080 00000000
t4 write SP_CS_CONFIG (a9bb)
SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -1502,13 +1502,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
- used (full): 4-11 (cnt=8, max=11)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- - max const: 5
-
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+ - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
!+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 00000001 SP_VS_INSTRLEN: 1
+ 00000000 SP_HS_WAVE_INPUT_SIZE: 0
@@ -1537,13 +1537,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
- used (full): 0 2-5 (cnt=5, max=5)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- - max const: 0
-
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+ - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+ - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
!+ 00000100 SP_BLEND_CNTL: { UNK8 }
+ 00000000 SP_SRGB_CNTL: { 0 }
!+ 0000000f SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
index 874f1085c78..a3cfde19efa 100644
--- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log
+++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
@@ -639,13 +639,13 @@ t4 write SP_VS_OBJ_START_LO (a81c)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
0000000001121038: 0000: 48a81c02 01011000 00000000
t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -661,13 +661,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
0000000001121044: 0000: 70328003 00620000 01011000 00000000
t4 write VPC_VAR[0].DISABLE (9212)
VPC_VAR[0].DISABLE: 0xffffffff
@@ -1109,13 +1109,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
!+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 00000001 SP_VS_INSTRLEN: 1
+ 00000000 SP_HS_WAVE_INPUT_SIZE: 0
@@ -1954,13 +1954,13 @@ t4 write SP_VS_OBJ_START_LO (a81c)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
0000000001120038: 0000: 48a81c02 01012000 00000000
t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -1976,13 +1976,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
0000000001120044: 0000: 70328003 00620000 01012000 00000000
t4 write VPC_VAR[0].DISABLE (9212)
VPC_VAR[0].DISABLE: 0xffffffff
@@ -3497,13 +3497,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
- used (full): 0-73 (cnt=74, max=73)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- - max const: 113
-
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+ - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+ - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 140 (ss), 0 (sy)
+ - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
0000000001120158: 0000: 40a98302 01013000 00000000
t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 88 }
@@ -4920,13 +4920,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
- used (full): 0-73 (cnt=74, max=73)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- - max const: 113
-
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+ - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+ - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 140 (ss), 0 (sy)
+ - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
0000000001120164: 0000: 70348003 16320000 01013000 00000000
t4 write VFD_CONTROL_1 (a001)
VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
@@ -5334,13 +5334,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000001 SP_VS_INSTRLEN: 1
+ 00000000 SP_HS_WAVE_INPUT_SIZE: 0
@@ -6772,13 +6772,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
- used (full): 0-73 (cnt=74, max=73)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- - max const: 113
-
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+ - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+ - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 140 (ss), 0 (sy)
+ - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
!+ 00000100 SP_BLEND_CNTL: { UNK8 }
+ fcfcfc00 SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
!+ 00000001 SP_FS_OUTPUT_CNTL1: { MRT = 1 }
diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
index efed6fcdcfd..c5ef966f48e 100644
--- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
+++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
@@ -429,13 +429,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords)
- used (full): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+ - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
109ce1f0: 0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000
*
t3 opcode: CP_LOAD_STATE4 (30) (35 dwords)
@@ -455,13 +455,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords)
- used (full): 0-3 (cnt=4, max=3)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- - max const: 3
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): 0-3 (cnt=4, max=3) (estimated)
- - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full
+
+ - shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov
+ - shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen
- shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 0 (ss), 0 (sy)
+ - shaderdb: 0 sstall, 0 (ss), 0 (sy)
109ce27c: 0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002
109ce29c: 0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000
*
@@ -1043,13 +1043,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords)
- used (full): 0-13 (cnt=14, max=13)
- input (half): (cnt=0, max=0)
- input (full): 2-5 (cnt=4, max=5)
- - max const: 52
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): 6-13 (cnt=8, max=13) (estimated)
- - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full
+
+ - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov
+ - shaderdb: 0 last-baryf, 0 half, 4 full, 14 constlen
- shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 1 (ss), 0 (sy)
+ - shaderdb: 10 sstall, 1 (ss), 0 (sy)
109ce66c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
109ce68c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
109ce6ac: 0040: 63828006 10010002 40700000 0001100f 63828009 00001005 63818000 00000010
@@ -1085,13 +1085,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords)
- used (full): 0-3 (cnt=4, max=3)
- input (half): (cnt=0, max=0)
- input (full): 0-3 (cnt=4, max=3)
- - max const: 0
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): (cnt=0, max=0) (estimated)
- - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full
+
+ - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov
+ - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen
- shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7
- - shaderdb: 1 (ss), 0 (sy)
+ - shaderdb: 65531 sstall, 1 (ss), 0 (sy)
109ce878: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
109ce898: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000
109ce8b8: 0040: 03000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
@@ -1675,13 +1675,13 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords)
- used (full): 0-8 10-17 (cnt=17, max=17)
- input (half): (cnt=0, max=0)
- input (full): 2-8 (cnt=7, max=8)
- - max const: 52
-
- output (half): (cnt=0, max=0) (estimated)
- output (full): 10-17 (cnt=8, max=17) (estimated)
- - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full
+
+ - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov
+ - shaderdb: 0 last-baryf, 0 half, 5 full, 14 constlen
- shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
- - shaderdb: 1 (ss), 0 (sy)
+ - shaderdb: 10 sstall, 1 (ss), 0 (sy)
109cee34: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
109cee54: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
109cee74: 0040: 6382800a 10010002 40700000 0001100f 6382800d 00001005 63818000 00000010
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 0188da9657a..61d3c7bab5a 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1002,7 +1002,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
info->sizedwords = info->size / 4;
foreach_block (block, &shader->block_list) {
- unsigned sfu_delay = 0;
+ int sfu_delay = 0;
foreach_instr (instr, &block->instr_list) {
@@ -1050,6 +1050,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
if (instr->flags & IR3_INSTR_SS) {
info->ss++;
info->sstall += sfu_delay;
+ sfu_delay = 0;
}
if (instr->flags & IR3_INSTR_SY)
@@ -1057,8 +1058,9 @@ ir3_collect_info(struct ir3_shader_variant *v)
if (is_sfu(instr)) {
sfu_delay = 10;
- } else if (sfu_delay > 0) {
- sfu_delay--;
+ } else {
+ int n = MIN2(sfu_delay, 1 + instr->repeat + instr->nop);
+ sfu_delay -= n;
}
}
}