summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll 144
1 files changed, 24 insertions, 120 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
index 5164b072a6dd..ed0de729dafd 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -14,7 +14,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: v_mov_b32_e32 v36, v16
@@ -22,13 +21,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: v_mov_b32_e32 v34, v14
; GFX9-NEXT: v_mov_b32_e32 v33, v13
; GFX9-NEXT: v_mov_b32_e32 v32, v12
-; GFX9-NEXT: s_mov_b32 s5, s4
-; GFX9-NEXT: s_mov_b32 s6, s4
-; GFX9-NEXT: s_mov_b32 s7, s4
-; GFX9-NEXT: s_mov_b32 s8, s4
-; GFX9-NEXT: s_mov_b32 s9, s4
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s4
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -82,16 +74,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10-NEXT: v_mov_b32_e32 v34, v14
; GFX10-NEXT: v_mov_b32_e32 v33, v13
; GFX10-NEXT: v_mov_b32_e32 v32, v12
-; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_mov_b32 s5, s4
-; GFX10-NEXT: s_mov_b32 s6, s4
-; GFX10-NEXT: s_mov_b32 s7, s4
-; GFX10-NEXT: s_mov_b32 s8, s4
-; GFX10-NEXT: s_mov_b32 s9, s4
-; GFX10-NEXT: s_mov_b32 s10, s4
-; GFX10-NEXT: s_mov_b32 s11, s4
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -145,16 +129,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15
; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13
; GFX11-NEXT: v_mov_b32_e32 v32, v12
-; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_writelane_b32 v40, s33, 2
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_mov_b32 s1, s0
-; GFX11-NEXT: s_mov_b32 s2, s0
-; GFX11-NEXT: s_mov_b32 s3, s0
-; GFX11-NEXT: s_mov_b32 s4, s0
-; GFX11-NEXT: s_mov_b32 s5, s0
-; GFX11-NEXT: s_mov_b32 s6, s0
-; GFX11-NEXT: s_mov_b32 s7, s0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12
; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
@@ -225,65 +201,41 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 10
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: v_writelane_b32 v40, s36, 2
-; GFX9-NEXT: v_writelane_b32 v40, s37, 3
-; GFX9-NEXT: v_writelane_b32 v40, s38, 4
-; GFX9-NEXT: v_writelane_b32 v40, s39, 5
-; GFX9-NEXT: v_writelane_b32 v40, s40, 6
-; GFX9-NEXT: v_writelane_b32 v40, s41, 7
+; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: v_writelane_b32 v40, s42, 8
-; GFX9-NEXT: s_mov_b32 s36, 0
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: v_writelane_b32 v40, s43, 9
; GFX9-NEXT: v_mov_b32_e32 v45, v16
; GFX9-NEXT: v_mov_b32_e32 v44, v15
; GFX9-NEXT: v_mov_b32_e32 v43, v14
; GFX9-NEXT: v_mov_b32_e32 v42, v13
; GFX9-NEXT: v_mov_b32_e32 v41, v12
-; GFX9-NEXT: s_mov_b32 s37, s36
-; GFX9-NEXT: s_mov_b32 s38, s36
-; GFX9-NEXT: s_mov_b32 s39, s36
-; GFX9-NEXT: s_mov_b32 s40, s36
-; GFX9-NEXT: s_mov_b32 s41, s36
-; GFX9-NEXT: s_mov_b32 s42, s36
-; GFX9-NEXT: s_mov_b32 s43, s36
-; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
+; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT: s_addk_i32 s32, 0x800
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
+; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-NEXT: v_readlane_b32 s43, v40, 9
-; GFX9-NEXT: v_readlane_b32 s42, v40, 8
-; GFX9-NEXT: v_readlane_b32 s41, v40, 7
-; GFX9-NEXT: v_readlane_b32 s40, v40, 6
-; GFX9-NEXT: v_readlane_b32 s39, v40, 5
-; GFX9-NEXT: v_readlane_b32 s38, v40, 4
-; GFX9-NEXT: v_readlane_b32 s37, v40, 3
-; GFX9-NEXT: v_readlane_b32 s36, v40, 2
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xf800
-; GFX9-NEXT: v_readlane_b32 s33, v40, 10
+; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
@@ -298,66 +250,42 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_addk_i32 s32, 0x400
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[4:5]
+; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
+; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v41, v16
; GFX10-NEXT: v_mov_b32_e32 v42, v15
; GFX10-NEXT: v_mov_b32_e32 v43, v14
-; GFX10-NEXT: v_mov_b32_e32 v44, v13
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: v_mov_b32_e32 v44, v13
; GFX10-NEXT: v_mov_b32_e32 v45, v12
-; GFX10-NEXT: v_writelane_b32 v40, s36, 2
-; GFX10-NEXT: s_mov_b32 s36, 0
-; GFX10-NEXT: v_writelane_b32 v40, s37, 3
-; GFX10-NEXT: s_mov_b32 s37, s36
-; GFX10-NEXT: v_writelane_b32 v40, s38, 4
-; GFX10-NEXT: s_mov_b32 s38, s36
-; GFX10-NEXT: v_writelane_b32 v40, s39, 5
-; GFX10-NEXT: s_mov_b32 s39, s36
-; GFX10-NEXT: v_writelane_b32 v40, s40, 6
-; GFX10-NEXT: s_mov_b32 s40, s36
-; GFX10-NEXT: v_writelane_b32 v40, s41, 7
-; GFX10-NEXT: s_mov_b32 s41, s36
-; GFX10-NEXT: v_writelane_b32 v40, s42, 8
-; GFX10-NEXT: s_mov_b32 s42, s36
-; GFX10-NEXT: v_writelane_b32 v40, s43, 9
-; GFX10-NEXT: s_mov_b32 s43, s36
-; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_clause 0x4
; GFX10-NEXT: buffer_load_dword v45, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4
; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16
-; GFX10-NEXT: v_readlane_b32 s43, v40, 9
-; GFX10-NEXT: v_readlane_b32 s42, v40, 8
-; GFX10-NEXT: v_readlane_b32 s41, v40, 7
-; GFX10-NEXT: v_readlane_b32 s40, v40, 6
-; GFX10-NEXT: v_readlane_b32 s39, v40, 5
-; GFX10-NEXT: v_readlane_b32 s38, v40, 4
-; GFX10-NEXT: v_readlane_b32 s37, v40, 3
-; GFX10-NEXT: v_readlane_b32 s36, v40, 2
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfc00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
@@ -372,7 +300,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:20 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v40, s33, 10
+; GFX11-NEXT: v_writelane_b32 v40, s33, 2
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:16
@@ -380,56 +308,32 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:8
; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:4
; GFX11-NEXT: scratch_store_b32 off, v45, s33
+; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_add_i32 s32, s32, 32
+; GFX11-NEXT: s_getpc_b64 s[0:1]
+; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
+; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: v_dual_mov_b32 v41, v16 :: v_dual_mov_b32 v42, v15
; GFX11-NEXT: v_dual_mov_b32 v43, v14 :: v_dual_mov_b32 v44, v13
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: v_mov_b32_e32 v45, v12
-; GFX11-NEXT: v_writelane_b32 v40, s36, 2
-; GFX11-NEXT: s_mov_b32 s36, 0
-; GFX11-NEXT: v_writelane_b32 v40, s37, 3
-; GFX11-NEXT: s_mov_b32 s37, s36
-; GFX11-NEXT: v_writelane_b32 v40, s38, 4
-; GFX11-NEXT: s_mov_b32 s38, s36
-; GFX11-NEXT: v_writelane_b32 v40, s39, 5
-; GFX11-NEXT: s_mov_b32 s39, s36
-; GFX11-NEXT: v_writelane_b32 v40, s40, 6
-; GFX11-NEXT: s_mov_b32 s40, s36
-; GFX11-NEXT: v_writelane_b32 v40, s41, 7
-; GFX11-NEXT: s_mov_b32 s41, s36
-; GFX11-NEXT: v_writelane_b32 v40, s42, 8
-; GFX11-NEXT: s_mov_b32 s42, s36
-; GFX11-NEXT: v_writelane_b32 v40, s43, 9
-; GFX11-NEXT: s_mov_b32 s43, s36
-; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT: s_getpc_b64 s[0:1]
-; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
-; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
-; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX11-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: scratch_load_b32 v45, off, s33
; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:4
; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:8
; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:12
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:16
-; GFX11-NEXT: v_readlane_b32 s43, v40, 9
-; GFX11-NEXT: v_readlane_b32 s42, v40, 8
-; GFX11-NEXT: v_readlane_b32 s41, v40, 7
-; GFX11-NEXT: v_readlane_b32 s40, v40, 6
-; GFX11-NEXT: v_readlane_b32 s39, v40, 5
-; GFX11-NEXT: v_readlane_b32 s38, v40, 4
-; GFX11-NEXT: v_readlane_b32 s37, v40, 3
-; GFX11-NEXT: v_readlane_b32 s36, v40, 2
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_addk_i32 s32, 0xffe0
-; GFX11-NEXT: v_readlane_b32 s33, v40, 10
+; GFX11-NEXT: v_readlane_b32 s33, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:20 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0