diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-11-29 03:41:24 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-11-29 03:41:24 +0000 |
commit | 25c1eb0bc3267b6a541e3e9544b6201f38feaa7a (patch) | |
tree | f3cfe4b5079719dfe62475c7fd355aa26dfb3874 | |
parent | fadb1c1c61e0fefaf578508ddebf6bf3f10b25a7 (diff) | |
download | llvm-25c1eb0bc3267b6a541e3e9544b6201f38feaa7a.tar.gz |
Merging r277500:
------------------------------------------------------------------------
r277500 | nhaehnle | 2016-08-02 12:17:37 -0700 (Tue, 02 Aug 2016) | 18 lines
AMDGPU: Track physical registers in SIWholeQuadMode
Summary:
There are cases where uniform branch conditions are computed in VGPRs, and
we didn't correctly mark those as WQM.
The stray change in basic-branch.ll is because invoking the LiveIntervals
analysis leads to the detection of a dead register that would otherwise
not be seen at -O0.
This is a candidate for the 3.9 branch, as it fixes a possible hang.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: https://reviews.llvm.org/D22673
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288103 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | test/CodeGen/AMDGPU/wqm.ll | 41 |
1 files changed, 0 insertions, 41 deletions
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
index 809a7ba9b826..b272190d7e18 100644
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -374,47 +374,6 @@ break:
   ret <4 x float> %c.iv
 }
 
-; Only intrinsic stores need exact execution -- other stores do not have
-; externally visible effects and may require WQM for correctness.
-;
-; CHECK-LABEL: {{^}}test_alloca:
-; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
-; CHECK: s_wqm_b64 exec, exec
-
-; CHECK: s_and_b64 exec, exec, [[LIVE]]
-; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
-; CHECK: s_wqm_b64 exec, exec
-; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
-; CHECK: s_and_b64 exec, exec, [[LIVE]]
-; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
-; CHECK: s_wqm_b64 exec, exec
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
-
-; CHECK: image_sample
-; CHECK: s_and_b64 exec, exec, [[LIVE]]
-; CHECK: buffer_store_dwordx4
-define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
-entry:
-  %array = alloca [32 x i32], align 4
-
-  call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
-
-  %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
-  store volatile i32 %a, i32* %s.gep, align 4
-
-  call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
-
-  %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
-  %c = load i32, i32* %c.gep, align 4
-
-  %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
-  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
-
-  ret void
-}
-
-
 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1