diff options
author | Johannes Doerfert <johannes@jdoerfert.de> | 2023-02-02 01:06:00 -0800 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2023-02-06 11:47:36 -0800 |
commit | a2a85f261f953394f7e08bb6a41bb4f026c598fe (patch) | |
tree | 86900ec91fa9eb44b18b89cd115d10276224b806 | |
parent | 3fae9049718777900fde345876ea35f00a570b7b (diff) | |
download | llvm-a2a85f261f953394f7e08bb6a41bb4f026c598fe.tar.gz |
[Attributor][FIX] Ensure we use the right AAExecutionDomain
Before we might have ended up queriying the AAExecutionDomain of a
different function, which resulted in wrong optimistic results.
Partially fixes https://github.com/llvm/llvm-project/issues/60425
(cherry picked from commit 18a2975b57830a231e2b8f0299969edfc4f8477c)
-rw-r--r-- | llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll | 46 |
3 files changed, 64 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index e023ee76e4b0..42158e4e05dd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl const auto &NoSyncAA = A.getAAFor<AANoSync>( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>( - IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); + IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE); bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync(); bool InstIsExecutedByInitialThreadOnly = ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I); bool InstIsExecutedInAlignedRegion = ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I); + if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly) + A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); InformationCache &InfoCache = A.getInfoCache(); bool IsThreadLocalObj = @@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool { if (IsThreadLocalObj || AllInSameNoSyncFn) return true; - if (!ExecDomainAA) + const auto *FnExecDomainAA = + I.getFunction() == &Scope + ? ExecDomainAA + : A.lookupAAFor<AAExecutionDomain>( + IRPosition::function(*I.getFunction()), &QueryingAA, + DepClassTy::NONE); + if (!FnExecDomainAA) return false; if (InstIsExecutedInAlignedRegion || - ExecDomainAA->isExecutedInAlignedRegion(A, I)) + FnExecDomainAA->isExecutedInAlignedRegion(A, I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } if (InstIsExecutedByInitialThreadOnly && - ExecDomainAA->isExecutedByInitialThreadOnly(I)) + FnExecDomainAA->isExecutedByInitialThreadOnly(I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } return false; }; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index ee90bf8d9720..2d4ded17a370 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2680,6 +2680,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { bool isExecutedInAlignedRegion(Attributor &A, const Instruction &I) const override { + assert(I.getFunction() == getAnchorScope() && + "Instruction is out of scope!"); if (!isValidState() || isa<CallBase>(I)) return false; diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll new file mode 100644 index 000000000000..ed36ebcd3aa8 --- /dev/null +++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs +; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK + +%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } +%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } + +@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef + +define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { + %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) + ret void +} + +define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) { + store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 + %4 = call i1 @__kmpc_kernel_parallel() + ret i32 0 +} + +define internal i1 @__kmpc_kernel_parallel() { + %1 = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8 + ret i1 false +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} +;. +; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef +;. +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) +; CHECK-NEXT: ret void +; +; +; CHECK: Function Attrs: norecurse nosync nounwind memory(write) +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 0 +; +;. +; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(write) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} +;. |