summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohannes Doerfert <johannes@jdoerfert.de>2023-02-02 01:06:00 -0800
committerTom Stellard <tstellar@redhat.com>2023-02-06 11:47:36 -0800
commita2a85f261f953394f7e08bb6a41bb4f026c598fe (patch)
tree86900ec91fa9eb44b18b89cd115d10276224b806
parent3fae9049718777900fde345876ea35f00a570b7b (diff)
downloadllvm-a2a85f261f953394f7e08bb6a41bb4f026c598fe.tar.gz
[Attributor][FIX] Ensure we use the right AAExecutionDomain
Before, we might have ended up querying the AAExecutionDomain of a different function, which resulted in wrong optimistic results. Partially fixes https://github.com/llvm/llvm-project/issues/60425 (cherry picked from commit 18a2975b57830a231e2b8f0299969edfc4f8477c)
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp20
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp2
-rw-r--r--llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll46
3 files changed, 64 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index e023ee76e4b0..42158e4e05dd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
- IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
bool InstIsExecutedByInitialThreadOnly =
ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
bool InstIsExecutedInAlignedRegion =
ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
+ if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
+ A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
InformationCache &InfoCache = A.getInfoCache();
bool IsThreadLocalObj =
@@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl
auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
if (IsThreadLocalObj || AllInSameNoSyncFn)
return true;
- if (!ExecDomainAA)
+ const auto *FnExecDomainAA =
+ I.getFunction() == &Scope
+ ? ExecDomainAA
+ : A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*I.getFunction()), &QueryingAA,
+ DepClassTy::NONE);
+ if (!FnExecDomainAA)
return false;
if (InstIsExecutedInAlignedRegion ||
- ExecDomainAA->isExecutedInAlignedRegion(A, I))
+ FnExecDomainAA->isExecutedInAlignedRegion(A, I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
if (InstIsExecutedByInitialThreadOnly &&
- ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
return false;
};
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index ee90bf8d9720..2d4ded17a370 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2680,6 +2680,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
bool isExecutedInAlignedRegion(Attributor &A,
const Instruction &I) const override {
+ assert(I.getFunction() == getAnchorScope() &&
+ "Instruction is out of scope!");
if (!isValidState() || isa<CallBase>(I))
return false;
diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
new file mode 100644
index 000000000000..ed36ebcd3aa8
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs
+; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK
+
+%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
+%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
+
+@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
+
+define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
+ %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
+ ret void
+}
+
+define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
+ store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+ %4 = call i1 @__kmpc_kernel_parallel()
+ ret i32 0
+}
+
+define internal i1 @__kmpc_kernel_parallel() {
+ %1 = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8
+ ret i1 false
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}
+;.
+; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
+;.
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
+; CHECK-NEXT: ret void
+;
+;
+; CHECK: Function Attrs: norecurse nosync nounwind memory(write)
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+;.