summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp 41
-rw-r--r-- llvm/test/Transforms/OpenMP/deduplication.ll 4
-rw-r--r-- llvm/test/Transforms/OpenMP/deduplication_target.ll 2
-rw-r--r-- openmp/libomptarget/test/offloading/atomic-compare-signedness.c 2
4 files changed, 21 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 1a3c692e628e..e2d3aef240af 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -1706,37 +1707,27 @@ private:
};
if (!ReplVal) {
- for (Use *U : *UV)
+ auto *DT =
+ OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
+ if (!DT)
+ return false;
+ Instruction *IP = nullptr;
+ for (Use *U : *UV) {
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+ if (IP)
+ IP = DT->findNearestCommonDominator(IP, CI);
+ else
+ IP = CI;
if (!CanBeMoved(*CI))
continue;
-
- // If the function is a kernel, dedup will move
- // the runtime call right after the kernel init callsite. Otherwise,
- // it will move it to the beginning of the caller function.
- if (isKernel(F)) {
- auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- auto *KernelInitUV = KernelInitRFI.getUseVector(F);
-
- if (KernelInitUV->empty())
- continue;
-
- assert(KernelInitUV->size() == 1 &&
- "Expected a single __kmpc_target_init in kernel\n");
-
- CallInst *KernelInitCI =
- getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
- assert(KernelInitCI &&
- "Expected a call to __kmpc_target_init in kernel\n");
-
- CI->moveAfter(KernelInitCI);
- } else
- CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
- ReplVal = CI;
- break;
+ if (!ReplVal)
+ ReplVal = CI;
}
+ }
if (!ReplVal)
return false;
+ assert(IP && "Expected insertion point!");
+ cast<Instruction>(ReplVal)->moveBefore(IP);
}
// If we use a call as a replacement value we need to make sure the ident is
diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll
index df5c7737ee39..b7964a1f2643 100644
--- a/llvm/test/Transforms/OpenMP/deduplication.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication.ll
@@ -102,8 +102,8 @@ m:
define void @local_and_global_gtid_calls() {
; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
+; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
@@ -132,10 +132,10 @@ entry:
define void @local_gtid_calls_only() {
; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
+; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
index f1e9d656e80c..f76e0f66d12c 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -19,10 +19,10 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
diff --git a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
index 08b7acc20338..5b8ebeae83ae 100644
--- a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
+++ b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c
@@ -5,6 +5,8 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic | %fcheck-generic
+// RUN: %libomptarget-compileopt-generic -fopenmp-version=51
+// RUN: %libomptarget-run-generic | %fcheck-generic
// High parallelism increases our chances of detecting a lack of atomicity.
#define NUM_THREADS_TRY 256