diff options
4 files changed, 21 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 1a3c692e628e..e2d3aef240af 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -1706,37 +1707,27 @@ private: }; if (!ReplVal) { - for (Use *U : *UV) + auto *DT = + OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F); + if (!DT) + return false; + Instruction *IP = nullptr; + for (Use *U : *UV) { if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + if (IP) + IP = DT->findNearestCommonDominator(IP, CI); + else + IP = CI; if (!CanBeMoved(*CI)) continue; - - // If the function is a kernel, dedup will move - // the runtime call right after the kernel init callsite. Otherwise, - // it will move it to the beginning of the caller function. - if (isKernel(F)) { - auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; - auto *KernelInitUV = KernelInitRFI.getUseVector(F); - - if (KernelInitUV->empty()) - continue; - - assert(KernelInitUV->size() == 1 && - "Expected a single __kmpc_target_init in kernel\n"); - - CallInst *KernelInitCI = - getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI); - assert(KernelInitCI && - "Expected a call to __kmpc_target_init in kernel\n"); - - CI->moveAfter(KernelInitCI); - } else - CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); - ReplVal = CI; - break; + if (!ReplVal) + ReplVal = CI; } + } if (!ReplVal) return false; + assert(IP && "Expected insertion point!"); + cast<Instruction>(ReplVal)->moveBefore(IP); } // If we use a call as a replacement value we need to make sure the ident is diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index df5c7737ee39..b7964a1f2643 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -102,8 +102,8 @@ m: define void @local_and_global_gtid_calls() { ; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) @@ -132,10 +132,10 @@ entry: define void @local_gtid_calls_only() { ; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll index f1e9d656e80c..f76e0f66d12c 100644 --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -19,10 +19,10 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: diff --git a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c index 08b7acc20338..5b8ebeae83ae 100644 --- a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c +++ b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c @@ -5,6 +5,8 @@ // RUN: %libomptarget-compile-generic -fopenmp-version=51 // RUN: %libomptarget-run-generic | %fcheck-generic +// RUN: %libomptarget-compileopt-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic | %fcheck-generic // High parallelism increases our chances of detecting a lack of atomicity. #define NUM_THREADS_TRY 256 |