diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-09-24 19:16:40 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-09-24 19:16:40 +0000 |
commit | 7fd15a3b66eaa8c90756983ddd960624f543949f (patch) | |
tree | 995ee23f86a635b6dd608886d49848ae1b3d78af /lib/CodeGen | |
parent | e8c2d2746b1e718b607b78a830534fd3a981d250 (diff) | |
download | clang-7fd15a3b66eaa8c90756983ddd960624f543949f.tar.gz |
[HIP] Support new kernel launching API
Differential Revision: https://reviews.llvm.org/D67947
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@372773 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/CGCUDANV.cpp | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 4d4038dae9..05aeef4194 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), - CudaFeature::CUDA_USES_NEW_LAUNCH)) + CudaFeature::CUDA_USES_NEW_LAUNCH) || + CGF.getLangOpts().HIPUseNewLaunchAPI) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - // Lookup cudaLaunchKernel function. + // Lookup cudaLaunchKernel/hipLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); + // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // hipStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + auto LaunchKernelName = addPrefixToName("LaunchKernel"); IdentifierInfo &cudaLaunchKernelII = - CGM.getContext().Idents.get("cudaLaunchKernel"); + CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), - "Can't find declaration for cudaLaunchKernel()"); + "Can't find declaration for " + LaunchKernelName); return; } // Create temporary dim3 grid_dim, block_dim. @@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), - "__cudaPopCallConfiguration"); + addUnderscoredPrefixToName("PopCallConfiguration")); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), @@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = - CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGM.CreateRuntimeFunction(FTy, LaunchKernelName); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); |