[libc] Use `nvptx_kernel` attribute in NVPTX startup code

Summary: A recent patch allowed us to emit a callable kernel from freestanding NVPTX code. This lets us move away from using the CUDA language to build the `_start` kernel, which has several advantages; in particular, it works around an entire assortment of errors I was seeing while implementing RPC for NVIDIA GPUs.
author: Joseph Huber <jhuber6@vols.utk.edu> 2023-03-24 14:45:14 -0500
committer: Joseph Huber <jhuber6@vols.utk.edu> 2023-03-24 14:46:26 -0500
commit: 1fce1d341b17762bb45bdc89520b00820fd63337 (patch)
tree: 402afac7238c1fdaddd97670c469871e8b11c677 /libc/startup
parent: 8ab9eebb1897ab96f0a8f5bc5430f4039e38e6f8 (diff)
download: llvm-1fce1d341b17762bb45bdc89520b00820fd63337.tar.gz
2 files changed, 4 insertions, 8 deletions
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 96ab7540cedb..1ee2108b0ef2 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -6,11 +6,8 @@ add_startup_object(
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
     -nogpulib # Do not include any GPU vendor libraries.
-    -nostdinc
-    -x cuda # Use the CUDA toolchain to emit the `_start` kernel.
-    -fgpu-rdc # Emit relocatable device code from CUDA.
-    --offload-device-only
-    --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
+    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+    --target=${LIBC_GPU_TARGET_TRIPLE}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
 )
 get_fq_target_name(crt1 fq_name)
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index cf4077c3d9ed..1e7f4ca7668c 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,10 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
 
-// TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX.
-extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void
+extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
 _start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
        void *buffer) {
   __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
author	Joseph Huber <jhuber6@vols.utk.edu>	2023-03-24 14:45:14 -0500
committer	Joseph Huber <jhuber6@vols.utk.edu>	2023-03-24 14:46:26 -0500
commit	1fce1d341b17762bb45bdc89520b00820fd63337 (patch)
tree	402afac7238c1fdaddd97670c469871e8b11c677 /libc/startup
parent	8ab9eebb1897ab96f0a8f5bc5430f4039e38e6f8 (diff)
download	llvm-1fce1d341b17762bb45bdc89520b00820fd63337.tar.gz