summary | refs | log | tree | commit | diff
path: root/libc/startup
diff options
context:
space:
mode:
author: Joseph Huber <jhuber6@vols.utk.edu>, 2023-03-24 14:45:14 -0500
committer: Joseph Huber <jhuber6@vols.utk.edu>, 2023-03-24 14:46:26 -0500
commit: 1fce1d341b17762bb45bdc89520b00820fd63337 (patch)
tree: 402afac7238c1fdaddd97670c469871e8b11c677 /libc/startup
parent: 8ab9eebb1897ab96f0a8f5bc5430f4039e38e6f8 (diff)
download: llvm-1fce1d341b17762bb45bdc89520b00820fd63337.tar.gz
[libc] Use `nvptx_kernel` attribute in NVPTX startup code
Summary: A recent patch allowed us to emit a callable kernel from freestanding NVPTX code. This allows us to move away from using the CUDA language to build the startup object. Doing so has several advantages; in particular, it works around an entire assortment of errors I was seeing while implementing RPC for NVIDIA.
Diffstat (limited to 'libc/startup')
-rw-r--r-- libc/startup/gpu/nvptx/CMakeLists.txt | 7
-rw-r--r-- libc/startup/gpu/nvptx/start.cpp | 5
2 files changed, 4 insertions, 8 deletions
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 96ab7540cedb..1ee2108b0ef2 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -6,11 +6,8 @@ add_startup_object(
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
-nogpulib # Do not include any GPU vendor libraries.
- -nostdinc
- -x cuda # Use the CUDA toolchain to emit the `_start` kernel.
- -fgpu-rdc # Emit relocatable device code from CUDA.
- --offload-device-only
- --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
+ -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+ --target=${LIBC_GPU_TARGET_TRIPLE}
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
get_fq_target_name(crt1 fq_name)
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index cf4077c3d9ed..1e7f4ca7668c 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,10 +6,9 @@
//
//===----------------------------------------------------------------------===//
-extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
-// TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX.
-extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void
+extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
void *buffer) {
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);