diff options
author | Joseph Huber <jhuber6@vols.utk.edu> | 2023-03-24 14:45:14 -0500 |
---|---|---|
committer | Joseph Huber <jhuber6@vols.utk.edu> | 2023-03-24 14:46:26 -0500 |
commit | 1fce1d341b17762bb45bdc89520b00820fd63337 (patch) | |
tree | 402afac7238c1fdaddd97670c469871e8b11c677 /libc/startup | |
parent | 8ab9eebb1897ab96f0a8f5bc5430f4039e38e6f8 (diff) | |
download | llvm-1fce1d341b17762bb45bdc89520b00820fd63337.tar.gz |
[libc] Use `nvptx_kernel` attribute in NVPTX startup code
Summary:
A recent patch allowed us to emit a callable kernel from freestanding
NVPTX code. This allows us to move away from using the CUDA language.
This has several advantages; in particular, it works around an entire
assortment of errors I was seeing while implementing RPC for NVIDIA.
Diffstat (limited to 'libc/startup')
-rw-r--r-- | libc/startup/gpu/nvptx/CMakeLists.txt | 7 | ||||
-rw-r--r-- | libc/startup/gpu/nvptx/start.cpp | 5 |
2 files changed, 4 insertions, 8 deletions
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 96ab7540cedb..1ee2108b0ef2 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -6,11 +6,8 @@ add_startup_object(
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
     -nogpulib # Do not include any GPU vendor libraries.
-    -nostdinc
-    -x cuda # Use the CUDA toolchain to emit the `_start` kernel.
-    -fgpu-rdc # Emit relocatable device code from CUDA.
-    --offload-device-only
-    --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
+    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+    --target=${LIBC_GPU_TARGET_TRIPLE}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
 )
 get_fq_target_name(crt1 fq_name)
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index cf4077c3d9ed..1e7f4ca7668c 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,10 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
 
-// TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX.
-extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void
+extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
 _start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
        void *buffer) {
   __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);