diff options
author | Joseph Huber <jhuber6@vols.utk.edu> | 2023-04-20 11:16:01 -0500 |
---|---|---|
committer | Joseph Huber <jhuber6@vols.utk.edu> | 2023-04-24 15:47:53 -0500 |
commit | 50445dff43037014a23eb38b1f50bb698e64ffcf (patch) | |
tree | b44aeeb6b16e717993efdb2a6170ede2030fd906 /libc/startup | |
parent | 5084ba395e487adee67ba38cc5c68ff7e052e37c (diff) | |
download | llvm-50445dff43037014a23eb38b1f50bb698e64ffcf.tar.gz |
[libc] Add more utility functions for the GPU
This patch adds extra intrinsics for the GPU. Some of these are unused
for now but will be used later. We use these currently to update the
`RPC` handling. Currently, every thread can update the RPC client, which
isn't correct. This patch adds code necessary to allow a single thread
to perform the write while the others wait.
Feedback is welcome for the naming of these functions. I'm copying the
OpenMP nomenclature where we call an AMD `wavefront` or NVIDIA `warp` a
`lane`.
Reviewed By: tra
Differential Revision: https://reviews.llvm.org/D148810
Diffstat (limited to 'libc/startup')
-rw-r--r-- | libc/startup/gpu/amdgpu/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libc/startup/gpu/amdgpu/start.cpp | 28 | ||||
-rw-r--r-- | libc/startup/gpu/nvptx/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libc/startup/gpu/nvptx/start.cpp | 30 |
4 files changed, 53 insertions, 7 deletions
diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt index 891d20993b08..a9f33af6d79e 100644 --- a/libc/startup/gpu/amdgpu/CMakeLists.txt +++ b/libc/startup/gpu/amdgpu/CMakeLists.txt @@ -4,6 +4,7 @@ add_startup_object( start.cpp DEPENDS libc.src.__support.RPC.rpc_client + libc.src.__support.GPU.utils COMPILE_OPTIONS -ffreestanding # To avoid compiler warnings about calling the main function. -fno-builtin diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index 66f06b086a23..e8b5029f2a76 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -6,16 +6,38 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" -static __llvm_libc::cpp::Atomic<uint32_t> lock; - extern "C" int main(int argc, char **argv, char **envp); +namespace __llvm_libc { + +static cpp::Atomic<uint32_t> lock = 0; + +static cpp::Atomic<uint32_t> init = 0; + +void init_rpc(void *in, void *out, void *buffer) { + // Only a single thread should update the RPC data. + if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) { + rpc::client.reset(&lock, in, out, buffer); + init.store(1, cpp::MemoryOrder::RELAXED); + } + + // Wait until the previous thread signals that the data has been written. + while (!init.load(cpp::MemoryOrder::RELAXED)) + rpc::sleep_briefly(); + + // Wait for the threads in the block to converge and fence the write. 
+ gpu::sync_threads(); +} + +} // namespace __llvm_libc + extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void _start(int argc, char **argv, char **envp, int *ret, void *in, void *out, void *buffer) { - __llvm_libc::rpc::client.reset(&lock, in, out, buffer); + __llvm_libc::init_rpc(in, out, buffer); __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index 49661691ecb5..b8a9f49d5be5 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -5,6 +5,7 @@ add_startup_object( start.cpp DEPENDS libc.src.__support.RPC.rpc_client + libc.src.__support.GPU.utils COMPILE_OPTIONS -ffreestanding # To avoid compiler warnings about calling the main function. -fno-builtin diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index 9939c6e21330..7b88e30f7f37 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -6,16 +6,38 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" -static __llvm_libc::cpp::Atomic<uint32_t> lock; - extern "C" int main(int argc, char **argv, char **envp); -extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void +namespace __llvm_libc { + +static cpp::Atomic<uint32_t> lock = 0; + +static cpp::Atomic<uint32_t> init = 0; + +void init_rpc(void *in, void *out, void *buffer) { + // Only a single thread should update the RPC data. + if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) { + rpc::client.reset(&lock, in, out, buffer); + init.store(1, cpp::MemoryOrder::RELAXED); + } + + // Wait until the previous thread signals that the data has been written. + while (!init.load(cpp::MemoryOrder::RELAXED)) + rpc::sleep_briefly(); + + // Wait for the threads in the block to converge and fence the write. 
+ gpu::sync_threads(); +} + +} // namespace __llvm_libc + +extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void _start(int argc, char **argv, char **envp, int *ret, void *in, void *out, void *buffer) { - __llvm_libc::rpc::client.reset(&lock, in, out, buffer); + __llvm_libc::init_rpc(in, out, buffer); __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } |