summaryrefslogtreecommitdiff
path: root/libc/startup
diff options
context:
space:
mode:
authorJoseph Huber <jhuber6@vols.utk.edu>2023-04-20 11:16:01 -0500
committerJoseph Huber <jhuber6@vols.utk.edu>2023-04-24 15:47:53 -0500
commit50445dff43037014a23eb38b1f50bb698e64ffcf (patch)
treeb44aeeb6b16e717993efdb2a6170ede2030fd906 /libc/startup
parent5084ba395e487adee67ba38cc5c68ff7e052e37c (diff)
downloadllvm-50445dff43037014a23eb38b1f50bb698e64ffcf.tar.gz
[libc] Add more utility functions for the GPU
This patch adds extra intrinsics for the GPU. Some of these are unused for now but will be used later. We use these currently to update the `RPC` handling. Currently, every thread can update the RPC client, which isn't correct. This patch adds code necessary to allow a single thread to perform the write while the others wait. Feedback is welcome for the naming of these functions. I'm copying the OpenMP nomenclature where we call an AMD `wavefront` or NVIDIA `warp` a `lane`. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D148810
Diffstat (limited to 'libc/startup')
-rw-r--r--libc/startup/gpu/amdgpu/CMakeLists.txt1
-rw-r--r--libc/startup/gpu/amdgpu/start.cpp28
-rw-r--r--libc/startup/gpu/nvptx/CMakeLists.txt1
-rw-r--r--libc/startup/gpu/nvptx/start.cpp30
4 files changed, 53 insertions, 7 deletions
diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt
index 891d20993b08..a9f33af6d79e 100644
--- a/libc/startup/gpu/amdgpu/CMakeLists.txt
+++ b/libc/startup/gpu/amdgpu/CMakeLists.txt
@@ -4,6 +4,7 @@ add_startup_object(
start.cpp
DEPENDS
libc.src.__support.RPC.rpc_client
+ libc.src.__support.GPU.utils
COMPILE_OPTIONS
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 66f06b086a23..e8b5029f2a76 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -6,16 +6,38 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
-static __llvm_libc::cpp::Atomic<uint32_t> lock;
-
extern "C" int main(int argc, char **argv, char **envp);
+namespace __llvm_libc {
+
+static cpp::Atomic<uint32_t> lock = 0;
+
+static cpp::Atomic<uint32_t> init = 0;
+
+void init_rpc(void *in, void *out, void *buffer) {
+  // Only one thread resets the RPC client; the release store publishes it.
+  if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) {
+    rpc::client.reset(&lock, in, out, buffer);
+    init.store(1, cpp::MemoryOrder::RELEASE);
+  }
+
+  // Spin until the flag is set; the acquire load pairs with the store above.
+  while (!init.load(cpp::MemoryOrder::ACQUIRE))
+    rpc::sleep_briefly();
+
+  // Wait for the threads in the block to converge before returning.
+  gpu::sync_threads();
+}
+
+} // namespace __llvm_libc
+
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
void *buffer) {
- __llvm_libc::rpc::client.reset(&lock, in, out, buffer);
+ __llvm_libc::init_rpc(in, out, buffer);
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
}
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 49661691ecb5..b8a9f49d5be5 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -5,6 +5,7 @@ add_startup_object(
start.cpp
DEPENDS
libc.src.__support.RPC.rpc_client
+ libc.src.__support.GPU.utils
COMPILE_OPTIONS
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index 9939c6e21330..7b88e30f7f37 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,16 +6,38 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
-static __llvm_libc::cpp::Atomic<uint32_t> lock;
-
extern "C" int main(int argc, char **argv, char **envp);
-extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
+namespace __llvm_libc {
+
+static cpp::Atomic<uint32_t> lock = 0;
+
+static cpp::Atomic<uint32_t> init = 0;
+
+void init_rpc(void *in, void *out, void *buffer) {
+  // Only one thread resets the RPC client; the release store publishes it.
+  if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) {
+    rpc::client.reset(&lock, in, out, buffer);
+    init.store(1, cpp::MemoryOrder::RELEASE);
+  }
+
+  // Spin until the flag is set; the acquire load pairs with the store above.
+  while (!init.load(cpp::MemoryOrder::ACQUIRE))
+    rpc::sleep_briefly();
+
+  // Wait for the threads in the block to converge before returning.
+  gpu::sync_threads();
+}
+
+} // namespace __llvm_libc
+
+extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void
_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
void *buffer) {
- __llvm_libc::rpc::client.reset(&lock, in, out, buffer);
+ __llvm_libc::init_rpc(in, out, buffer);
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
}