[libc][rpc] Allocate locks array within process

Replaces the global lock arrays currently used with an array stored inside the Process struct itself. It is worth changing this array to a bitmap before allowing a runtime-selected number of ports much larger than 64. One bit per port is likely to be cheap enough that sizing for the worst case is always fine; otherwise, in the future we can change to allocating it dynamically.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D150309
author: Jon Chesterfield <jonathanchesterfield@gmail.com> 2023-05-11 00:39:54 +0100
committer: Jon Chesterfield <jonathanchesterfield@gmail.com> 2023-05-11 00:41:51 +0100
commit: f497611f436cbf5ae0157edcf498f62a136799cb (patch)
tree: 9e27d72e406e4979b710ffa943ae5c554d2d233f /libc
parent: 958a3d8e2dec95a878dfc9d823861b3a6c674534 (diff)
download: llvm-f497611f436cbf5ae0157edcf498f62a136799cb.tar.gz
6 files changed, 15 insertions, 24 deletions
diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
index ae905bccbc65..d448e190214f 100644
--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -106,20 +106,20 @@ template <bool InvertInbox> struct Process {
 
   uint64_t port_count;
   uint32_t lane_size;
-  cpp::Atomic<uint32_t> *lock;
   cpp::Atomic<uint32_t> *inbox;
   cpp::Atomic<uint32_t> *outbox;
   Packet *packet;
 
+  cpp::Atomic<uint32_t> lock[default_port_count] = {0};
+
   /// Initialize the communication channels.
-  LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *lock,
-                         void *inbox, void *outbox, void *packet) {
-    *this = {port_count,
-             lane_size,
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(lock),
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox),
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox),
-             reinterpret_cast<Packet *>(packet)};
+  LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *inbox,
+                         void *outbox, void *packet) {
+    this->port_count = port_count;
+    this->lane_size = lane_size;
+    this->inbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox);
+    this->outbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox);
+    this->packet = reinterpret_cast<Packet *>(packet);
   }
 
   /// The length of the packet is flexible because the server needs to look up
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 84adb3b97527..9761c64cb318 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);
 
 namespace __llvm_libc {
 
-static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};
-
 extern "C" uintptr_t __init_array_start[];
 extern "C" uintptr_t __init_array_end[];
 extern "C" uintptr_t __fini_array_start[];
@@ -44,8 +42,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
   // We need to set up the RPC client first in case any of the constructors
   // require it.
   __llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
-                                 __llvm_libc::gpu::get_lane_size(),
-                                 &__llvm_libc::lock, in, out, buffer);
+                                 __llvm_libc::gpu::get_lane_size(), in, out,
+                                 buffer);
 
   // We want the fini array callbacks to be run after other atexit
   // callbacks are run. So, we register them before running the init
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index 1d366dc829df..78cdc64ed967 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);
 
 namespace __llvm_libc {
 
-static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};
-
 extern "C" {
 // Nvidia's 'nvlink' linker does not provide these symbols. We instead need
 // to manually create them and update the globals in the loader implememtation.
@@ -48,8 +46,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
   // We need to set up the RPC client first in case any of the constructors
   // require it.
   __llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
-                                 __llvm_libc::gpu::get_lane_size(),
-                                 &__llvm_libc::lock, in, out, buffer);
+                                 __llvm_libc::gpu::get_lane_size(), in, out,
+                                 buffer);
 
   // We want the fini array callbacks to be run after other atexit
   // callbacks are run. So, we register them before running the init
diff --git a/libc/utils/gpu/loader/Server.h b/libc/utils/gpu/loader/Server.h
index f77bf256618a..89ef712e8596 100644
--- a/libc/utils/gpu/loader/Server.h
+++ b/libc/utils/gpu/loader/Server.h
@@ -19,9 +19,6 @@
 
 static __llvm_libc::rpc::Server server;
 
-static __llvm_libc::cpp::Atomic<uint32_t>
-    lock[__llvm_libc::rpc::default_port_count] = {0};
-
 /// Queries the RPC client at least once and performs server-side work if there
 /// are any active requests.
 void handle_server() {
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 07fa1ae7fe16..ad5e02116918 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -359,8 +359,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
   hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);
 
   // Initialize the RPC server's buffer for host-device communication.
-  server.reset(port_size, wavefront_size, &lock, server_inbox, server_outbox,
-               buffer);
+  server.reset(port_size, wavefront_size, server_inbox, server_outbox, buffer);
 
   // Obtain a queue with the minimum (power of two) size, used to send commands
   // to the HSA runtime and launch execution on the device.
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index 314f5a8055fb..2230f55ea24e 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -260,8 +260,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
     handle_error("Failed to allocate memory the RPC client / server.");
 
   // Initialize the RPC server's buffer for host-device communication.
-  server.reset(port_size, warp_size, &lock, server_inbox, server_outbox,
-               buffer);
+  server.reset(port_size, warp_size, server_inbox, server_outbox, buffer);
 
   LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
   // Call the kernel to
author	Jon Chesterfield <jonathanchesterfield@gmail.com>	2023-05-11 00:39:54 +0100
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>	2023-05-11 00:41:51 +0100
commit	f497611f436cbf5ae0157edcf498f62a136799cb (patch)
tree	9e27d72e406e4979b710ffa943ae5c554d2d233f /libc
parent	958a3d8e2dec95a878dfc9d823861b3a6c674534 (diff)
download	llvm-f497611f436cbf5ae0157edcf498f62a136799cb.tar.gz