diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2023-05-11 00:39:54 +0100 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2023-05-11 00:41:51 +0100 |
commit | f497611f436cbf5ae0157edcf498f62a136799cb (patch) | |
tree | 9e27d72e406e4979b710ffa943ae5c554d2d233f /libc | |
parent | 958a3d8e2dec95a878dfc9d823861b3a6c674534 (diff) | |
download | llvm-f497611f436cbf5ae0157edcf498f62a136799cb.tar.gz |
[libc][rpc] Allocate locks array within process
Replaces the globals currently used. It is worth changing the array to a bitmap
before allowing a runtime number of ports >> 64. One bit per port is likely
to be cheap enough that sizing for the worst case is always fine; otherwise,
in the future we can change to dynamically allocating it.
Reviewed By: jhuber6
Differential Revision: https://reviews.llvm.org/D150309
Diffstat (limited to 'libc')
-rw-r--r-- | libc/src/__support/RPC/rpc.h | 18 | ||||
-rw-r--r-- | libc/startup/gpu/amdgpu/start.cpp | 6 | ||||
-rw-r--r-- | libc/startup/gpu/nvptx/start.cpp | 6 | ||||
-rw-r--r-- | libc/utils/gpu/loader/Server.h | 3 | ||||
-rw-r--r-- | libc/utils/gpu/loader/amdgpu/Loader.cpp | 3 | ||||
-rw-r--r-- | libc/utils/gpu/loader/nvptx/Loader.cpp | 3 |
6 files changed, 15 insertions, 24 deletions
diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index ae905bccbc65..d448e190214f 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -106,20 +106,20 @@ template <bool InvertInbox> struct Process { uint64_t port_count; uint32_t lane_size; - cpp::Atomic<uint32_t> *lock; cpp::Atomic<uint32_t> *inbox; cpp::Atomic<uint32_t> *outbox; Packet *packet; + cpp::Atomic<uint32_t> lock[default_port_count] = {0}; + /// Initialize the communication channels. - LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *lock, - void *inbox, void *outbox, void *packet) { - *this = {port_count, - lane_size, - reinterpret_cast<cpp::Atomic<uint32_t> *>(lock), - reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox), - reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox), - reinterpret_cast<Packet *>(packet)}; + LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *inbox, + void *outbox, void *packet) { + this->port_count = port_count; + this->lane_size = lane_size; + this->inbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox); + this->outbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox); + this->packet = reinterpret_cast<Packet *>(packet); } /// The length of the packet is flexible because the server needs to look up diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index 84adb3b97527..9761c64cb318 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp); namespace __llvm_libc { -static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0}; - extern "C" uintptr_t __init_array_start[]; extern "C" uintptr_t __init_array_end[]; extern "C" uintptr_t __fini_array_start[]; @@ -44,8 +42,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) { // We need to set up the RPC client first in case any of the constructors // require it. 
__llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count, - __llvm_libc::gpu::get_lane_size(), - &__llvm_libc::lock, in, out, buffer); + __llvm_libc::gpu::get_lane_size(), in, out, + buffer); // We want the fini array callbacks to be run after other atexit // callbacks are run. So, we register them before running the init diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index 1d366dc829df..78cdc64ed967 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp); namespace __llvm_libc { -static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0}; - extern "C" { // Nvidia's 'nvlink' linker does not provide these symbols. We instead need // to manually create them and update the globals in the loader implememtation. @@ -48,8 +46,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) { // We need to set up the RPC client first in case any of the constructors // require it. __llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count, - __llvm_libc::gpu::get_lane_size(), - &__llvm_libc::lock, in, out, buffer); + __llvm_libc::gpu::get_lane_size(), in, out, + buffer); // We want the fini array callbacks to be run after other atexit // callbacks are run. So, we register them before running the init diff --git a/libc/utils/gpu/loader/Server.h b/libc/utils/gpu/loader/Server.h index f77bf256618a..89ef712e8596 100644 --- a/libc/utils/gpu/loader/Server.h +++ b/libc/utils/gpu/loader/Server.h @@ -19,9 +19,6 @@ static __llvm_libc::rpc::Server server; -static __llvm_libc::cpp::Atomic<uint32_t> - lock[__llvm_libc::rpc::default_port_count] = {0}; - /// Queries the RPC client at least once and performs server-side work if there /// are any active requests. 
void handle_server() { diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp index 07fa1ae7fe16..ad5e02116918 100644 --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -359,8 +359,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size, hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer); // Initialize the RPC server's buffer for host-device communication. - server.reset(port_size, wavefront_size, &lock, server_inbox, server_outbox, - buffer); + server.reset(port_size, wavefront_size, server_inbox, server_outbox, buffer); // Obtain a queue with the minimum (power of two) size, used to send commands // to the HSA runtime and launch execution on the device. diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp index 314f5a8055fb..2230f55ea24e 100644 --- a/libc/utils/gpu/loader/nvptx/Loader.cpp +++ b/libc/utils/gpu/loader/nvptx/Loader.cpp @@ -260,8 +260,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size, handle_error("Failed to allocate memory the RPC client / server."); // Initialize the RPC server's buffer for host-device communication. - server.reset(port_size, warp_size, &lock, server_inbox, server_outbox, - buffer); + server.reset(port_size, warp_size, server_inbox, server_outbox, buffer); LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1}; // Call the kernel to |