summary | refs | log | tree | commit | diff
path: root/libc/test
diff options
context:
space:
mode:
author: Joseph Huber <jhuber6@vols.utk.edu> 2023-05-04 14:53:28 -0500
committer: Joseph Huber <jhuber6@vols.utk.edu> 2023-05-04 19:31:41 -0500
commit: 507edb52f9a9a5c1ab2a92ec2e291a7b63c3fbff (patch)
tree: dcd9f8ef610af4a60ead26e721c5d3aead79777b /libc/test
parent: fe9f557578a565ed01faf75cd07ea4d9b75feeb1 (diff)
download: llvm-507edb52f9a9a5c1ab2a92ec2e291a7b63c3fbff.tar.gz
[libc] Enable multiple threads to use RPC on the GPU
The execution model of the GPU expects that groups of threads will execute in lock-step in SIMD fashion. It's both important for performance and correctness that we treat this as the smallest possible granularity for an RPC operation. Thus, we map multiple threads to a single larger buffer and ship that across the wire.

This patch makes the necessary changes to support executing the RPC on the GPU with multiple threads. This requires some workarounds to mimic the model when handling the protocol from the CPU. I'm not completely happy with some of the workarounds required, but I think it should work.

Uses some of the implementation details from D148191.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D148943
Diffstat (limited to 'libc/test')
-rw-r--r-- libc/test/integration/startup/gpu/CMakeLists.txt | 8
-rw-r--r-- libc/test/integration/startup/gpu/rpc_test.cpp | 15
2 files changed, 20 insertions, 3 deletions
diff --git a/libc/test/integration/startup/gpu/CMakeLists.txt b/libc/test/integration/startup/gpu/CMakeLists.txt
index 754f36d8789c..d2028cc941f0 100644
--- a/libc/test/integration/startup/gpu/CMakeLists.txt
+++ b/libc/test/integration/startup/gpu/CMakeLists.txt
@@ -22,8 +22,12 @@ add_integration_test(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
LOADER_ARGS
- --blocks 16
- --threads 1
+ --blocks-x 2
+ --blocks-y 2
+ --blocks-z 2
+ --threads-x 4
+ --threads-y 4
+ --threads-z 4
)
add_integration_test(
diff --git a/libc/test/integration/startup/gpu/rpc_test.cpp b/libc/test/integration/startup/gpu/rpc_test.cpp
index daf7bf77302c..9dc2214fde41 100644
--- a/libc/test/integration/startup/gpu/rpc_test.cpp
+++ b/libc/test/integration/startup/gpu/rpc_test.cpp
@@ -13,7 +13,8 @@
using namespace __llvm_libc;
static void test_add_simple() {
- uint32_t num_additions = 1000 + 10 * gpu::get_block_id_x();
+ uint32_t num_additions =
+ 10 + 10 * gpu::get_thread_id() + 10 * gpu::get_block_id();
uint64_t cnt = 0;
for (uint32_t i = 0; i < num_additions; ++i) {
rpc::Client::Port port = rpc::client.open(rpc::TEST_INCREMENT);
@@ -29,8 +30,20 @@ static void test_add_simple() {
ASSERT_TRUE(cnt == num_additions && "Incorrect sum");
}
+// Test to ensure that the RPC mechanism doesn't hang on divergence.
+static void test_noop(uint8_t data) {
+ rpc::Client::Port port = rpc::client.open(rpc::NOOP);
+ port.send([=](rpc::Buffer *buffer) { buffer->data[0] = data; });
+ port.close();
+}
+
TEST_MAIN(int argc, char **argv, char **envp) {
test_add_simple();
+ if (gpu::get_thread_id() % 2)
+ test_noop(1);
+ else
+ test_noop(2);
+
return 0;
}