diff options
author | Joseph Huber <jhuber6@vols.utk.edu> | 2023-04-27 14:35:47 -0500 |
---|---|---|
committer | Joseph Huber <jhuber6@vols.utk.edu> | 2023-04-27 14:37:00 -0500 |
commit | efe5e2bbb66500a1f37f683d88ac9e4716802292 (patch) | |
tree | 38253805b95ba8eb3732d8f9dc1f3471c50e62c2 /libc/src | |
parent | 460ea850148b10dfd30cce32b2ac600b313324c4 (diff) | |
download | llvm-efe5e2bbb66500a1f37f683d88ac9e4716802292.tar.gz |
[libc] Add more missing GPU utilities
Summary:
This patch adds a way to get the total number of blocks and implement
the wave sync intrinsic for AMDGPU. This is a no-op, but that may change
in the future so we might as well implement it right.
Diffstat (limited to 'libc/src')
-rw-r--r-- | libc/src/__support/GPU/amdgpu/utils.h | 14 | ||||
-rw-r--r-- | libc/src/__support/GPU/generic/utils.h | 12 | ||||
-rw-r--r-- | libc/src/__support/GPU/nvptx/utils.h | 10 |
3 files changed, 31 insertions, 5 deletions
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h index a4ac7d26f0d9..ca9122b6b6a5 100644 --- a/libc/src/__support/GPU/amdgpu/utils.h +++ b/libc/src/__support/GPU/amdgpu/utils.h @@ -34,6 +34,11 @@ LIBC_INLINE uint32_t get_num_blocks_z() { return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z(); } +/// Returns the total number of workgruops in the grid. +LIBC_INLINE uint64_t get_num_blocks() { + return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); +} + /// Returns the 'x' dimension of the current AMD workgroup's id. LIBC_INLINE uint32_t get_block_id_x() { return __builtin_amdgcn_workgroup_id_x(); @@ -70,6 +75,11 @@ LIBC_INLINE uint32_t get_num_threads_z() { return __builtin_amdgcn_workgroup_size_z(); } +/// Returns the total number of workitems in the workgroup. +LIBC_INLINE uint64_t get_num_threads() { + return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); +} + /// Returns the 'x' dimension id of the workitem in the current AMD workgroup. LIBC_INLINE uint32_t get_thread_id_x() { return __builtin_amdgcn_workitem_id_x(); @@ -119,7 +129,9 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; } } /// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU. -[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {} +[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) { + __builtin_amdgcn_wave_barrier(); +} } // namespace gpu } // namespace __llvm_libc diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h index 20e1b16ec25b..0decb3fa59d5 100644 --- a/libc/src/__support/GPU/generic/utils.h +++ b/libc/src/__support/GPU/generic/utils.h @@ -20,9 +20,11 @@ constexpr const uint64_t LANE_SIZE = 1; LIBC_INLINE uint32_t get_num_blocks_x() { return 1; } -LIBC_INLINE uint32_t get_num_blocks_y() { return 0; } +LIBC_INLINE uint32_t get_num_blocks_y() { return 1; } -LIBC_INLINE uint32_t get_num_blocks_z() { return 0; } +LIBC_INLINE uint32_t get_num_blocks_z() { return 1; } + +LIBC_INLINE uint64_t get_num_blocks() { return 1; } LIBC_INLINE uint32_t get_block_id_x() { return 0; } @@ -34,9 +36,11 @@ LIBC_INLINE uint64_t get_block_id() { return 0; } LIBC_INLINE uint32_t get_num_threads_x() { return 1; } -LIBC_INLINE uint32_t get_num_threads_y() { return 0; } +LIBC_INLINE uint32_t get_num_threads_y() { return 1; } + +LIBC_INLINE uint32_t get_num_threads_z() { return 1; } -LIBC_INLINE uint32_t get_num_threads_z() { return 0; } +LIBC_INLINE uint64_t get_num_threads() { return 1; } LIBC_INLINE uint32_t get_thread_id_x() { return 0; } diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h index 88544db85b9f..443b8c72fc85 100644 --- a/libc/src/__support/GPU/nvptx/utils.h +++ b/libc/src/__support/GPU/nvptx/utils.h @@ -34,6 +34,11 @@ LIBC_INLINE uint32_t get_num_blocks_z() { return __nvvm_read_ptx_sreg_nctaid_z(); } +/// Returns the total number of CUDA blocks. +LIBC_INLINE uint64_t get_num_blocks() { + return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); +} + /// Returns the 'x' dimension of the current CUDA block's id. LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); } @@ -64,6 +69,11 @@ LIBC_INLINE uint32_t get_num_threads_z() { return __nvvm_read_ptx_sreg_ntid_z(); } +/// Returns the total number of threads in the block. +LIBC_INLINE uint64_t get_num_threads() { + return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); +} + /// Returns the 'x' dimension id of the thread in the current CUDA block. LIBC_INLINE uint32_t get_thread_id_x() { return __nvvm_read_ptx_sreg_tid_x(); } |