summary | refs | log | tree | commit | diff
path: root/libc/src/__support/GPU/amdgpu/utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support/GPU/amdgpu/utils.h')
-rw-r--r-- libc/src/__support/GPU/amdgpu/utils.h | 12
1 files changed, 11 insertions, 1 deletions
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index ca9122b6b6a5..87cd6451445a 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -106,7 +106,7 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
/// Returns the id of the thread inside of an AMD wavefront executing together.
[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
- if (LANE_SIZE == 64)
+ if constexpr (LANE_SIZE == 64)
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
else
return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
@@ -122,6 +122,16 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
return __builtin_amdgcn_readfirstlane(x);
}
+[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
+ // ANDing the ballot result with lane_mask matches the NVPTX semantics,
+ // provided lane_mask is a subset of the currently active threads.
+ if constexpr (LANE_SIZE == 64) { // compile-time choice: use the w64 builtin
+ return lane_mask & __builtin_amdgcn_ballot_w64(x);
+ } else { // any other LANE_SIZE falls back to the w32 builtin
+ return lane_mask & __builtin_amdgcn_ballot_w32(x);
+ }
+}
+
/// Waits for all the threads in the block to converge and issues a fence.
[[clang::convergent]] LIBC_INLINE void sync_threads() {
__builtin_amdgcn_s_barrier();