diff options
Diffstat (limited to 'libc/src/__support/GPU/amdgpu/utils.h')
-rw-r--r-- | libc/src/__support/GPU/amdgpu/utils.h | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index ca9122b6b6a5..87cd6451445a 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -106,7 +106,7 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
 
 /// Returns the id of the thread inside of an AMD wavefront executing together.
 [[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
-  if (LANE_SIZE == 64)
+  if constexpr (LANE_SIZE == 64)
     return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
   else
     return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
@@ -122,6 +122,16 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
   return __builtin_amdgcn_readfirstlane(x);
 }
 
+[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
+  // the lane_mask & gives the nvptx semantics when lane_mask is a subset of
+  // the active threads
+  if constexpr (LANE_SIZE == 64) {
+    return lane_mask & __builtin_amdgcn_ballot_w64(x);
+  } else {
+    return lane_mask & __builtin_amdgcn_ballot_w32(x);
+  }
+}
+
 /// Waits for all the threads in the block to converge and issues a fence.
 [[clang::convergent]] LIBC_INLINE void sync_threads() {
   __builtin_amdgcn_s_barrier();