From 7062348bf35c1e4cbfee00ad9fffb4a21aa6eff7 Mon Sep 17 00:00:00 2001
From: Jia Tan
Date: Wed, 19 Apr 2023 21:59:03 +0800
Subject: tuklib_integer: Use __builtin_clz() with Clang.

Clang has support for __builtin_clz(), but previously Clang would
fall back to either the MSVC intrinsic or the regular C code. This was
discovered due to a bug where a new version of Clang required the
<intrin.h> header file in order to use the MSVC intrinsics.

ctz32() gets the same treatment so that __builtin_ctz() is used with
Clang too. The x86 inline assembly fallback keeps its architecture
check because "bsfl" is valid only on x86.

Thanks to Anton Kochkov for notifying us about the bug.
---
 src/common/tuklib_integer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/common/tuklib_integer.h b/src/common/tuklib_integer.h
index b58ef68..2125695 100644
--- a/src/common/tuklib_integer.h
+++ b/src/common/tuklib_integer.h
@@ -630,7 +630,7 @@ bsr32(uint32_t n)
 #if defined(__INTEL_COMPILER)
 	return _bit_scan_reverse(n);
 
-#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
+#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
 	// GCC >= 3.4 has __builtin_clz(), which gives good results on
 	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
 	// either plain BSR (so the XOR gets optimized away) or LZCNT and
@@ -684,7 +684,7 @@ clz32(uint32_t n)
 #if defined(__INTEL_COMPILER)
 	return _bit_scan_reverse(n) ^ 31U;
 
-#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
+#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
 	return (uint32_t)__builtin_clz(n);
 
 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
@@ -739,7 +739,7 @@ ctz32(uint32_t n)
-#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
+#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
 	return (uint32_t)__builtin_ctz(n);
 
 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 	uint32_t i;
 	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
 	return i;
-- 
cgit v1.2.1