From 416923346a6d31563801784963d2893a8d1da1c8 Mon Sep 17 00:00:00 2001
From: Timo Rothenpieler <timo@rothenpieler.org>
Date: Sat, 3 Sep 2022 19:49:53 +0200
Subject: compat/cuda: switch from powf to __powf intrinsic

The powf builtin causes crashes on older clang versions, so implement
the (faster) __powf intrinsic manually instead.
The code it generates is identical to what nvcc emits.
---
 compat/cuda/cuda_runtime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'compat')

diff --git a/compat/cuda/cuda_runtime.h b/compat/cuda/cuda_runtime.h
index 082e4a8ba3..699c4b6c75 100644
--- a/compat/cuda/cuda_runtime.h
+++ b/compat/cuda/cuda_runtime.h
@@ -182,11 +182,11 @@ static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); }
 static inline __device__ float fabs(float a) { return __builtin_fabsf(a); }
 static inline __device__ double fabs(double a) { return __builtin_fabs(a); }
 static inline __device__ float sqrtf(float a) { return __builtin_sqrtf(a); }
-static inline __device__ float powf(float a, float  y) { return __builtin_powf(a,y); }
 
 static inline __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); }
 static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); }
 static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); }
 static inline __device__ float __expf(float a) { return __nvvm_ex2_approx_f(a * (float)__builtin_log2(__builtin_exp(1))); }
+static inline __device__ float __powf(float a, float b) { return __nvvm_ex2_approx_f(__nvvm_lg2_approx_f(a) * b); }
 
 #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */
-- 
cgit v1.2.1