summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: rander.wang <rander.wang@intel.com> 2017-05-15 14:51:17 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-05-17 18:10:22 +0800
commit: 48f5c2024ad2d7752c2f8adc452167346d163e58 (patch)
tree: c8e7f03ca40f045d4db5c7f971e0a148ea2296b6
parent: c56b5076fcccc2718ffa63c8d461614cbbc82e67 (diff)
download: beignet-48f5c2024ad2d7752c2f8adc452167346d163e58.tar.gz
backend: refine sin function
(1) refine the NaN check
(2) use sqrt to get cos
(3) remove small range check

Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl 42
1 file changed, 22 insertions, 20 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index 7c449423..b3c61234 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -489,29 +489,31 @@ OVERLOADABLE float sin(float x)
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_sin(x);
- const float pio4 = 7.8539812565e-01; /* 0x3f490fda */
- float y,z=0.0;
- int n, ix;
-
+ float y;
+ float na ;
+ uint n, ix;
float negative = x < 0.0f? -1.0f : 1.0f;
x = fabs(x);
- GEN_OCL_GET_FLOAT_WORD(ix,x);
- ix &= 0x7fffffff;
-
- /* sin(Inf or NaN) is NaN */
- if (ix >= 0x7f800000) return x-x;
-
- if(x <= pio4)
- return negative * __kernel_sinf(x);
- /* argument reduction needed */
- else {
- n = __ieee754_rem_pio2f(x,&y);
- float s = __kernel_sinf(y);
- float c = __kernel_cosf(y,0.0f);
- float ret = (n&1) ? negative*c : negative*s;
- return (n&3)> 1? -1.0f*ret : ret;
- }
+ /* sin(Inf or NaN) is NaN */
+ na = x -x;
+
+ uint n0, n1;
+ float v;
+ n = __ieee754_rem_pio2f(x,&y);
+ float s = __kernel_sinf(y);
+ float c = sqrt(mad(-s, s, 1.0f));
+ n0 = (n&0x1);
+ n1 = (n&0x2);
+ v = (n0)?c:s;
+ v = (n1)?-v:v;
+ /* n&3 return
+ 0 sin(y)
+ 1 cos(y)
+ 2 -sin(y)
+ 3 -cos(y)
+ */
+ return mad(v, negative, na);
}
OVERLOADABLE float cos(float x)