diff options
author | rander <rander.wang@intel.com> | 2017-03-31 10:13:06 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-04-17 16:08:49 +0800 |
commit | aefbc2abd827ff5c4902133ac8805b46b3a6e403 (patch) | |
tree | a0a9efe514bdab9768c1e552e5191caa714b6eab | |
parent | 02e420383d3885f0e8cb55792424b166119d119f (diff) | |
download | beignet-aefbc2abd827ff5c4902133ac8805b46b3a6e403.tar.gz |
backend: add double version of cbrt
cp from fdlibm and pass the cft after refined
Signed-off-by: rander <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 56 | ||||
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.h | 1 |
2 files changed, 57 insertions, 0 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index cf122812..90df3a98 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -604,6 +604,62 @@ OVERLOADABLE double expm1(double x) return y; } +OVERLOADABLE double cbrt(double x) +{ + double B1 = 715094163, /* B1 = (682-0.03306235651)*2**20 */ + B2 = 696219795; /* B2 = (664-0.03306235651)*2**20 */ + + double C = 5.42857142857142815906e-01, /* 19/35 = 0x3FE15F15, 0xF15F15F1 */ + D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */ + E = 1.41428571428571436819e+00, /* 99/70 = 0x3FF6A0EA, 0x0EA0EA0F */ + F = 1.60714285714285720630e+00, /* 45/28 = 0x3FF9B6DB, 0x6DB6DB6E */ + G = 3.57142857142857150787e-01; /* 5/14 = 0x3FD6DB6D, 0xB6DB6DB7 */ + + int hx; + double r,s,t=0.0,w; + unsigned sign; + + + hx = __HI(x); /* high word of x */ + sign=hx&0x80000000; /* sign= sign(x) */ + hx ^=sign; + if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */ + if((hx|__LO(x))==0) + return(x); /* cbrt(0) is itself */ + + __setHigh(&x, hx); /* x <- |x| */ + /* rough cbrt to 5 bits */ + if(hx<0x00100000) /* subnormal number */ + { + __setHigh(&t, 0x43500000); /* set t= 2**54 */ + t*=x; + __setHigh(&t, __HI(t)/3+B2); + } + else + __setHigh(&t, hx/3+B1); + + + /* new cbrt to 23 bits, may be implemented in single precision */ + r=t*t/x; + s=C+r*t; + t*=G+F/(s+E+D/s); + + /* chopped to 20 bits and make it larger than cbrt(x) */ + __setLow(&t, 0); __setHigh(&t, __HI(t)+0x00000001); + + + /* one step newton iteration to 53 bits with error less than 0.667 ulps */ + s=t*t; /* t*t is exact */ + r=x/s; + w=t+t; + r=(r-t)/(w+r); /* r-s is exact */ + t=t+t*r; + + /* retore the sign bit */ + __setHigh(&t, __HI(t)|sign); + return(t); +} + OVERLOADABLE double ceil(double x) { double ret; diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h index a5910468..c815261f 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h @@ -33,6 +33,7 @@ OVERLOADABLE double atan2pi(double x, double y); OVERLOADABLE double atanh(double x); OVERLOADABLE double exp(double x); OVERLOADABLE double expm1(double x); +OVERLOADABLE double cbrt(double x); OVERLOADABLE double ceil(double x); OVERLOADABLE double copysign(double x, double y); OVERLOADABLE double cos(double x); |