summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrander <rander.wang@intel.com>2017-03-31 10:13:06 +0800
committerYang Rong <rong.r.yang@intel.com>2017-04-17 16:08:49 +0800
commitaefbc2abd827ff5c4902133ac8805b46b3a6e403 (patch)
treea0a9efe514bdab9768c1e552e5191caa714b6eab
parent02e420383d3885f0e8cb55792424b166119d119f (diff)
downloadbeignet-aefbc2abd827ff5c4902133ac8805b46b3a6e403.tar.gz
backend: add double version of cbrt
cp from fdlibm and pass the cft after refined Signed-off-by: rander <rander.wang@intel.com> Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/libocl/tmpl/ocl_math_common.tmpl.cl56
-rw-r--r--backend/src/libocl/tmpl/ocl_math_common.tmpl.h1
2 files changed, 57 insertions, 0 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index cf122812..90df3a98 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -604,6 +604,62 @@ OVERLOADABLE double expm1(double x)
return y;
}
+OVERLOADABLE double cbrt(double x)
+{
+ double B1 = 715094163, /* B1 = (682-0.03306235651)*2**20 */
+ B2 = 696219795; /* B2 = (664-0.03306235651)*2**20 */
+
+ double C = 5.42857142857142815906e-01, /* 19/35 = 0x3FE15F15, 0xF15F15F1 */
+ D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */
+ E = 1.41428571428571436819e+00, /* 99/70 = 0x3FF6A0EA, 0x0EA0EA0F */
+ F = 1.60714285714285720630e+00, /* 45/28 = 0x3FF9B6DB, 0x6DB6DB6E */
+ G = 3.57142857142857150787e-01; /* 5/14 = 0x3FD6DB6D, 0xB6DB6DB7 */
+
+ int hx;
+ double r,s,t=0.0,w;
+ unsigned sign;
+
+
+ hx = __HI(x); /* high word of x */
+ sign=hx&0x80000000; /* sign= sign(x) */
+ hx ^=sign;
+ if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */
+ if((hx|__LO(x))==0)
+ return(x); /* cbrt(0) is itself */
+
+ __setHigh(&x, hx); /* x <- |x| */
+ /* rough cbrt to 5 bits */
+ if(hx<0x00100000) /* subnormal number */
+ {
+ __setHigh(&t, 0x43500000); /* set t= 2**54 */
+ t*=x;
+ __setHigh(&t, __HI(t)/3+B2);
+ }
+ else
+ __setHigh(&t, hx/3+B1);
+
+
+ /* new cbrt to 23 bits, may be implemented in single precision */
+ r=t*t/x;
+ s=C+r*t;
+ t*=G+F/(s+E+D/s);
+
+ /* chopped to 20 bits and make it larger than cbrt(x) */
+ __setLow(&t, 0); __setHigh(&t, __HI(t)+0x00000001);
+
+
+ /* one step newton iteration to 53 bits with error less than 0.667 ulps */
+ s=t*t; /* t*t is exact */
+ r=x/s;
+ w=t+t;
+ r=(r-t)/(w+r); /* r-s is exact */
+ t=t+t*r;
+
+ /* retore the sign bit */
+ __setHigh(&t, __HI(t)|sign);
+ return(t);
+}
+
OVERLOADABLE double ceil(double x)
{
double ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index a5910468..c815261f 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -33,6 +33,7 @@ OVERLOADABLE double atan2pi(double x, double y);
OVERLOADABLE double atanh(double x);
OVERLOADABLE double exp(double x);
OVERLOADABLE double expm1(double x);
+OVERLOADABLE double cbrt(double x);
OVERLOADABLE double ceil(double x);
OVERLOADABLE double copysign(double x, double y);
OVERLOADABLE double cos(double x);