backend: add double version of cbrt

cp from fdlibm and pass the cft after refined Signed-off-by: rander <rander.wang@intel.com> Tested-by: Yang Rong <rong.r.yang@intel.com>
author: rander <rander.wang@intel.com> 2017-03-31 10:13:06 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-04-17 16:08:49 +0800
commit: aefbc2abd827ff5c4902133ac8805b46b3a6e403 (patch)
tree: a0a9efe514bdab9768c1e552e5191caa714b6eab
parent: 02e420383d3885f0e8cb55792424b166119d119f (diff)
download: beignet-aefbc2abd827ff5c4902133ac8805b46b3a6e403.tar.gz
2 files changed, 57 insertions, 0 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index cf122812..90df3a98 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -604,6 +604,62 @@ OVERLOADABLE double expm1(double x)
 	return y;
 }
 
+OVERLOADABLE double cbrt(double x)
+{
+	double B1 = 715094163, /* B1 = (682-0.03306235651)*2**20 */
+	B2 = 696219795; /* B2 = (664-0.03306235651)*2**20 */
+
+	double C =  5.42857142857142815906e-01, /* 19/35	 = 0x3FE15F15, 0xF15F15F1 */
+	D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */
+	E =  1.41428571428571436819e+00, /* 99/70	 = 0x3FF6A0EA, 0x0EA0EA0F */
+	F =  1.60714285714285720630e+00, /* 45/28	 = 0x3FF9B6DB, 0x6DB6DB6E */
+	G =  3.57142857142857150787e-01; /* 5/14	  = 0x3FD6DB6D, 0xB6DB6DB7 */
+
+	int	hx;
+	double r,s,t=0.0,w;
+	unsigned sign;
+
+
+	hx = __HI(x);		/* high word of x */
+	sign=hx&0x80000000; 		/* sign= sign(x) */
+	hx  ^=sign;
+	if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */
+	if((hx|__LO(x))==0)
+		return(x);		/* cbrt(0) is itself */
+
+	__setHigh(&x, hx);	/* x <- |x| */
+	/* rough cbrt to 5 bits */
+	if(hx<0x00100000) 		/* subnormal number */
+	  {
+		__setHigh(&t, 0x43500000); 		/* set t= 2**54 */
+		t*=x;
+		__setHigh(&t, __HI(t)/3+B2);
+	  }
+	else
+	  __setHigh(&t, hx/3+B1);
+
+
+	/* new cbrt to 23 bits, may be implemented in single precision */
+	r=t*t/x;
+	s=C+r*t;
+	t*=G+F/(s+E+D/s);
+
+	/* chopped to 20 bits and make it larger than cbrt(x) */
+	__setLow(&t, 0); __setHigh(&t, __HI(t)+0x00000001);
+
+
+	/* one step newton iteration to 53 bits with error less than 0.667 ulps */
+	s=t*t;		/* t*t is exact */
+	r=x/s;
+	w=t+t;
+	r=(r-t)/(w+r);	/* r-s is exact */
+	t=t+t*r;
+
+	/* retore the sign bit */
+	__setHigh(&t, __HI(t)|sign);
+	return(t);
+}
+
 OVERLOADABLE double ceil(double x)
 {
     double ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index a5910468..c815261f 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -33,6 +33,7 @@ OVERLOADABLE double atan2pi(double x, double y);
 OVERLOADABLE double atanh(double x);
 OVERLOADABLE double exp(double x);
 OVERLOADABLE double expm1(double x);
+OVERLOADABLE double cbrt(double x);
 OVERLOADABLE double ceil(double x);
 OVERLOADABLE double copysign(double x, double y);
 OVERLOADABLE double cos(double x);
author	rander <rander.wang@intel.com>	2017-03-31 10:13:06 +0800
committer	Yang Rong <rong.r.yang@intel.com>	2017-04-17 16:08:49 +0800
commit	aefbc2abd827ff5c4902133ac8805b46b3a6e403 (patch)
tree	a0a9efe514bdab9768c1e552e5191caa714b6eab
parent	02e420383d3885f0e8cb55792424b166119d119f (diff)
download	beignet-aefbc2abd827ff5c4902133ac8805b46b3a6e403.tar.gz