summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrander <rander.wang@intel.com>2017-04-12 16:43:14 +0800
committerYang Rong <rong.r.yang@intel.com>2017-05-04 19:08:31 +0800
commite04f8daf110215b9b4de0f0d9baf76245cfef626 (patch)
tree852b6cb9d49686a177d0e5dee562d86007fe5216
parent2f77bfe64c164579e9baa07d78d4d2f9c63f2e95 (diff)
downloadbeignet-e04f8daf110215b9b4de0f0d9baf76245cfef626.tar.gz
backend: refine remquo to pass cft
do the thing according to spec Signed-off-by: rander.wang <rander.wang@intel.com> Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/libocl/tmpl/ocl_math.tmpl.cl277
-rw-r--r--backend/src/libocl/tmpl/ocl_math_20.tmpl.cl184
2 files changed, 396 insertions, 65 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index bd2a264a..14414af6 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4528,10 +4528,146 @@ OVERLOADABLE double modf(double x, private double *i)
return x - retVal;
}
+double __fmod (double x, double y, int* ik)
+{
+ const double one = 1.0, Zero[] = {0.0, -0.0,};
+ int n,hx,hy,hz,ix,iy,sx,i;
+ uint lx,ly,lz;
+ int ifactor = 0;
+
+ hx = __HI(x);
+ lx = __LO(x);
+ hy = __HI(y);
+ ly = __LO(y);
+ sx = hx&0x80000000; /* sign of x */
+ hx ^=sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
+
+ /* purge off exception values */
+ if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
+ ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+ return (x*y)/(x*y);
+ if(hx<=hy) {
+ if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
+ if(lx==ly)
+ {
+ *ik = 2;
+ return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/
+ }
+ }
+
+ /* determine ix = ilogb(x) */
+ if(hx<0x00100000) { /* subnormal x */
+ if(hx==0) {
+ for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+ } else {
+ for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+ }
+ } else ix = (hx>>20)-1023;
+
+ /* determine iy = ilogb(y) */
+ if(hy<0x00100000) { /* subnormal y */
+ if(hy==0) {
+ for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+ } else {
+ for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+ }
+ } else iy = (hy>>20)-1023;
+
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if(ix >= -1022)
+ hx = 0x00100000|(0x000fffff&hx);
+ else { /* subnormal x, shift x to normal */
+ n = -1022-ix;
+ if(n<=31) {
+ hx = (hx<<n)|(lx>>(32-n));
+ lx <<= n;
+ } else {
+ hx = lx<<(n-32);
+ lx = 0;
+ }
+ }
+ if(iy >= -1022)
+ hy = 0x00100000|(0x000fffff&hy);
+ else { /* subnormal y, shift y to normal */
+ n = -1022-iy;
+ if(n<=31) {
+ hy = (hy<<n)|(ly>>(32-n));
+ ly <<= n;
+ } else {
+ hy = ly<<(n-32);
+ ly = 0;
+ }
+ }
+
+ /* fix point fmod */
+ n = ix - iy;
+ while(n--)
+ {
+ hz=hx-hy;lz=lx-ly;
+ if(lx<ly) hz -= 1;
+
+ ifactor <<= 1;
+ if(hz<0)
+ {
+ hx = hx+hx+(lx>>31);
+ lx = lx+lx;
+ }
+ else
+ {
+ if(n<16)ifactor |= 1;
+
+ if((hz|lz)==0) /* return sign(x)*0 */
+ {
+ *ik = (ifactor << (n + 2));
+ return Zero[(uint)sx>>31];
+ }
+
+ hx = hz+hz+(lz>>31); lx = lz+lz;
+ }
+ }
+
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;}
+ else ifactor = (ifactor << 1);
+
+ ifactor = (ifactor << 1);
+ *ik = ifactor;
+ /* convert back to floating value and restore the sign */
+ if((hx|lx)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ while(hx<0x00100000) { /* normalize x */
+ hx = hx+hx+(lx>>31); lx = lx+lx;
+ iy -= 1;
+ }
+ if(iy>= -1022) { /* normalize output */
+ hx = ((hx-0x00100000)|((iy+1023)<<20));
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+ } else { /* subnormal output */
+ n = -1022 - iy;
+ if(n<=20) {
+ lx = (lx>>n)|((uint)hx<<(32-n));
+ hx >>= n;
+ } else if (n<=31) {
+ lx = (hx<<(32-n))|(lx>>n); hx = sx;
+ } else {
+ lx = hx>>(n-32); hx = sx;
+ }
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
+
+}
+
+
OVERLOADABLE double remquo(double x, double p, global int *quo)
{
- int hx,hp;
- unsigned sx,lx,lp;
+ int hx,hp, ik = 0;
+ unsigned sx,sy, lx,lp;
double p_half, zero = 0.0;
double xx = x;
@@ -4540,140 +4676,185 @@ OVERLOADABLE double remquo(double x, double p, global int *quo)
hp = __HI(p); /* high word of p */
lp = __LO(p); /* low word of p */
sx = hx&0x80000000;
+ sy = hp&0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
- if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */
+ if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */
if((hx>=0x7ff00000)|| /* x not finite */
((hp>=0x7ff00000)&& /* p is NaN */
(((hp-0x7ff00000)|lp)!=0)))
- return (x*p)/(x*p);
+ {*quo = 0; return (x*p)/(x*p);}
- if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */
- if (((hx-hp)|(lx-lp))==0) return zero*x;
+ if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
+
x = fabs(x);
p = fabs(p);
if (hp<0x00200000)
+ {
+ if(x+x>p)
{
- if(x+x>p) {
- x-=p;
- if(x+x>=p) x -= p;
+ x-=p;
+ ik += 1;
+ if(x+x>=p)
+ {
+ x -= p;
+ ik += 1;
+ }
}
}
- else
- {
+ else
+ {
p_half = 0.5*p;
if(x>p_half)
+ {
+ x-=p;
+ ik += 1;
+ if(x>=p_half)
{
- x-=p;
- if(x>=p_half) x -= p;
+ ik += 1;
+ x -= p;
+ }
}
}
__setHigh(&x, __HI(x) ^sx);
- *quo = (xx -x)/p;
+
+ ik &= 0x7f;
+ if(sx != sy) ik = -ik;
+ *quo = ik;
return x;
}
OVERLOADABLE double remquo(double x, double p, local int *quo)
{
- int hx,hp;
- unsigned sx,lx,lp;
+ int hx,hp, ik = 0;
+ unsigned sx,sy, lx,lp;
double p_half, zero = 0.0;
double xx = x;
hx = __HI(x); /* high word of x */
- lx = __LO(x); /* low word of x */
+ lx = __LO(x); /* low word of x */
hp = __HI(p); /* high word of p */
- lp = __LO(p); /* low word of p */
+ lp = __LO(p); /* low word of p */
sx = hx&0x80000000;
+ sy = hp&0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
- if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */
+ if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */
if((hx>=0x7ff00000)|| /* x not finite */
((hp>=0x7ff00000)&& /* p is NaN */
(((hp-0x7ff00000)|lp)!=0)))
- return (x*p)/(x*p);
+ {*quo = 0; return (x*p)/(x*p);}
- if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */
- if (((hx-hp)|(lx-lp))==0) return zero*x;
+ if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
+
x = fabs(x);
p = fabs(p);
if (hp<0x00200000)
+ {
+ if(x+x>p)
{
- if(x+x>p) {
- x-=p;
- if(x+x>=p) x -= p;
+ x-=p;
+ ik += 1;
+ if(x+x>=p)
+ {
+ x -= p;
+ ik += 1;
+ }
}
}
- else
- {
+ else
+ {
p_half = 0.5*p;
if(x>p_half)
+ {
+ x-=p;
+ ik += 1;
+ if(x>=p_half)
{
- x-=p;
- if(x>=p_half) x -= p;
+ ik += 1;
+ x -= p;
+ }
}
}
__setHigh(&x, __HI(x) ^sx);
- *quo = (xx -x)/p;
- return x;
+ ik &= 0x7f;
+ if(sx != sy) ik = -ik;
+ *quo = ik;
+ return x;
}
+
OVERLOADABLE double remquo(double x, double p, private int *quo)
{
- int hx,hp;
- unsigned sx,lx,lp;
+ int hx,hp, ik = 0;
+ unsigned sx,sy, lx,lp;
double p_half, zero = 0.0;
double xx = x;
hx = __HI(x); /* high word of x */
- lx = __LO(x); /* low word of x */
+ lx = __LO(x); /* low word of x */
hp = __HI(p); /* high word of p */
- lp = __LO(p); /* low word of p */
+ lp = __LO(p); /* low word of p */
sx = hx&0x80000000;
+ sy = hp&0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
- if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */
+ if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */
if((hx>=0x7ff00000)|| /* x not finite */
((hp>=0x7ff00000)&& /* p is NaN */
(((hp-0x7ff00000)|lp)!=0)))
- return (x*p)/(x*p);
+ {*quo = 0; return (x*p)/(x*p);}
+
+ if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
- if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */
- if (((hx-hp)|(lx-lp))==0) return zero*x;
x = fabs(x);
p = fabs(p);
if (hp<0x00200000)
+ {
+ if(x+x>p)
{
- if(x+x>p) {
- x-=p;
- if(x+x>=p) x -= p;
+ x-=p;
+ ik += 1;
+ if(x+x>=p)
+ {
+ x -= p;
+ ik += 1;
+ }
}
}
- else
- {
+ else
+ {
p_half = 0.5*p;
if(x>p_half)
+ {
+ x-=p;
+ ik += 1;
+ if(x>=p_half)
{
- x-=p;
- if(x>=p_half) x -= p;
+ ik += 1;
+ x -= p;
+ }
}
}
__setHigh(&x, __HI(x) ^sx);
- *quo = (xx -x)/p;
- return x;
+ ik &= 0x7f;
+ if(sx != sy) ik = -ik;
+ *quo = ik;
+ return x;
}
+
OVERLOADABLE double sincos(double x, global double *cosval)
{
*cosval = cos(x);
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index f7a91c83..e01401b3 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -4041,55 +4041,205 @@ OVERLOADABLE double modf(double x, double *i)
return x - retVal;
}
+double __fmod (double x, double y, int* ik)
+{
+ const double one = 1.0, Zero[] = {0.0, -0.0,};
+ int n,hx,hy,hz,ix,iy,sx,i;
+ uint lx,ly,lz;
+ int ifactor = 0;
+
+ hx = __HI(x);
+ lx = __LO(x);
+ hy = __HI(y);
+ ly = __LO(y);
+ sx = hx&0x80000000; /* sign of x */
+ hx ^=sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
+
+ /* purge off exception values */
+ if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
+ ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+ return (x*y)/(x*y);
+ if(hx<=hy) {
+ if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
+ if(lx==ly)
+ {
+ *ik = 2;
+ return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/
+ }
+ }
+
+ /* determine ix = ilogb(x) */
+ if(hx<0x00100000) { /* subnormal x */
+ if(hx==0) {
+ for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+ } else {
+ for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+ }
+ } else ix = (hx>>20)-1023;
+
+ /* determine iy = ilogb(y) */
+ if(hy<0x00100000) { /* subnormal y */
+ if(hy==0) {
+ for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+ } else {
+ for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+ }
+ } else iy = (hy>>20)-1023;
+
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if(ix >= -1022)
+ hx = 0x00100000|(0x000fffff&hx);
+ else { /* subnormal x, shift x to normal */
+ n = -1022-ix;
+ if(n<=31) {
+ hx = (hx<<n)|(lx>>(32-n));
+ lx <<= n;
+ } else {
+ hx = lx<<(n-32);
+ lx = 0;
+ }
+ }
+ if(iy >= -1022)
+ hy = 0x00100000|(0x000fffff&hy);
+ else { /* subnormal y, shift y to normal */
+ n = -1022-iy;
+ if(n<=31) {
+ hy = (hy<<n)|(ly>>(32-n));
+ ly <<= n;
+ } else {
+ hy = ly<<(n-32);
+ ly = 0;
+ }
+ }
+
+ /* fix point fmod */
+ n = ix - iy;
+ while(n--)
+ {
+ hz=hx-hy;lz=lx-ly;
+ if(lx<ly) hz -= 1;
+
+ ifactor <<= 1;
+ if(hz<0)
+ {
+ hx = hx+hx+(lx>>31);
+ lx = lx+lx;
+ }
+ else
+ {
+ if(n<16)ifactor |= 1;
+
+ if((hz|lz)==0) /* return sign(x)*0 */
+ {
+ *ik = (ifactor << (n + 2));
+ return Zero[(uint)sx>>31];
+ }
+
+ hx = hz+hz+(lz>>31); lx = lz+lz;
+ }
+ }
+
+ hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+ if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;}
+ else ifactor = (ifactor << 1);
+
+ ifactor = (ifactor << 1);
+ *ik = ifactor;
+ /* convert back to floating value and restore the sign */
+ if((hx|lx)==0) /* return sign(x)*0 */
+ return Zero[(uint)sx>>31];
+ while(hx<0x00100000) { /* normalize x */
+ hx = hx+hx+(lx>>31); lx = lx+lx;
+ iy -= 1;
+ }
+ if(iy>= -1022) { /* normalize output */
+ hx = ((hx-0x00100000)|((iy+1023)<<20));
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+ } else { /* subnormal output */
+ n = -1022 - iy;
+ if(n<=20) {
+ lx = (lx>>n)|((uint)hx<<(32-n));
+ hx >>= n;
+ } else if (n<=31) {
+ lx = (hx<<(32-n))|(lx>>n); hx = sx;
+ } else {
+ lx = hx>>(n-32); hx = sx;
+ }
+ __setHigh(&x,hx|sx);
+ __setLow(&x, lx);
+
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
+
+}
+
OVERLOADABLE double remquo(double x, double p, int *quo)
{
- int hx,hp;
- unsigned sx,lx,lp;
+ int hx,hp, ik = 0;
+ unsigned sx,sy, lx,lp;
double p_half, zero = 0.0;
double xx = x;
hx = __HI(x); /* high word of x */
- lx = __LO(x); /* low word of x */
+ lx = __LO(x); /* low word of x */
hp = __HI(p); /* high word of p */
- lp = __LO(p); /* low word of p */
+ lp = __LO(p); /* low word of p */
sx = hx&0x80000000;
+ sy = hp&0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
- if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */
+ if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */
if((hx>=0x7ff00000)|| /* x not finite */
((hp>=0x7ff00000)&& /* p is NaN */
(((hp-0x7ff00000)|lp)!=0)))
- return (x*p)/(x*p);
+ {*quo = 0; return (x*p)/(x*p);}
+
+ if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
- if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */
- if (((hx-hp)|(lx-lp))==0) return zero*x;
x = fabs(x);
p = fabs(p);
if (hp<0x00200000)
+ {
+ if(x+x>p)
{
- if(x+x>p) {
- x-=p;
- if(x+x>=p) x -= p;
+ x-=p;
+ ik += 1;
+ if(x+x>=p)
+ {
+ x -= p;
+ ik += 1;
+ }
}
}
- else
- {
+ else
+ {
p_half = 0.5*p;
if(x>p_half)
+ {
+ x-=p;
+ ik += 1;
+ if(x>=p_half)
{
- x-=p;
- if(x>=p_half) x -= p;
+ ik += 1;
+ x -= p;
+ }
}
}
__setHigh(&x, __HI(x) ^sx);
- *quo = (xx -x)/p;
- return x;
+ ik &= 0x7f;
+ if(sx != sy) ik = -ik;
+ *quo = ik;
+ return x;
}
+
OVERLOADABLE double sincos(double x, double *cosval)
{
*cosval = cos(x);