diff options
author | rander <rander.wang@intel.com> | 2017-05-18 16:18:03 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-06-09 18:55:59 +0800 |
commit | 65410edd68681dedaf1f41ba432dcdf4463286aa (patch) | |
tree | f2ad3f49ea1dcab74cf8447a2681ce9b04da9f5f /backend | |
parent | 6804cca263edd11fd03b2d7f5b7ba034d9a013c3 (diff) | |
download | beignet-65410edd68681dedaf1f41ba432dcdf4463286aa.tar.gz |
backend: refine hypot function
the test OCL_Magnitude of opencv is slow on beignet because
of hypot. refine the hypot, change algorithm and remove
unnecessary code to get 30% up
Signed-off-by: rander.wang <rander.wang@intel.com>
Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 74 |
1 files changed, 60 insertions, 14 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index 6b942dbe..f7153219 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -2894,12 +2894,35 @@ float __gen_ocl_internal_pown(float x, int y) { return as_float((a & (0x807FFFFFu)) | (u & 0x80000000u) | 0x3F000000); float __gen_ocl_internal_frexp(float x, int *exp) { BODY; } +float __fast_scalbnf(float x, int n) { + /* copy from fdlibm */ + float two25 = 3.355443200e+07, /* 0x4c000000 */ + twom25 = 2.9802322388e-08, /* 0x33000000 */ + huge = 1.0e+30, tiny = 1.0e-30; + int k, ix, t, tmp; + float retVal; + + GEN_OCL_GET_FLOAT_WORD(ix, x); + k = (ix & 0x7f800000) >> 23; /* extract exponent */ + t = k; + k = k + n; + tmp = (ix & 0x807fffff); + x = as_float(tmp | (k << 23)); + retVal = (k > 0) ? x : 0.0f; + retVal = (k > 0xfe) ? INFINITY : retVal; + retVal = (k <= -25) ? 0.0f : retVal; + x = as_float(tmp | ((k + 25) << 23)); + retVal = ((k > 0) && (k <= 25)) ? x * twom25 : retVal; + retVal = (t == 0) ? 0.0f : retVal; + + return retVal; +} + OVERLOADABLE float hypot(float x, float y) { if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_hypot(x, y); - //return __gen_ocl_sqrt(x*x + y*y); - float a,b,an,bn,cn; + float a, b, an, bn, cn, retVal; int e; if (isfinite (x) && isfinite (y)){ /* Determine absolute values. */ x = __gen_ocl_fabs (x); @@ -2907,19 +2930,42 @@ OVERLOADABLE float hypot(float x, float y) { /* Find the bigger and the smaller one. */ a = max(x,y); b = min(x,y); - /* Now 0 <= b <= a. */ - /* Write a = an * 2^e, b = bn * 2^e with 0 <= bn <= an < 1. */ - an = __gen_ocl_internal_frexp (a, &e); - bn = ldexp (b, - e); - /* Through the normalization, no unneeded overflow or underflow will occur here. */ - cn = __gen_ocl_sqrt (an * an + bn * bn); - return ldexp (cn, e); - }else{ - if (isinf (x) || isinf (y)) /* x or y is infinite. Return +Infinity. */ - return INFINITY; - else /* x or y is NaN. Return NaN. */ - return x + y; + + bool skip = false; + uint u = as_uint(a); + uint x = u; + if (x == 0) { + e = 0; + an = x; + skip = true; } + + if (x >= 0x800000) { + e = (x >> 23) - 126; + an = as_float((u & (0x807FFFFFu)) | 0x3F000000); + skip = true; + } + + if (!skip) { + int msbOne = clz(x); + x <<= (msbOne - 8); + e = -117 - msbOne; + an = as_float((x & (0x807FFFFFu)) | 0x3F000000); + } + + bn = __fast_scalbnf(b, -e); + /* Through the normalization, no unneeded overflow or underflow will occur + * here. */ + cn = __gen_ocl_sqrt(mad(an, an, bn * bn)); + retVal = __fast_scalbnf(cn, e); + } else { + retVal = NAN; /* x or y is NaN. Return NaN. */ + retVal = (isinf(x) || isinf(y)) + ? INFINITY + : retVal; /* x or y is infinite. Return +Infinity. */ + } + + return retVal; } OVERLOADABLE float powr(float x, float y) { |