diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2012-12-27 14:10:16 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2012-12-27 14:10:16 +0100 |
commit | 644efe58c9a67f753cf9c5bf3f9b23fa66ee02f3 (patch) | |
tree | e8ab0c4142257ef5d49c64fb7f03fd0c5fdad460 | |
parent | 0015fc1a497c18c9c484da2f590d5d2a5a853abd (diff) | |
download | gmp-644efe58c9a67f753cf9c5bf3f9b23fa66ee02f3.tar.gz |
Complete rewrite of non-IEEE code.
-rw-r--r-- | mpn/generic/get_d.c | 391 |
1 files changed, 178 insertions, 213 deletions
diff --git a/mpn/generic/get_d.c b/mpn/generic/get_d.c index 65bdf2a88..88a46784c 100644 --- a/mpn/generic/get_d.c +++ b/mpn/generic/get_d.c @@ -29,33 +29,20 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define _GMP_IEEE_FLOATS 0 #endif -#if ! _GMP_IEEE_FLOATS -/* dummy definition, just to let dead code compile */ -union ieee_double_extract { - struct { - int manh, manl, sig, exp; - } s; - double d; -}; -#endif - /* To force use of the generic C code for testing, put "#define _GMP_IEEE_FLOATS 0" at this point. */ - /* In alpha gcc prior to 3.4, signed DI comparisons involving constants are rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly wrong if that addition overflows. - The workaround here avoids this bug by ensuring n is not a literal - constant. Note that this is alpha specific. The offending transformation - is/was in alpha.c alpha_emit_conditional_branch() under "We want to use - cmpcc/bcc". + The workaround here avoids this bug by ensuring n is not a literal constant. + Note that this is alpha specific. The offending transformation is/was in + alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc". - Bizarrely, it turns out this happens also with Cray cc on - alphaev5-cray-unicosmk2.0.6.X, and has the same solution. Don't know why - or how. */ + Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X, + and has the same solution. Don't know why or how. */ #if HAVE_HOST_CPU_FAMILY_alpha \ && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \ @@ -70,55 +57,57 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53; #endif - -/* Return the value {ptr,size}*2^exp, and negative if sign<0. - Must have size>=1, and a non-zero high limb ptr[size-1]. +/* Return the value {ptr,size}*2^exp, and negative if sign<0. Must have + size>=1, and a non-zero high limb ptr[size-1]. {ptr,size} is truncated towards zero. This is consistent with other gmp - conversions, like mpz_set_f or mpz_set_q, and is easy to implement and - test. + conversions, like mpz_set_f or mpz_set_q, and is easy to implement and test. In the past conversions had attempted (imperfectly) to let the hardware float rounding mode take effect, but that gets tricky since multiple roundings need to be avoided, or taken into account, and denorms mean the effective precision of the mantissa is not constant. (For reference, - mpz_get_d on IEEE systems was ok, except it operated on the absolute - value. mpf_get_d and mpq_get_d suffered from multiple roundings and from - not always using enough bits to get the rounding right.) + mpz_get_d on IEEE systems was ok, except it operated on the absolute value. + mpf_get_d and mpq_get_d suffered from multiple roundings and from not always + using enough bits to get the rounding right.) It's felt that GMP is not primarily concerned with hardware floats, and really isn't enhanced by getting involved with hardware rounding modes - (which could even be some weird unknown style), so something unambiguous - and straightforward is best. + (which could even be some weird unknown style), so something unambiguous and + straightforward is best. The IEEE code below is the usual case, it knows either a 32-bit or 64-bit limb and is done with shifts and masks. The 64-bit case in particular should come out nice and compact. - The generic code works one bit at a time, which will be quite slow, but - should support any binary-based "double" and be safe against any rounding - mode. Note in particular it works on IEEE systems too. + The generic code used to work one bit at a time, which was ont only slow, + but implicitly relied upon denoms for intermediates, since the lowest bits' + weight of a perfectly valid fp number underflows in non-denorm. Therefore, + the generic code now works limb-per-limb, initially creating a number x such + that 1 <= x <= BASE. (BASE is reached only as result of rounding.) Then + x's exponent is scaled with explicit code (not ldexp to avoid libm + dependency). It is a tap-dance to avoid underflow or overflow, beware! Traps: - Hardware traps for overflow to infinity, underflow to zero, or - unsupported denorms may or may not be taken. The IEEE code works bitwise - and so probably won't trigger them, the generic code works by float - operations and so probably will. This difference might be thought less - than ideal, but again its felt straightforward code is better than trying - to get intimate with hardware exceptions (of perhaps unknown nature). + Hardware traps for overflow to infinity, underflow to zero, or unsupported + denorms may or may not be taken. The IEEE code works bitwise and so + probably won't trigger them, the generic code works by float operations and + so probably will. This difference might be thought less than ideal, but + again its felt straightforward code is better than trying to get intimate + with hardware exceptions (of perhaps unknown nature). Not done: - mpz_get_d in the past handled size==1 with a cast limb->double. This - might still be worthwhile there (for up to the mantissa many bits), but - for mpn_get_d here, the cost of applying "exp" to the resulting exponent - would probably use up any benefit a cast may have over bit twiddling. - Also, if the exponent is pushed into denorm range then bit twiddling is - the only option, to ensure the desired truncation is obtained. + mpz_get_d in the past handled size==1 with a cast limb->double. This might + still be worthwhile there (for up to the mantissa many bits), but for + mpn_get_d here, the cost of applying "exp" to the resulting exponent would + probably use up any benefit a cast may have over bit twiddling. Also, if + the exponent is pushed into denorm range then bit twiddling is the only + option, to ensure the desired truncation is obtained. Other: @@ -130,11 +119,11 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53; cast, neither in the IEEE or generic code. */ + double mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp) { int lshift, nbits; - union ieee_double_extract u; mp_limb_t x, mhi, mlo; ASSERT (size >= 0); @@ -150,8 +139,9 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp) if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size) > (unsigned long) (LONG_MAX - exp))) { - if (_GMP_IEEE_FLOATS) - goto ieee_infinity; +#if _GMP_IEEE_FLOATS + goto ieee_infinity; +#endif /* generic */ exp = LONG_MAX; @@ -162,224 +152,199 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp) } #if ! _GMP_IEEE_FLOATS - { - /* Non-IEEE or strange limb size, do something generic. */ - - mp_size_t i; - mp_limb_t limb, bit; - int shift; - double base, factor, prev_factor, d, new_d, diff; - - /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the - bit being examined, initially the highest non-zero bit. */ - i = size-1; - limb = up[i]; - count_leading_zeros (shift, limb); - bit = GMP_LIMB_HIGHBIT >> shift; - - /* relative to just under high non-zero bit */ - exp -= (shift - GMP_NAIL_BITS) + 1; - - /* Power up "factor" to 2^exp, being the value of the "bit" in "limb" - being examined. */ - base = (exp >= 0 ? 2.0 : 0.5); - exp = ABS (exp); - factor = 1.0; - for (;;) + { /* Non-IEEE or strange limb size, do something generic. */ + mp_size_t i; + double d, weight; + unsigned long uexp; + + /* First generate an fp number disregarding exp, instead keeping things + within the numb base factor from 1, which should prevent overflow and + underflow even for the most exponent limited fp formats. The + termination criteria should be refined, since we now include too many + limbs. */ + weight = 1/MP_BASE_AS_DOUBLE; + d = up[size - 1]; + for (i = size - 2; i >= 0; i--) { - if (exp & 1) - { - prev_factor = factor; - factor *= base; - FORCE_DOUBLE (factor); - if (factor == 0.0) - return 0.0; /* underflow */ - if (factor == prev_factor) - { - d = factor; /* overflow, apparent infinity */ - goto generic_done; - } - } - exp >>= 1; - if (exp == 0) + d += up[i] * weight; + weight /= MP_BASE_AS_DOUBLE; + if (weight == 0) break; - base *= base; } - /* Add a "factor" for each non-zero bit, working from high to low. - Stop if any rounding occurs, hence implementing a truncation. - - Note no attention is paid to DBL_MANT_DIG, since the effective - number of bits in the mantissa isn't constant when in denorm range. - We also encountered an ARM system with apparently somewhat doubtful - software floats where DBL_MANT_DIG claimed 53 bits but only 32 - actually worked. */ - - d = factor; /* high bit */ - for (;;) + /* Now apply exp. */ + exp -= GMP_NUMB_BITS; + if (exp > 0) { - factor *= 0.5; /* next bit */ - bit >>= 1; - if (bit == 0) - { - /* next limb, if any */ - i--; - if (i < 0) - break; - limb = up[i]; - bit = GMP_NUMB_HIGHBIT; - } - - if (bit & limb) - { - new_d = d + factor; - FORCE_DOUBLE (new_d); - diff = new_d - d; - if (diff != factor) - break; /* rounding occured, stop now */ - d = new_d; - } + weight = 2.0; + uexp = exp; } + else + { + weight = 0.5; + uexp = 1 - (unsigned long) (exp + 1); + } +#if 1 + /* Square-and-multiply exponentiation. */ + if (uexp & 1) + d *= weight; + while (uexp >>= 1) + { + weight *= weight; + if (uexp & 1) + d *= weight; + } +#else + /* Plain exponentiation. */ + while (uexp > 0) + { + d *= weight; + uexp--; + } +#endif - generic_done: - return (sign >= 0 ? d : -d); + return sign >= 0 ? d : -d; } -#endif +#else /* _GMP_IEEE_FLOATS */ + { + union ieee_double_extract u; - up += size; + up += size; #if GMP_LIMB_BITS == 64 - mlo = up[-1]; - count_leading_zeros (lshift, mlo); - - exp -= (lshift - GMP_NAIL_BITS) + 1; - mlo <<= lshift; + mlo = up[-1]; + count_leading_zeros (lshift, mlo); - nbits = GMP_LIMB_BITS - lshift; + exp -= (lshift - GMP_NAIL_BITS) + 1; + mlo <<= lshift; - if (nbits < 53 && size > 1) - { - x = up[-2]; - x <<= GMP_NAIL_BITS; - x >>= nbits; - mlo |= x; - nbits += GMP_NUMB_BITS; + nbits = GMP_LIMB_BITS - lshift; - if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2) + if (nbits < 53 && size > 1) { - x = up[-3]; + x = up[-2]; x <<= GMP_NAIL_BITS; x >>= nbits; mlo |= x; nbits += GMP_NUMB_BITS; + + if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2) + { + x = up[-3]; + x <<= GMP_NAIL_BITS; + x >>= nbits; + mlo |= x; + nbits += GMP_NUMB_BITS; + } } - } - mhi = mlo >> (32 + 11); - mlo = mlo >> 11; /* later implicitly truncated to 32 bits */ + mhi = mlo >> (32 + 11); + mlo = mlo >> 11; /* later implicitly truncated to 32 bits */ #endif #if GMP_LIMB_BITS == 32 - x = *--up; - count_leading_zeros (lshift, x); + x = *--up; + count_leading_zeros (lshift, x); - exp -= (lshift - GMP_NAIL_BITS) + 1; - x <<= lshift; - mhi = x >> 11; + exp -= (lshift - GMP_NAIL_BITS) + 1; + x <<= lshift; + mhi = x >> 11; - if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */ - { - /* All 20 bits in mhi */ - mlo = x << 21; - /* >= 1 bit in mlo */ - nbits = GMP_LIMB_BITS - lshift - 21; - } - else - { - if (size > 1) + if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */ { - nbits = GMP_LIMB_BITS - lshift; - - x = *--up, size--; - x <<= GMP_NAIL_BITS; - mhi |= x >> nbits >> 11; - - mlo = x << GMP_LIMB_BITS - nbits - 11; - nbits = nbits + 11 - GMP_NAIL_BITS; + /* All 20 bits in mhi */ + mlo = x << 21; + /* >= 1 bit in mlo */ + nbits = GMP_LIMB_BITS - lshift - 21; } else { - mlo = 0; - goto done; - } - } + if (size > 1) + { + nbits = GMP_LIMB_BITS - lshift; - /* Now all needed bits in mhi have been accumulated. Add bits to mlo. */ + x = *--up, size--; + x <<= GMP_NAIL_BITS; + mhi |= x >> nbits >> 11; - if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1) - { - x = up[-1]; - x <<= GMP_NAIL_BITS; - x >>= nbits; - mlo |= x; - nbits += GMP_NUMB_BITS; + mlo = x << GMP_LIMB_BITS - nbits - 11; + nbits = nbits + 11 - GMP_NAIL_BITS; + } + else + { + mlo = 0; + goto done; + } + } - if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2) + /* Now all needed bits in mhi have been accumulated. Add bits to mlo. */ + + if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1) { - x = up[-2]; + x = up[-1]; x <<= GMP_NAIL_BITS; x >>= nbits; mlo |= x; nbits += GMP_NUMB_BITS; - if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3) + if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2) { - x = up[-3]; + x = up[-2]; x <<= GMP_NAIL_BITS; x >>= nbits; mlo |= x; nbits += GMP_NUMB_BITS; + + if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3) + { + x = up[-3]; + x <<= GMP_NAIL_BITS; + x >>= nbits; + mlo |= x; + nbits += GMP_NUMB_BITS; + } } } - } - done:; + done:; #endif - if (UNLIKELY (exp >= CONST_1024)) - { - /* overflow, return infinity */ - ieee_infinity: - mhi = 0; - mlo = 0; - exp = 1024; - } - else if (UNLIKELY (exp <= CONST_NEG_1023)) - { - int rshift; + if (UNLIKELY (exp >= CONST_1024)) + { + /* overflow, return infinity */ + ieee_infinity: + mhi = 0; + mlo = 0; + exp = 1024; + } + else if (UNLIKELY (exp <= CONST_NEG_1023)) + { + int rshift; - if (LIKELY (exp <= CONST_NEG_1022_SUB_53)) - return 0.0; /* denorm underflows to zero */ + if (LIKELY (exp <= CONST_NEG_1022_SUB_53)) + return 0.0; /* denorm underflows to zero */ - rshift = -1022 - exp; - ASSERT (rshift > 0 && rshift < 53); + rshift = -1022 - exp; + ASSERT (rshift > 0 && rshift < 53); #if GMP_LIMB_BITS > 53 - mlo >>= rshift; - mhi = mlo >> 32; + mlo >>= rshift; + mhi = mlo >> 32; #else - if (rshift >= 32) - { - mlo = mhi; - mhi = 0; - rshift -= 32; - } - lshift = GMP_LIMB_BITS - rshift; - mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift); - mhi >>= rshift; + if (rshift >= 32) + { + mlo = mhi; + mhi = 0; + rshift -= 32; + } + lshift = GMP_LIMB_BITS - rshift; + mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift); + mhi >>= rshift; #endif - exp = -1023; + exp = -1023; + } + u.s.manh = mhi; + u.s.manl = mlo; + u.s.exp = exp + 1023; + u.s.sig = (sign < 0); + return u.d; } - u.s.manh = mhi; - u.s.manl = mlo; - u.s.exp = exp + 1023; - u.s.sig = (sign < 0); - return u.d; +#endif } |