diff options
author | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-08-17 11:17:51 +0000 |
---|---|---|
committer | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-08-17 11:17:51 +0000 |
commit | 97abbda37a06f409bd386d0657e4b2ce11b0b9f7 (patch) | |
tree | bd340cd57839e663b822480bde4b20723c6a71b2 /src/set_float128.c | |
parent | b933be0c4b2e32f0acacd7696f8077a22d7700e1 (diff) | |
download | mpfr-97abbda37a06f409bd386d0657e4b2ce11b0b9f7.tar.gz |
[src/set_float128.c] No longer depend on the native FP type "double"
(via mpfr_set_d), avoiding the usual precision issues with the x87
traditional FPU in particular. Use the internal representation with
limbs, instead. The code is simpler and should also be faster.
Note: together with r11627, this avoids the tset_float128 failure
with the "-m32 -mpc32" GCC options.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11629 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/set_float128.c')
-rw-r--r-- | src/set_float128.c | 168 |
1 files changed, 62 insertions, 106 deletions
diff --git a/src/set_float128.c b/src/set_float128.c index 19bf80ea9..3bc9a143c 100644 --- a/src/set_float128.c +++ b/src/set_float128.c @@ -34,9 +34,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., int mpfr_set_float128 (mpfr_ptr r, __float128 d, mpfr_rnd_t rnd_mode) { - mpfr_t t, u; - int inexact, shift_exp; - __float128 x; + mpfr_t t; + mp_limb_t *tp; + int inexact, shift_exp, neg, e, i; + __float128 p[14], q[14]; MPFR_SAVE_EXPO_DECL (expo); /* Check for NaN */ @@ -63,118 +64,73 @@ mpfr_set_float128 (mpfr_ptr r, __float128 d, mpfr_rnd_t rnd_mode) else if (MPFR_UNLIKELY (d == (__float128) 0.0)) return mpfr_set_d (r, (double) d, rnd_mode); - mpfr_init2 (t, IEEE_FLOAT128_MANT_DIG); - mpfr_init2 (u, IEEE_FLOAT128_MANT_DIG); - - MPFR_SAVE_EXPO_MARK (expo); + shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */ + neg = d < 0; + if (d < 0) + d = -d; - x = d; - MPFR_SET_ZERO (t); /* The sign doesn't matter. */ - shift_exp = 0; /* invariant: remainder to deal with is x*2^shift_exp */ - /* 2^(-16494) <= x < 2^16384 */ - while (x != (__float128) 0.0) + /* Scaling, avoiding (slow) divisions. Should the tables be cached? */ + if (d >= 1.0) { - /* check overflow of double */ - if (x > (__float128) DBL_MAX || (-x) > (__float128) DBL_MAX) - { - __float128 div9, div10, div11, div12, div13; - -#define TWO_64 18446744073709551616.0 /* 2^64 */ -#define TWO_128 (TWO_64 * TWO_64) -#define TWO_256 (TWO_128 * TWO_128) - div9 = (__float128) (double) (TWO_256 * TWO_256); /* 2^(2^9) */ - div10 = div9 * div9; - div11 = div10 * div10; /* 2^(2^11) */ - div12 = div11 * div11; /* 2^(2^12) */ - div13 = div12 * div12; /* 2^(2^13) */ - if (ABS (x) >= div13) - { - x /= div13; /* exact */ - shift_exp += 8192; - mpfr_div_2si (t, t, 8192, MPFR_RNDZ); - } - /* now |x| < 2^8192 */ - if (ABS (x) >= div12) - { - x /= div12; /* exact */ - shift_exp += 4096; - mpfr_div_2si (t, t, 4096, MPFR_RNDZ); - } - /* now |x| < 2^4096 */ - if (ABS (x) >= div11) - { - x /= div11; /* exact */ - shift_exp += 2048; - mpfr_div_2si (t, t, 2048, MPFR_RNDZ); - } - /* now |x| < 2^2048 */ - if (ABS (x) >= (0.5 * div10)) - { - x /= (2.0 * div10); /* exact */ - shift_exp += 1025; - mpfr_div_2si (t, t, 1025, MPFR_RNDZ); - } - /* now |x| < 2^1023 */ - } /* end of check overflow of double */ - - /* check underflow on double */ - else if ((x < (__float128) DBL_MIN) && ((-x) < (__float128) DBL_MIN)) + p[0] = 2.0; + q[0] = 0.5; + e = 1; + /* p[i] = 2^(2^i), q[i] = 1/p[i] */ + for (i = 0; i < 13 && d >= p[i]; i++) { - __float128 div9, div10, div11, div12, div13; - - div9 = (__float128) (double) 7.4583407312002067432909653e-155; - /* div9 = 2^(-512) */ - div10 = div9 * div9; /* 2^(-1024) */ - div11 = div10 * div10; /* 2^(-2048) */ - div12 = div11 * div11; /* 2^(-4096) */ - div13 = div12 * div12; /* 2^(-8192) */ - if (ABS (x) <= div13) - { - x /= div13; /* exact */ - shift_exp -= 8192; - mpfr_mul_2si (t, t, 8192, MPFR_RNDZ); - } - /* now |x| > 2^(-8192) */ - if (ABS (x) <= div12) - { - x /= div12; /* exact */ - shift_exp -= 4096; - mpfr_mul_2si (t, t, 4096, MPFR_RNDZ); - } - /* now |x| > 2^(-4096) */ - if (ABS (x) <= div11) - { - x /= div11; /* exact */ - shift_exp -= 2048; - mpfr_mul_2si (t, t, 2048, MPFR_RNDZ); - } - /* now |x| > 2^(-2048) */ - if (ABS (x) <= (div10 * 4.0)) /* div10 * 4.0 = 2^(-1022) */ - { - x /= (div10 * 0.25); /* exact, div10 * 0.25 = 2^(-1026) */ - shift_exp -= 1026; - mpfr_mul_2si (t, t, 1026, MPFR_RNDZ); - } - /* now |x| > 2^(-1022) */ + p[i+1] = p[i] * p[i]; + q[i+1] = q[i] * q[i]; + e <<= 1; } - else /* no underflow: 2^(-512) <= |x| < 2^1023 */ + for (; i >= 0; e >>= 1, i--) + if (d >= p[i]) + { + d *= q[i]; + shift_exp += e; + } + d *= 0.5; + shift_exp++; + } + else if (d < 0.5) + { + p[0] = 2.0; + q[0] = 0.5; + e = 1; + /* p[i] = 2^(2^i), q[i] = 1/p[i] */ + for (i = 0; i < 13 && d < q[i]; i++) { - inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ); - MPFR_ASSERTD (inexact == 0); - if (mpfr_add (t, t, u, MPFR_RNDZ) != 0) - { - if (!mpfr_number_p (t)) - break; - /* Inexact. This cannot happen unless the C implementation - "lies" on the precision. */ - } - x -= (__float128) mpfr_get_d1 (u); /* exact */ + p[i+1] = p[i] * p[i]; + q[i+1] = q[i] * q[i]; + e <<= 1; } + /* The while() may be needed for i = 13 due to subnormals. + This can probably be improved without yielding an underflow. */ + for (; i >= 0; e >>= 1, i--) + while (d < q[i]) + { + d *= p[i]; + shift_exp -= e; + } + } + + MPFR_ASSERTD (d >= 0.5 && d < 1.0); + + mpfr_init2 (t, IEEE_FLOAT128_MANT_DIG); + tp = MPFR_MANT (t); + + MPFR_SAVE_EXPO_MARK (expo); + MPFR_SET_EXP (t, shift_exp); + MPFR_SET_SIGN (t, neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS); + + for (i = MPFR_LAST_LIMB (t); i >= 0; i--) + { + d *= 2 * (__float128) MPFR_LIMB_HIGHBIT; + tp[i] = (mp_limb_t) d; + d -= tp[i]; } - inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode); + inexact = mpfr_set (r, t, rnd_mode); mpfr_clear (t); - mpfr_clear (u); MPFR_SAVE_EXPO_FREE (expo); return mpfr_check_range (r, inexact, rnd_mode); |