summaryrefslogtreecommitdiff
path: root/src/set_float128.c
diff options
context:
space:
mode:
authorvlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2017-08-17 11:17:51 +0000
committervlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2017-08-17 11:17:51 +0000
commit97abbda37a06f409bd386d0657e4b2ce11b0b9f7 (patch)
treebd340cd57839e663b822480bde4b20723c6a71b2 /src/set_float128.c
parentb933be0c4b2e32f0acacd7696f8077a22d7700e1 (diff)
downloadmpfr-97abbda37a06f409bd386d0657e4b2ce11b0b9f7.tar.gz
[src/set_float128.c] No longer depend on the native FP type "double"
(via mpfr_set_d), avoiding the usual precision issues with the x87 traditional FPU in particular. Use the internal representation with limbs, instead. The code is simpler and should also be faster. Note: together with r11627, this avoids the tset_float128 failure with the "-m32 -mpc32" GCC options. git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11629 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/set_float128.c')
-rw-r--r--src/set_float128.c168
1 files changed, 62 insertions, 106 deletions
diff --git a/src/set_float128.c b/src/set_float128.c
index 19bf80ea9..3bc9a143c 100644
--- a/src/set_float128.c
+++ b/src/set_float128.c
@@ -34,9 +34,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
int
mpfr_set_float128 (mpfr_ptr r, __float128 d, mpfr_rnd_t rnd_mode)
{
- mpfr_t t, u;
- int inexact, shift_exp;
- __float128 x;
+ mpfr_t t;
+ mp_limb_t *tp;
+ int inexact, shift_exp, neg, e, i;
+ __float128 p[14], q[14];
MPFR_SAVE_EXPO_DECL (expo);
/* Check for NaN */
@@ -63,118 +64,73 @@ mpfr_set_float128 (mpfr_ptr r, __float128 d, mpfr_rnd_t rnd_mode)
else if (MPFR_UNLIKELY (d == (__float128) 0.0))
return mpfr_set_d (r, (double) d, rnd_mode);
- mpfr_init2 (t, IEEE_FLOAT128_MANT_DIG);
- mpfr_init2 (u, IEEE_FLOAT128_MANT_DIG);
-
- MPFR_SAVE_EXPO_MARK (expo);
+ shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */
+ neg = d < 0;
+ if (d < 0)
+ d = -d;
- x = d;
- MPFR_SET_ZERO (t); /* The sign doesn't matter. */
- shift_exp = 0; /* invariant: remainder to deal with is x*2^shift_exp */
- /* 2^(-16494) <= x < 2^16384 */
- while (x != (__float128) 0.0)
+ /* Scaling, avoiding (slow) divisions. Should the tables be cached? */
+ if (d >= 1.0)
{
- /* check overflow of double */
- if (x > (__float128) DBL_MAX || (-x) > (__float128) DBL_MAX)
- {
- __float128 div9, div10, div11, div12, div13;
-
-#define TWO_64 18446744073709551616.0 /* 2^64 */
-#define TWO_128 (TWO_64 * TWO_64)
-#define TWO_256 (TWO_128 * TWO_128)
- div9 = (__float128) (double) (TWO_256 * TWO_256); /* 2^(2^9) */
- div10 = div9 * div9;
- div11 = div10 * div10; /* 2^(2^11) */
- div12 = div11 * div11; /* 2^(2^12) */
- div13 = div12 * div12; /* 2^(2^13) */
- if (ABS (x) >= div13)
- {
- x /= div13; /* exact */
- shift_exp += 8192;
- mpfr_div_2si (t, t, 8192, MPFR_RNDZ);
- }
- /* now |x| < 2^8192 */
- if (ABS (x) >= div12)
- {
- x /= div12; /* exact */
- shift_exp += 4096;
- mpfr_div_2si (t, t, 4096, MPFR_RNDZ);
- }
- /* now |x| < 2^4096 */
- if (ABS (x) >= div11)
- {
- x /= div11; /* exact */
- shift_exp += 2048;
- mpfr_div_2si (t, t, 2048, MPFR_RNDZ);
- }
- /* now |x| < 2^2048 */
- if (ABS (x) >= (0.5 * div10))
- {
- x /= (2.0 * div10); /* exact */
- shift_exp += 1025;
- mpfr_div_2si (t, t, 1025, MPFR_RNDZ);
- }
- /* now |x| < 2^1023 */
- } /* end of check overflow of double */
-
- /* check underflow on double */
- else if ((x < (__float128) DBL_MIN) && ((-x) < (__float128) DBL_MIN))
+ p[0] = 2.0;
+ q[0] = 0.5;
+ e = 1;
+ /* p[i] = 2^(2^i), q[i] = 1/p[i] */
+ for (i = 0; i < 13 && d >= p[i]; i++)
{
- __float128 div9, div10, div11, div12, div13;
-
- div9 = (__float128) (double) 7.4583407312002067432909653e-155;
- /* div9 = 2^(-512) */
- div10 = div9 * div9; /* 2^(-1024) */
- div11 = div10 * div10; /* 2^(-2048) */
- div12 = div11 * div11; /* 2^(-4096) */
- div13 = div12 * div12; /* 2^(-8192) */
- if (ABS (x) <= div13)
- {
- x /= div13; /* exact */
- shift_exp -= 8192;
- mpfr_mul_2si (t, t, 8192, MPFR_RNDZ);
- }
- /* now |x| > 2^(-8192) */
- if (ABS (x) <= div12)
- {
- x /= div12; /* exact */
- shift_exp -= 4096;
- mpfr_mul_2si (t, t, 4096, MPFR_RNDZ);
- }
- /* now |x| > 2^(-4096) */
- if (ABS (x) <= div11)
- {
- x /= div11; /* exact */
- shift_exp -= 2048;
- mpfr_mul_2si (t, t, 2048, MPFR_RNDZ);
- }
- /* now |x| > 2^(-2048) */
- if (ABS (x) <= (div10 * 4.0)) /* div10 * 4.0 = 2^(-1022) */
- {
- x /= (div10 * 0.25); /* exact, div10 * 0.25 = 2^(-1026) */
- shift_exp -= 1026;
- mpfr_mul_2si (t, t, 1026, MPFR_RNDZ);
- }
- /* now |x| > 2^(-1022) */
+ p[i+1] = p[i] * p[i];
+ q[i+1] = q[i] * q[i];
+ e <<= 1;
}
- else /* no underflow: 2^(-512) <= |x| < 2^1023 */
+ for (; i >= 0; e >>= 1, i--)
+ if (d >= p[i])
+ {
+ d *= q[i];
+ shift_exp += e;
+ }
+ d *= 0.5;
+ shift_exp++;
+ }
+ else if (d < 0.5)
+ {
+ p[0] = 2.0;
+ q[0] = 0.5;
+ e = 1;
+ /* p[i] = 2^(2^i), q[i] = 1/p[i] */
+ for (i = 0; i < 13 && d < q[i]; i++)
{
- inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ);
- MPFR_ASSERTD (inexact == 0);
- if (mpfr_add (t, t, u, MPFR_RNDZ) != 0)
- {
- if (!mpfr_number_p (t))
- break;
- /* Inexact. This cannot happen unless the C implementation
- "lies" on the precision. */
- }
- x -= (__float128) mpfr_get_d1 (u); /* exact */
+ p[i+1] = p[i] * p[i];
+ q[i+1] = q[i] * q[i];
+ e <<= 1;
}
+ /* The while() may be needed for i = 13 due to subnormals.
+ This can probably be improved without yielding an underflow. */
+ for (; i >= 0; e >>= 1, i--)
+ while (d < q[i])
+ {
+ d *= p[i];
+ shift_exp -= e;
+ }
+ }
+
+ MPFR_ASSERTD (d >= 0.5 && d < 1.0);
+
+ mpfr_init2 (t, IEEE_FLOAT128_MANT_DIG);
+ tp = MPFR_MANT (t);
+
+ MPFR_SAVE_EXPO_MARK (expo);
+ MPFR_SET_EXP (t, shift_exp);
+ MPFR_SET_SIGN (t, neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS);
+
+ for (i = MPFR_LAST_LIMB (t); i >= 0; i--)
+ {
+ d *= 2 * (__float128) MPFR_LIMB_HIGHBIT;
+ tp[i] = (mp_limb_t) d;
+ d -= tp[i];
}
- inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode);
+ inexact = mpfr_set (r, t, rnd_mode);
mpfr_clear (t);
- mpfr_clear (u);
MPFR_SAVE_EXPO_FREE (expo);
return mpfr_check_range (r, inexact, rnd_mode);