summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS1
-rw-r--r--src/const_euler.c371
2 files changed, 199 insertions, 173 deletions
diff --git a/NEWS b/NEWS
index 217ea0115..0259a0572 100644
--- a/NEWS
+++ b/NEWS
@@ -46,6 +46,7 @@ Changes from versions 3.1.* to version 3.2.0:
- Dropped K&R C compatibility.
- Improved MPFR manual.
- New MPFRbench program (see the tools/bench directory).
+- Speedup in the mpfr_const_euler function (contributed by Fredrik Johansson).
- Bug fixes. In particular, a speed improvement when the --enable-assert
or --enable-assert=full configure option is used with GCC.
diff --git a/src/const_euler.c b/src/const_euler.c
index db033d80b..6e3c3cd37 100644
--- a/src/const_euler.c
+++ b/src/const_euler.c
@@ -1,7 +1,7 @@
/* mpfr_const_euler -- Euler's constant
Copyright 2001-2014 Free Software Foundation, Inc.
-Contributed by the AriC and Caramel projects, INRIA.
+Contributed by Fredrik Johansson.
This file is part of the GNU MPFR Library.
@@ -20,6 +20,9 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+/* The approximation error bound uses Theorem 1 and Remark 2 in
+ http://arxiv.org/pdf/1312.0039v1.pdf */
+
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
@@ -34,219 +37,241 @@ mpfr_const_euler (mpfr_ptr x, mpfr_rnd_t rnd_mode) {
}
-static void mpfr_const_euler_S2 (mpfr_ptr, mpfr_t);
-static void mpfr_const_euler_R (mpfr_ptr, unsigned long);
-
-int
-mpfr_const_euler_internal (mpfr_t x, mpfr_rnd_t rnd)
+typedef struct
{
- mpfr_prec_t prec = MPFR_PREC(x), m, log2m;
- mpfr_t y, z, l2;
- unsigned long n;
- int inexact;
- MPFR_ZIV_DECL (loop);
+ mpz_t P;
+ mpz_t Q;
+ mpz_t T;
+ mpz_t C;
+ mpz_t D;
+ mpz_t V;
+} mpfr_const_euler_bs_struct;
- MPFR_LOG_FUNC (
- ("rnd=%d", rnd),
- ("x[%Pu]=%.*Rg inex=%d", mpfr_get_prec(x), mpfr_log_prec, x, inexact));
+typedef mpfr_const_euler_bs_struct mpfr_const_euler_bs_t[1];
- log2m = MPFR_INT_CEIL_LOG2 (prec);
- m = MPFR_ADD_PREC (prec, 2 * log2m + 23);
-
- mpfr_init2 (y, m);
- mpfr_init2 (z, m);
+static void
+mpfr_const_euler_bs_init (mpfr_const_euler_bs_t s)
+{
+ mpz_init (s->P);
+ mpz_init (s->Q);
+ mpz_init (s->T);
+ mpz_init (s->C);
+ mpz_init (s->D);
+ mpz_init (s->V);
+}
- mpfr_init2 (l2, sizeof (mpfr_prec_t) * CHAR_BIT);
- mpfr_const_log2 (l2, MPFR_RNDU);
- mpfr_div_2ui (l2, l2, 1, MPFR_RNDU); /* log(2)/2, rounded upward */
+static void
+mpfr_const_euler_bs_clear (mpfr_const_euler_bs_t s)
+{
+ mpz_clear (s->P);
+ mpz_clear (s->Q);
+ mpz_clear (s->T);
+ mpz_clear (s->C);
+ mpz_clear (s->D);
+ mpz_clear (s->V);
+}
- MPFR_ZIV_INIT (loop, m);
- for (;;)
+static void
+mpfr_const_euler_bs_1 (mpfr_const_euler_bs_t s,
+ unsigned long n1, unsigned long n2, unsigned long N,
+ int cont)
+{
+ if (n2 - n1 == 1)
{
- mpfr_exp_t exp_S, err;
- MPFR_BLOCK_DECL (flags);
-
- /* since prec >= 1, we have m >= 24 here, which ensures n >= 9 below */
- /* Compute n >= prec * log(2)/2 (see algorithms.tex). */
- mpfr_set_exp_t (z, m, MPFR_RNDU);
- mpfr_mul (z, z, l2, MPFR_RNDU);
- mpfr_ceil (z, z);
- MPFR_LOG_VAR (z);
- MPFR_BLOCK (flags, n = mpfr_get_ui (z, MPFR_RNDN));
- MPFR_ASSERTD (n >= 9 && ! MPFR_ERANGEFLAG (flags));
- mpfr_const_euler_S2 (y, z); /* error <= 3 ulps */
- exp_S = MPFR_EXP(y);
- mpfr_log (z, z, MPFR_RNDD); /* error <= 1 ulp */
- mpfr_sub (y, y, z, MPFR_RNDN); /* S'(n) - log(n) */
- /* the error is less than 1/2 + 3*2^(exp_S-EXP(y)) + 2^(EXP(z)-EXP(y))
- <= 1/2 + 2^(exp_S+2-EXP(y)) + 2^(EXP(z)-EXP(y))
- <= 1/2 + 2^(1+MAX(exp_S+2,EXP(z))-EXP(y)) */
- err = 1 + MAX(exp_S + 2, MPFR_EXP(z)) - MPFR_EXP(y);
- err = (err >= -1) ? err + 1 : 0; /* error <= 2^err ulp(y) */
- exp_S = MPFR_EXP(y);
- mpfr_const_euler_R (z, n); /* err <= ulp(1/2) = 2^(-m) */
- mpfr_sub (y, y, z, MPFR_RNDN);
- /* err <= 1/2 ulp(y) + 2^(-m) + 2^(err + exp_S - EXP(y)) ulp(y).
- Since the result is between 0.5 and 1, ulp(y) = 2^(-m).
- So we get 3/2*ulp(y) + 2^(err + exp_S - EXP(y)) ulp(y).
- 3/2 + 2^e <= 2^(e+1) for e>=1, and <= 2^2 otherwise */
- err = err + exp_S - MPFR_EXP(y);
- err = (err >= 1) ? err + 1 : 2;
- if (MPFR_LIKELY (MPFR_CAN_ROUND (y, m - err, prec, rnd)))
- break;
- MPFR_ZIV_NEXT (loop, m);
- mpfr_set_prec (y, m);
- mpfr_set_prec (z, m);
+ mpz_set_ui (s->P, N);
+ mpz_mul (s->P, s->P, s->P);
+ mpz_set_ui (s->Q, n1 + 1);
+ mpz_mul (s->Q, s->Q, s->Q);
+ mpz_set_ui (s->C, 1);
+ mpz_set_ui (s->D, n1 + 1);
+ mpz_set (s->T, s->P);
+ mpz_set (s->V, s->P);
}
- MPFR_ZIV_FREE (loop);
+ else
+ {
+ mpfr_const_euler_bs_t L, R;
+ mpz_t t, u, v;
+ unsigned long m = (n1 + n2) / 2;
- inexact = mpfr_set (x, y, rnd);
+ mpfr_const_euler_bs_init (L);
+ mpfr_const_euler_bs_init (R);
+ mpfr_const_euler_bs_1 (L, n1, m, N, 1);
+ mpfr_const_euler_bs_1 (R, m, n2, N, 1);
- mpfr_clear (y);
- mpfr_clear (z);
- mpfr_clear (l2);
+ mpz_init (t);
+ mpz_init (u);
+ mpz_init (v);
- return inexact; /* always inexact */
+ if (cont)
+ mpz_mul (s->P, L->P, R->P);
+
+ mpz_mul (s->Q, L->Q, R->Q);
+ mpz_mul (s->D, L->D, R->D);
+
+ /* T = LP RT + RQ LT*/
+ mpz_mul (t, L->P, R->T);
+ mpz_mul (v, R->Q, L->T);
+ mpz_add (s->T, t, v);
+
+ /* C = LC RD + RC LD */
+ if (cont)
+ {
+ mpz_mul (s->C, L->C, R->D);
+ mpz_addmul (s->C, R->C, L->D);
+ }
+
+ /* V = RD (RQ LV + LC LP RT) + LD LP RV */
+ mpz_mul (u, L->P, R->V);
+ mpz_mul (u, u, L->D);
+ mpz_mul (v, R->Q, L->V);
+ mpz_addmul (v, t, L->C);
+ mpz_mul (v, v, R->D);
+ mpz_add (s->V, u, v);
+
+ mpfr_const_euler_bs_clear (L);
+ mpfr_const_euler_bs_clear (R);
+ mpz_clear (t);
+ mpz_clear (u);
+ mpz_clear (v);
+ }
}
static void
-mpfr_const_euler_S2_aux (mpz_t P, mpz_t Q, mpz_t T, mpfr_t n,
- unsigned long a, unsigned long b, int need_P)
+mpfr_const_euler_bs_2 (mpz_t P, mpz_t Q, mpz_t T,
+ unsigned long n1, unsigned long n2, unsigned long N,
+ int cont)
{
- if (a + 1 == b)
+ if (n2 - n1 == 1)
{
- int inex;
- MPFR_BLOCK_DECL (flags);
-
- MPFR_BLOCK (flags, MPFR_DBGRES (inex = mpfr_get_z (P, n, MPFR_RNDN)));
- MPFR_ASSERTD (inex == 0 && ! MPFR_ERANGEFLAG (flags));
- if (a > 1)
- mpz_mul_si (P, P, 1 - (long) a);
+ if (n1 == 0)
+ {
+ mpz_set_ui (P, 1);
+ mpz_set_ui (Q, 4 * N);
+ }
+ else
+ {
+ mpz_set_ui (P, 2 * n1 - 1);
+ mpz_pow_ui (P, P, 3);
+ mpz_set_ui (Q, 32 * n1);
+ mpz_mul_ui (Q, Q, N);
+ mpz_mul_ui (Q, Q, N);
+ }
mpz_set (T, P);
- mpz_set_ui (Q, a);
- mpz_mul_ui (Q, Q, a);
}
else
{
- unsigned long c = (a + b) / 2;
mpz_t P2, Q2, T2;
- mpfr_const_euler_S2_aux (P, Q, T, n, a, c, 1);
+ unsigned long m = (n1 + n2) / 2;
+
mpz_init (P2);
mpz_init (Q2);
mpz_init (T2);
- mpfr_const_euler_S2_aux (P2, Q2, T2, n, c, b, 1);
+ mpfr_const_euler_bs_2 (P, Q, T, n1, m, N, 1);
+ mpfr_const_euler_bs_2 (P2, Q2, T2, m, n2, N, 1);
mpz_mul (T, T, Q2);
mpz_mul (T2, T2, P);
mpz_add (T, T, T2);
- if (need_P)
+ if (cont)
mpz_mul (P, P, P2);
mpz_mul (Q, Q, Q2);
mpz_clear (P2);
mpz_clear (Q2);
mpz_clear (T2);
- /* divide by 2 if possible */
- {
- unsigned long v2;
- v2 = mpz_scan1 (P, 0);
- c = mpz_scan1 (Q, 0);
- if (c < v2)
- v2 = c;
- c = mpz_scan1 (T, 0);
- if (c < v2)
- v2 = c;
- if (v2)
- {
- mpz_tdiv_q_2exp (P, P, v2);
- mpz_tdiv_q_2exp (Q, Q, v2);
- mpz_tdiv_q_2exp (T, T, v2);
- }
- }
}
}
-/* computes S(n) = sum(n^k*(-1)^(k-1)/k!/k, k=1..ceil(4.319136566 * n))
- using binary splitting.
- We have S(n) = sum(f(k), k=1..N) with N=ceil(4.319136566 * n)
- and f(k) = n^k*(-1)*(k-1)/k!/k,
- thus f(k)/f(k-1) = -n*(k-1)/k^2
-*/
-static void
-mpfr_const_euler_S2 (mpfr_t x, mpfr_t n)
-{
- mpz_t P, Q, T;
- unsigned long N;
- mpfr_t NN;
- MPFR_BLOCK_DECL (flags);
-
- mpfr_init2 (NN, 64);
- mpfr_set_str_binary (NN, /* a+2 = a*log(a), rounded to +infinity */
- "100.01010001101100101110111100011011001011011111001111001111");
- mpfr_mul (NN, NN, n, MPFR_RNDU);
- MPFR_LOG_VAR (NN);
- /* N = ceil(a.n) upper bound */
- MPFR_BLOCK (flags, N = mpfr_get_ui (NN, MPFR_RNDU));
- MPFR_ASSERTD (! MPFR_ERANGEFLAG (flags));
- mpz_init (P);
- mpz_init (Q);
- mpz_init (T);
- mpfr_const_euler_S2_aux (P, Q, T, n, 1, N + 1, 0);
- mpfr_set_z (x, T, MPFR_RNDN);
- mpfr_div_z (x, x, Q, MPFR_RNDN);
- mpz_clear (P);
- mpz_clear (Q);
- mpz_clear (T);
- mpfr_clear (NN);
-}
-
-/* computes R(n) = exp(-n)/n * sum(k!/(-n)^k, k=0..n-2)
- with error at most 4*ulp(x). Assumes n>=2.
- Since x <= exp(-n)/n <= 1/8, then 4*ulp(x) <= ulp(1).
-*/
-static void
-mpfr_const_euler_R (mpfr_t x, unsigned long n)
+int
+mpfr_const_euler_internal (mpfr_t x, mpfr_rnd_t rnd)
{
- unsigned long k, m;
- mpz_t a, s;
+ mpfr_const_euler_bs_t sum;
+ mpz_t t, u, v;
+ unsigned long n, N;
+ mpfr_prec_t prec, wp, magn;
mpfr_t y;
+ int inexact;
+ MPFR_ZIV_DECL (loop);
- MPFR_ASSERTN (n >= 2); /* ensures sum(k!/(-n)^k, k=0..n-2) >= 2/3 */
-
- /* as we multiply the sum by exp(-n), we need only PREC(x) - n/LOG2 bits */
- m = MPFR_PREC(x) - (unsigned long) ((double) n / LOG2);
+ prec = mpfr_get_prec (x);
+ wp = prec + 24;
- mpz_init_set_ui (a, 1);
- mpz_mul_2exp (a, a, m);
- mpz_init_set (s, a);
+ mpfr_init2 (y, wp);
+ mpfr_const_euler_bs_init (sum);
+ mpz_init (t);
+ mpz_init (u);
+ mpz_init (v);
- for (k = 1; k <= n; k++)
+ MPFR_ZIV_INIT (loop, wp);
+ for (;;)
{
- mpz_mul_ui (a, a, k);
- mpz_fdiv_q_ui (a, a, n);
- /* the error e(k) on a is e(k) <= 1 + k/n*e(k-1) with e(0)=0,
- i.e. e(k) <= k */
- if (k % 2)
- mpz_sub (s, s, a);
- else
- mpz_add (s, s, a);
+ /* The approximation error is bounded by 24 exp(-8n) when
+ n > 1, which is smaller than 2^-wp if
+ n > (wp + log_2(24)) * (log(2)/8).
+ Note log2(24) < 5 and log(2)/8 < 866434 / 10000000. */
+ mpz_set_ui (t, wp + 5);
+ mpz_mul_ui (t, t, 866434);
+ mpz_cdiv_q_ui (t, t, 10000000);
+ n = mpz_get_ui (t);
+
+ /* It is sufficient to take N >= alpha*n + 1
+ where alpha = 3/LambertW(3/e) = 4.970625759544... */
+ mpz_set_ui (t, n);
+ mpz_mul_ui (t, t, 4970626);
+ mpz_cdiv_q_ui (t, t, 1000000);
+ mpz_add_ui (t, t, 1);
+ N = mpz_get_ui (t);
+
+ /* V / ((T + Q) * D) = S / I
+ where S = sum_{k=0}^{N-1} H_k n^(2k) / (k!)^2,
+ I = sum_{k=0}^{N-1} n^(2k) / (k!)^2 */
+ mpfr_const_euler_bs_1 (sum, 0, N, n, 0);
+ mpz_add (sum->T, sum->T, sum->Q);
+ mpz_mul (t, sum->T, sum->D);
+ mpz_mul_2exp (u, sum->V, wp);
+ mpz_tdiv_q (v, u, t);
+ /* v * 2^-wp = S/I with error < 1 */
+
+ /* C / (D * V) = U where
+ U = (1/(4n)) sum_{k=0}^{2n-1} [(2k)!]^3 / ((k!)^4 8^(2k) (2n)^(2k)) */
+ mpfr_const_euler_bs_2 (sum->C, sum->D, sum->V, 0, 2*n, n, 0);
+ mpz_mul (t, sum->Q, sum->Q);
+ mpz_mul (t, t, sum->V);
+ mpz_mul (u, sum->T, sum->T);
+ mpz_mul (u, u, sum->D);
+ mpz_mul_2exp (t, t, wp);
+ mpz_div (t, t, u);
+ /* t * 2^-wp = U/I^2 with error < 1 */
+
+ /* gamma = S/I - U/I^2 - log(n) with error at most 2^-wp */
+ mpz_sub (v, v, t);
+ /* v * 2^-wp now equals gamma + log(n) with error at most 3*2^-wp */
+
+ /* log(n) < 2^ceil(log2(n)) */
+ magn = MPFR_INT_CEIL_LOG2(n);
+ mpfr_set_prec (y, wp + magn);
+ mpfr_set_ui (y, n, MPFR_RNDZ); /* exact */
+ mpfr_log (y, y, MPFR_RNDZ); /* error < 2^-wp */
+
+ mpfr_mul_2exp (y, y, wp, MPFR_RNDZ);
+ mpfr_z_sub (y, v, y, MPFR_RNDZ);
+ mpfr_div_2exp (y, y, wp, MPFR_RNDZ);
+ /* rounding error from the last subtraction < 2^-wp */
+ /* so y = gamma with error < 5*2^-wp */
+
+ if (MPFR_LIKELY (MPFR_CAN_ROUND (y, wp - 3, prec, rnd)))
+ break;
+
+ MPFR_ZIV_NEXT (loop, wp);
}
- /* the error on s is at most 1+2+...+n = n*(n+1)/2 */
- mpz_fdiv_q_ui (s, s, n); /* err <= 1 + (n+1)/2 */
- MPFR_ASSERTN (MPFR_PREC(x) >= mpz_sizeinbase(s, 2));
- mpfr_set_z (x, s, MPFR_RNDD); /* exact */
- mpfr_div_2ui (x, x, m, MPFR_RNDD);
- /* now x = 1/n * sum(k!/(-n)^k, k=0..n-2) <= 1/n */
- /* err(x) <= (n+1)/2^m <= (n+1)*exp(n)/2^PREC(x) */
-
- mpfr_init2 (y, m);
- mpfr_set_si (y, -(long)n, MPFR_RNDD); /* assumed exact */
- mpfr_exp (y, y, MPFR_RNDD); /* err <= ulp(y) <= exp(-n)*2^(1-m) */
- mpfr_mul (x, x, y, MPFR_RNDD);
- /* err <= ulp(x) + (n + 1 + 2/n) / 2^prec(x)
- <= ulp(x) + (n + 1 + 2/n) ulp(x)/x since x*2^(-prec(x)) < ulp(x)
- <= ulp(x) + (n + 1 + 2/n) 3/(2n) ulp(x) since x >= 2/3*n for n >= 2
- <= 4 * ulp(x) for n >= 2 */
+
+ MPFR_ZIV_FREE (loop);
+ inexact = mpfr_set (x, y, rnd);
+
mpfr_clear (y);
+ mpz_clear (t);
+ mpz_clear (u);
+ mpz_clear (v);
+ mpfr_const_euler_bs_clear (sum);
- mpz_clear (a);
- mpz_clear (s);
+ return inexact; /* always inexact */
}
+