summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2016-05-25 18:39:24 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2016-05-25 18:39:24 +0000
commit85a4a5a204367155de17432baad47d7d17b7a1b0 (patch)
tree4edc4b13dc659939f434dac1494ff76eae1bc871
parentcf2ada9015c240d1bcb270209d0316f154e11cbd (diff)
downloadmpfr-85a4a5a204367155de17432baad47d7d17b7a1b0.tar.gz
improved mpfr_log_ui
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@10374 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r--src/log_ui.c129
1 files changed, 80 insertions, 49 deletions
diff --git a/src/log_ui.c b/src/log_ui.c
index f041bf44b..ca9f91d01 100644
--- a/src/log_ui.c
+++ b/src/log_ui.c
@@ -24,53 +24,62 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
#include "mpfr-impl.h"
/* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
- e.g. something like 8 times as slow for n around ULONG_MAX/3 on an
- x86_64 Linux machine. */
-
-/* Auxiliary function: Compute using binary splitting the sum
- -sum((-x)^(n+1-n1)/n, n = n1..n2-1) where x = p/2^k,
- -1/3 <= x <= 1/3. For n1=1 we get -sum((-x)^n/n, n = 1..n2-1).
- Numerator is P[0], denominator is Q[0],
- Need 1+ceil(log(n2-n1)/log(2)) cells in P[],Q[].
+ e.g. about 4 times as slow for n around ULONG_MAX/3 on an
+ x86_64 Linux machine, for 10^6 bits of precision. The reason is that
+ for say n=6148914691236517205 and prec=10^6, the value of T computed
+ has more than 50M bits, which is much more than needed. Indeed the
+ binary splitting algorithm for series with a finite radius of convergence
+ gives rationals of size n*log(n) for a target precision n. One might
+ truncate the rationals inside the algorithm, but then the error analysis
+ should be redone. */
+
+/* Cf http://www.ginac.de/CLN/binsplit.pdf: the Taylor series of log(1+x)
+ up to order N for x=p/2^k is T/(B*Q).
+ P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
+ q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
+ B[0] <- n1 * (n1+1) * ... * (n2-1)
+ T[0] <- B[0]*Q[0] * S(n1,n2)
+ where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
+ Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
*/
static void
-S (mpz_t *P, mpz_t *Q, unsigned long n1, unsigned long n2,
- long p, unsigned long k)
+S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
+ unsigned long n2, long p, unsigned long k, int need_P)
{
MPFR_ASSERTD (n1 < n2);
if (n2 == n1 + 1)
{
- mpz_set_si (P[0], p);
- mpz_set_ui (Q[0], n1);
- mpz_mul_2exp (Q[0], Q[0], k);
+ mpz_set_si (P[0], (n1 == 1) ? p : -p);
+ *q = k;
+ mpz_set_ui (B[0], n1);
+ /* T = B*Q*S where S = P/(B*Q) thus T = P */
+ mpz_set (T[0], P[0]);
+ /* since p is odd (or zero), there is no common factor 2 between
+ P and Q, or T and B */
}
else
{
- unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2);
+ unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
/* m = floor((n1+n2)/2) */
MPFR_ASSERTD (n1 < m && m < n2);
- S (P, Q, n1, m, p, k);
- S (P + 1, Q + 1, m, n2, p, k);
- /* P1/Q1 + (-p)^(m-n1)/2^(k(m-n1)) P2/Q2 =
- (P1*2^(k(m-n1))*Q2 + Q1*(-p)^(m-n1)*P2)/(Q1*Q2*2^(k(m-n1)))
- We know 2^(k(m-n1)) divides Q1, thus:
- (P1*Q2 + (Q1/2^(k(m-n1)))*(-p)^(m-n1)*P2)/(Q1*Q2) */
- mpz_mul (P[0], P[0], Q[1]); /* P1*Q2 */
- mpz_mul (Q[1], Q[0], Q[1]); /* Q1*Q2 */
- mpz_tdiv_q_2exp (Q[0], Q[0], k * (m - n1)); /* Q1/2^(k(m-n1)) */
- mpz_mul (P[1], P[1], Q[0]); /* Q1*P2/2^(k(m-n1)) */
- mpz_swap (Q[0], Q[1]); /* Q1*Q2 */
- if (p > 0)
- {
- mpz_ui_pow_ui (Q[1], p, m - n1); /* p^m */
- if ((m - n1) & 1)
- mpz_neg (Q[1], Q[1]);
- }
- else
- mpz_ui_pow_ui (Q[1], -p, m - n1); /* (-p)^m */
- mpz_mul (P[1], P[1], Q[1]); /* Q1*Q2*2^(km) */
- mpz_add (P[0], P[0], P[1]);
+ S (P, q, B, T, n1, m, p, k, 1);
+ S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
+
+ /* T0 <- T0*B1*Q1 + P0*B0*T1 */
+ mpz_mul (T[1], T[1], P[0]);
+ mpz_mul (T[1], T[1], B[0]);
+ mpz_mul (T[0], T[0], B[1]);
+ /* Q[1] = 2^q1 */
+ mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
+ mpz_add (T[0], T[0], T[1]);
+ if (need_P)
+ mpz_mul (P[0], P[0], P[1]);
+ *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
+ mpz_mul (B[0], B[0], B[1]);
+
+ /* there should be no common factors 2 between P, Q and T,
+ since P is odd (or zero) */
}
}
@@ -79,7 +88,7 @@ mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
{
unsigned long k;
mpfr_prec_t w; /* working precision */
- mpz_t three_n, *P, *Q;
+ mpz_t three_n, *P, *B, *T;
mpfr_t t, q;
int inexact;
unsigned long N, lgN, i;
@@ -132,16 +141,25 @@ mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
MPFR_GROUP_INIT_2(group, w, t, q);
MPFR_SAVE_EXPO_MARK (expo);
+ unsigned long kk = k;
+ if (p != 0)
+ while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
+ {
+ p /= 2;
+ kk --;
+ }
+
MPFR_ZIV_INIT (loop, w);
for (;;)
{
mpfr_t tmp;
unsigned int err;
- /* we need at most w/log2(2^k/|p|) terms for an accuracy of w bits */
+
+ /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
mpfr_init2 (tmp, 32);
mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
mpfr_log2 (tmp, tmp, MPFR_RNDU);
- mpfr_ui_sub (tmp, k, tmp, MPFR_RNDD);
+ mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
MPFR_ASSERTN (w <= ULONG_MAX);
mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
N = mpfr_get_ui (tmp, MPFR_RNDU);
@@ -149,17 +167,25 @@ mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
N = 2;
lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
mpfr_clear (tmp);
- P = (mpz_t *) MPFR_TMP_ALLOC (2 * lgN * sizeof (mpz_t));
- Q = P + lgN;
+ P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
+ B = P + lgN;
+ T = B + lgN;
for (i = 0; i < lgN; i++)
{
mpz_init (P[i]);
- mpz_init (Q[i]);
+ mpz_init (B[i]);
+ mpz_init (T[i]);
}
- S (P, Q, 1, N, p, k);
- mpfr_set_z (t, P[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
- mpfr_set_z (q, Q[0], MPFR_RNDN); /* q = Q[0] * (1 + theta_2) */
- mpfr_div (t, t, q, MPFR_RNDN); /* t = P[0]/Q[0] * (1 + theta_3)^3
+
+ unsigned long q0;
+ S (P, &q0, B, T, 1, N, p, kk, 0);
+ // mpz_mul (Q[0], B[0], Q[0]);
+ // mpz_mul_2exp (B[0], B[0], q0);
+
+ mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
+ mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
+ mpfr_mul_2exp (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
+ mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
= log(n/2^k) * (1 + theta_4)^4
for |theta_i| < 2^(-w) */
@@ -170,12 +196,17 @@ mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
for (i = 0; i < lgN; i++)
{
mpz_clear (P[i]);
- mpz_clear (Q[i]);
+ mpz_clear (B[i]);
+ mpz_clear (T[i]);
}
- /* the maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
+ /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
- thus at most k+6 ulps */
- err = MPFR_INT_CEIL_LOG2 (k + 6);
+ thus at most k+6 ulps.
+ Note that there might be some cancellation in the addition: the worst
+ case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
+ gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
+ have an exponent decrease of 1, which accounts for +1 in the error. */
+ err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
break;