diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2005-02-14 10:38:04 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2005-02-14 10:38:04 +0000 |
commit | e5461d9e27f0979083cfe3c88ac9ce1416cf6ac6 (patch) | |
tree | e87fbdcc87d4ee75a585ad9d0af0d51269dc3f04 /const_log2.c | |
parent | 7eac6d942d2a841113a8517bae862d11b6e914fd (diff) | |
download | mpfr-e5461d9e27f0979083cfe3c88ac9ce1416cf6ac6.tar.gz |
further efficiency improvement (avoid mpz_init/mpz_clear)
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3297 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'const_log2.c')
-rw-r--r-- | const_log2.c | 94 |
1 files changed, 52 insertions, 42 deletions
diff --git a/const_log2.c b/const_log2.c index debb3f082..7aa5b0498 100644 --- a/const_log2.c +++ b/const_log2.c @@ -19,6 +19,7 @@ along with the MPFR Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <stdlib.h> #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" @@ -34,75 +35,69 @@ mpfr_const_log2 (mpfr_ptr x, mp_rnd_t rnd_mode) { /* Auxiliary function: Compute the terms from n1 to n2 (excluded) 3/4*sum((-1)^n*n!^2/2^n/(2*n+1)!, n = n1..n2-1). - Numerator is T, denominator is Q. - Compute P only when need_P is non-zero. + Numerator is T[0], denominator is Q[0], + Compute P[0] only when need_P is non-zero. + Need 1+ceil(log(n2-n1)/log(2)) cells in T[],P[],Q[]. */ static void -S (mpz_t T, mpz_t P, mpz_t Q, unsigned long n1, unsigned long n2, int need_P) +S (mpz_t *T, mpz_t *P, mpz_t *Q, unsigned long n1, unsigned long n2, int need_P) { if (n2 == n1 + 1) { if (n1 == 0) - mpz_set_ui (P, 3); + mpz_set_ui (P[0], 3); else { - mpz_set_ui (P, n1); - mpz_neg (P, P); + mpz_set_ui (P[0], n1); + mpz_neg (P[0], P[0]); } if (n1 <= (ULONG_MAX / 4 - 1) / 2) - mpz_set_ui (Q, 4 * (2 * n1 + 1)); + mpz_set_ui (Q[0], 4 * (2 * n1 + 1)); else /* to avoid overflow in 4 * (2 * n1 + 1) */ { - mpz_set_ui (Q, n1); - mpz_mul_2exp (Q, Q, 1); - mpz_add_ui (Q, Q, 1); - mpz_mul_2exp (Q, Q, 2); + mpz_set_ui (Q[0], n1); + mpz_mul_2exp (Q[0], Q[0], 1); + mpz_add_ui (Q[0], Q[0], 1); + mpz_mul_2exp (Q[0], Q[0], 2); } - mpz_set (T, P); + mpz_set (T[0], P[0]); } else { unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2); - mpz_t T2, P2, Q2; unsigned long v, w; + S (T, P, Q, n1, m, 1); - mpz_init (T2); - mpz_init (P2); - mpz_init (Q2); - S (T2, P2, Q2, m, n2, need_P); - mpz_mul (T, T, Q2); - mpz_mul (T2, T2, P); - mpz_add (T, T, T2); + S (T + 1, P + 1, Q + 1, m, n2, need_P); + mpz_mul (T[0], T[0], Q[1]); + mpz_mul (T[1], T[1], P[0]); + mpz_add (T[0], T[0], T[1]); if (need_P) - mpz_mul (P, P, P2); - mpz_mul (Q, Q, Q2); + mpz_mul (P[0], P[0], P[1]); + mpz_mul (Q[0], Q[0], Q[1]); /* remove common trailing zeroes if any */ - v = mpz_scan1 (T, 0); + v = mpz_scan1 (T[0], 0); if (v > 0) { - w = mpz_scan1 (Q, 0); + w = mpz_scan1 (Q[0], 0); if (w < v) v = w; if (need_P) { - w = mpz_scan1 (P, 0); + w = mpz_scan1 (P[0], 0); if (w < v) v = w; } /* now v = min(val(T), val(Q), val(P)) */ if (v > 0) { - mpz_div_2exp (T, T, v); - mpz_div_2exp (Q, Q, v); + mpz_div_2exp (T[0], T[0], v); + mpz_div_2exp (Q[0], Q[0], v); if (need_P) - mpz_div_2exp (P, P, v); + mpz_div_2exp (P[0], P[0], v); } } - - mpz_clear (T2); - mpz_clear (P2); - mpz_clear (Q2); } } @@ -112,16 +107,14 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode) unsigned long n = mpfr_get_prec (x); mp_prec_t w; /* working precision */ unsigned long N; - mpz_t T, P, Q; + mpz_t *T, *P, *Q; mpfr_t t, q; int inexact; int ok = 1; /* ensures that the 1st try will give correct rounding */ + unsigned long lgN, i; MPFR_SAVE_EXPO_DECL (expo); MPFR_SAVE_EXPO_MARK (expo); - mpz_init (T); - mpz_init (P); - mpz_init (Q); mpfr_init2 (t, MPFR_PREC_MIN); mpfr_init2 (q, MPFR_PREC_MIN); @@ -151,15 +144,36 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode) /* the following are needed for error analysis (see algorithms.tex) */ MPFR_ASSERTD(w >= 3 && N >= 2); + lgN = __gmpfr_ceil_log2 ((double) N) + 1; + T = (mpz_t*) malloc (lgN * sizeof (mpz_t)); + P = (mpz_t*) malloc (lgN * sizeof (mpz_t)); + Q = (mpz_t*) malloc (lgN * sizeof (mpz_t)); + for (i = 0; i < lgN; i++) + { + mpz_init (T[i]); + mpz_init (P[i]); + mpz_init (Q[i]); + } + S (T, P, Q, 0, N, 0); mpfr_set_prec (t, w); mpfr_set_prec (q, w); - mpfr_set_z (t, T, GMP_RNDN); - mpfr_set_z (q, Q, GMP_RNDN); + mpfr_set_z (t, T[0], GMP_RNDN); + mpfr_set_z (q, Q[0], GMP_RNDN); mpfr_div (t, t, q, GMP_RNDN); + for (i = 0; i < lgN; i++) + { + mpz_clear (T[i]); + mpz_clear (P[i]); + mpz_clear (Q[i]); + } + free (T); + free (P); + free (Q); + if (ok == 0) { ok = mpfr_can_round (t, w - 2, GMP_RNDN, rnd_mode, n); @@ -171,10 +185,6 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode) inexact = mpfr_set (x, t, rnd_mode); - mpz_clear (T); - mpz_clear (P); - mpz_clear (Q); - mpfr_clear (t); mpfr_clear (q); |