summaryrefslogtreecommitdiff
path: root/const_log2.c
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2005-02-14 10:38:04 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2005-02-14 10:38:04 +0000
commite5461d9e27f0979083cfe3c88ac9ce1416cf6ac6 (patch)
treee87fbdcc87d4ee75a585ad9d0af0d51269dc3f04 /const_log2.c
parent7eac6d942d2a841113a8517bae862d11b6e914fd (diff)
downloadmpfr-e5461d9e27f0979083cfe3c88ac9ce1416cf6ac6.tar.gz
further efficiency improvement (avoid mpz_init/mpz_clear)
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3297 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'const_log2.c')
-rw-r--r--const_log2.c94
1 files changed, 52 insertions, 42 deletions
diff --git a/const_log2.c b/const_log2.c
index debb3f082..7aa5b0498 100644
--- a/const_log2.c
+++ b/const_log2.c
@@ -19,6 +19,7 @@ along with the MPFR Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
+#include <stdlib.h>
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
@@ -34,75 +35,69 @@ mpfr_const_log2 (mpfr_ptr x, mp_rnd_t rnd_mode) {
/* Auxiliary function: Compute the terms from n1 to n2 (excluded)
3/4*sum((-1)^n*n!^2/2^n/(2*n+1)!, n = n1..n2-1).
- Numerator is T, denominator is Q.
- Compute P only when need_P is non-zero.
+ Numerator is T[0], denominator is Q[0],
+ Compute P[0] only when need_P is non-zero.
+ Need 1+ceil(log(n2-n1)/log(2)) cells in T[],P[],Q[].
*/
static void
-S (mpz_t T, mpz_t P, mpz_t Q, unsigned long n1, unsigned long n2, int need_P)
+S (mpz_t *T, mpz_t *P, mpz_t *Q, unsigned long n1, unsigned long n2, int need_P)
{
if (n2 == n1 + 1)
{
if (n1 == 0)
- mpz_set_ui (P, 3);
+ mpz_set_ui (P[0], 3);
else
{
- mpz_set_ui (P, n1);
- mpz_neg (P, P);
+ mpz_set_ui (P[0], n1);
+ mpz_neg (P[0], P[0]);
}
if (n1 <= (ULONG_MAX / 4 - 1) / 2)
- mpz_set_ui (Q, 4 * (2 * n1 + 1));
+ mpz_set_ui (Q[0], 4 * (2 * n1 + 1));
else /* to avoid overflow in 4 * (2 * n1 + 1) */
{
- mpz_set_ui (Q, n1);
- mpz_mul_2exp (Q, Q, 1);
- mpz_add_ui (Q, Q, 1);
- mpz_mul_2exp (Q, Q, 2);
+ mpz_set_ui (Q[0], n1);
+ mpz_mul_2exp (Q[0], Q[0], 1);
+ mpz_add_ui (Q[0], Q[0], 1);
+ mpz_mul_2exp (Q[0], Q[0], 2);
}
- mpz_set (T, P);
+ mpz_set (T[0], P[0]);
}
else
{
unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2);
- mpz_t T2, P2, Q2;
unsigned long v, w;
+
S (T, P, Q, n1, m, 1);
- mpz_init (T2);
- mpz_init (P2);
- mpz_init (Q2);
- S (T2, P2, Q2, m, n2, need_P);
- mpz_mul (T, T, Q2);
- mpz_mul (T2, T2, P);
- mpz_add (T, T, T2);
+ S (T + 1, P + 1, Q + 1, m, n2, need_P);
+ mpz_mul (T[0], T[0], Q[1]);
+ mpz_mul (T[1], T[1], P[0]);
+ mpz_add (T[0], T[0], T[1]);
if (need_P)
- mpz_mul (P, P, P2);
- mpz_mul (Q, Q, Q2);
+ mpz_mul (P[0], P[0], P[1]);
+ mpz_mul (Q[0], Q[0], Q[1]);
/* remove common trailing zeroes if any */
- v = mpz_scan1 (T, 0);
+ v = mpz_scan1 (T[0], 0);
if (v > 0)
{
- w = mpz_scan1 (Q, 0);
+ w = mpz_scan1 (Q[0], 0);
if (w < v)
v = w;
if (need_P)
{
- w = mpz_scan1 (P, 0);
+ w = mpz_scan1 (P[0], 0);
if (w < v)
v = w;
}
/* now v = min(val(T), val(Q), val(P)) */
if (v > 0)
{
- mpz_div_2exp (T, T, v);
- mpz_div_2exp (Q, Q, v);
+ mpz_div_2exp (T[0], T[0], v);
+ mpz_div_2exp (Q[0], Q[0], v);
if (need_P)
- mpz_div_2exp (P, P, v);
+ mpz_div_2exp (P[0], P[0], v);
}
}
-
- mpz_clear (T2);
- mpz_clear (P2);
- mpz_clear (Q2);
}
}
@@ -112,16 +107,14 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode)
unsigned long n = mpfr_get_prec (x);
mp_prec_t w; /* working precision */
unsigned long N;
- mpz_t T, P, Q;
+ mpz_t *T, *P, *Q;
mpfr_t t, q;
int inexact;
int ok = 1; /* ensures that the 1st try will give correct rounding */
+ unsigned long lgN, i;
MPFR_SAVE_EXPO_DECL (expo);
MPFR_SAVE_EXPO_MARK (expo);
- mpz_init (T);
- mpz_init (P);
- mpz_init (Q);
mpfr_init2 (t, MPFR_PREC_MIN);
mpfr_init2 (q, MPFR_PREC_MIN);
@@ -151,15 +144,36 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode)
/* the following are needed for error analysis (see algorithms.tex) */
MPFR_ASSERTD(w >= 3 && N >= 2);
+ lgN = __gmpfr_ceil_log2 ((double) N) + 1;
+ T = (mpz_t*) malloc (lgN * sizeof (mpz_t));
+ P = (mpz_t*) malloc (lgN * sizeof (mpz_t));
+ Q = (mpz_t*) malloc (lgN * sizeof (mpz_t));
+ for (i = 0; i < lgN; i++)
+ {
+ mpz_init (T[i]);
+ mpz_init (P[i]);
+ mpz_init (Q[i]);
+ }
+
S (T, P, Q, 0, N, 0);
mpfr_set_prec (t, w);
mpfr_set_prec (q, w);
- mpfr_set_z (t, T, GMP_RNDN);
- mpfr_set_z (q, Q, GMP_RNDN);
+ mpfr_set_z (t, T[0], GMP_RNDN);
+ mpfr_set_z (q, Q[0], GMP_RNDN);
mpfr_div (t, t, q, GMP_RNDN);
+ for (i = 0; i < lgN; i++)
+ {
+ mpz_clear (T[i]);
+ mpz_clear (P[i]);
+ mpz_clear (Q[i]);
+ }
+ free (T);
+ free (P);
+ free (Q);
+
if (ok == 0)
{
ok = mpfr_can_round (t, w - 2, GMP_RNDN, rnd_mode, n);
@@ -171,10 +185,6 @@ mpfr_const_log2_internal (mpfr_ptr x, mp_rnd_t rnd_mode)
inexact = mpfr_set (x, t, rnd_mode);
- mpz_clear (T);
- mpz_clear (P);
- mpz_clear (Q);
-
mpfr_clear (t);
mpfr_clear (q);