summaryrefslogtreecommitdiff
path: root/atan.c
diff options
context:
space:
mode:
authorpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-16 12:04:10 +0000
committerpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-16 12:04:10 +0000
commitd5dc08b03723af03099cde510511066613e74de6 (patch)
tree974c41bc45d09a5c7806c71792796ab3a0ea89c8 /atan.c
parentc78c0ddbc6fa21db6fda7a28a52aa2b00d3776f3 (diff)
downloadmpfr-d5dc08b03723af03099cde510511066613e74de6.tar.gz
Optimize mpfr_atan.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3143 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'atan.c')
-rw-r--r--atan.c114
1 files changed, 92 insertions, 22 deletions
diff --git a/atan.c b/atan.c
index cd2659766..414c4edc6 100644
--- a/atan.c
+++ b/atan.c
@@ -24,8 +24,7 @@ MA 02111-1307, USA. */
#define CST 2.27 /* CST=1+ln(2.4)/ln(2) */
-static int mpfr_atan_aux _MPFR_PROTO((mpfr_ptr, mpz_srcptr, long, int));
-
+/*
#define A
#define A1 1
#define A2 2
@@ -43,6 +42,76 @@ static int mpfr_atan_aux _MPFR_PROTO((mpfr_ptr, mpz_srcptr, long, int));
#undef A2
#undef NO_FACTORIAL
#undef GENERIC
+*/
+static void
+mpfr_atan_aux (mpfr_ptr y, mpz_srcptr p, long r, int m)
+{
+ unsigned long n,i,k,j,l;
+ mpz_t *S, *T, *ptoj;
+ mp_exp_t diff, expo;
+ TMP_DECL (maker);
+
+ TMP_MARK (maker);
+ S = (mpz_t*) TMP_ALLOC (3*(m+1)*sizeof (mpz_t));
+ ptoj = S + 1*(m+1);
+ T = S + 2*(m+1);
+
+ for (i = 0 ; i <= m ; i++) {
+ mpz_init (S[i]);
+ mpz_init (ptoj[i]);
+ mpz_init (T[i]);
+ }
+
+ mpz_mul_2exp (ptoj[0], p, 1);
+ for (i=1;i<m;i++)
+ mpz_mul (ptoj[i], ptoj[i-1], ptoj[i-1]);
+
+ mpz_set_ui (S[0], 1);
+ mpz_set_ui (T[0], 1);
+
+ k = 0;
+ n = 1UL << m;
+ for (i = 1 ; i < n ; i++) {
+ k++;
+ mpz_set_ui (T[k], 1 + 2*i);
+ mpz_set_ui (S[k], 1);
+
+ for (j = i+1, l = 0 ; (j & 1) == 0 ; l++, j>>=1, k--) {
+ mpz_mul (S[k], S[k], ptoj[l]);
+ mpz_mul (S[k], S[k], T[k-1]);
+ mpz_mul (S[k-1], S[k-1], T[k]);
+ mpz_mul_2exp (S[k-1], S[k-1], (r+1)*(1<<l));
+ mpz_add (S[k-1], S[k-1], S[k]);
+ mpz_mul (T[k-1], T[k-1], T[k]);
+ }
+ }
+
+ diff = mpz_sizeinbase (S[0], 2) - 2*MPFR_PREC (y);
+ expo = diff;
+ if (diff >=0)
+ mpz_fdiv_q_2exp (S[0],S[0],diff);
+ else
+ mpz_mul_2exp (S[0],S[0],-diff);
+
+ mpz_mul_2exp (T[0], T[0], (1<<m) -1);
+ diff = mpz_sizeinbase (T[0], 2) - MPFR_PREC (y);
+ expo -= diff;
+ if (diff >=0)
+ mpz_fdiv_q_2exp (T[0], T[0],diff);
+ else
+ mpz_mul_2exp (T[0], T[0],-diff);
+
+ mpz_tdiv_q (S[0], S[0], T[0]);
+ mpfr_set_z (y, S[0], GMP_RNDD);
+ MPFR_SET_EXP (y, MPFR_EXP (y) + expo - r*(i-1) );
+
+ for (i = 0 ; i <= m ; i++) {
+ mpz_clear (S[i]);
+ mpz_clear (ptoj[i]);
+ mpz_clear (T[i]);
+ }
+ TMP_FREE (maker);
+}
int
mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
@@ -50,7 +119,7 @@ mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
mpfr_t xp;
mpfr_t arctgt, sk, tmp, tmp2;
mpz_t ukz;
- int comparaison, sign, inexact;
+ int comparaison, sign, inexact, inexact2;
mp_exp_t supplement, estimated_delta;
mp_prec_t prec, realprec;
mp_exp_t exptol;
@@ -76,13 +145,16 @@ mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
MPFR_INVERT_RND (rnd_mode));
MPFR_CHANGE_SIGN (atan);
}
- mpfr_div_2ui (atan, atan, 1, rnd_mode);
- return inexact;
+ inexact2 = mpfr_div_2ui (atan, atan, 1, rnd_mode);
+ if (MPFR_UNLIKELY (inexact2))
+ inexact = inexact2; /* An underflow occurs */
+ MPFR_RET (inexact);
}
else /* x is necessarily 0 */
{
MPFR_ASSERTD (MPFR_IS_ZERO (x));
- return mpfr_set_ui (atan, 0, rnd_mode);
+ MPFR_SET_ZERO (atan);
+ MPFR_RET (0);
}
}
@@ -97,31 +169,30 @@ mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
{
inexact = mpfr_const_pi (atan, MPFR_IS_POS_SIGN (sign) ? rnd_mode
: MPFR_INVERT_RND (rnd_mode));
- mpfr_div_2ui (atan, atan, 2, rnd_mode);
if (MPFR_IS_NEG_SIGN (sign))
{
inexact = -inexact;
MPFR_CHANGE_SIGN (atan);
}
+ inexact2 = mpfr_div_2ui (atan, atan, 2, rnd_mode);
+ if (MPFR_UNLIKELY (inexact2))
+ inexact = inexact2; /* an underflow occurs */
return inexact;
}
supplement = 2 - (comparaison > 0) ? 0 : MPFR_GET_EXP (xp);
+ realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
+ prec = realprec + supplement + BITS_PER_MP_LIMB;
MPFR_SAVE_EXPO_MARK (expo);
- /* FIXME: Could use MPFR_INT_CEIL_LOG2? */
- prec = __gmpfr_ceil_log2 ((double) MPFR_PREC(x) / BITS_PER_MP_LIMB);
-
- realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
-
mpz_init (ukz);
/* Initialisation */
- mpfr_init (sk);
- mpfr_init (tmp2);
- mpfr_init (tmp);
- mpfr_init (arctgt);
+ mpfr_init2 (sk, prec);
+ mpfr_init2 (tmp2, prec);
+ mpfr_init2 (tmp, prec);
+ mpfr_init2 (arctgt, prec);
for (;;)
{
@@ -144,13 +215,12 @@ mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
/* sk is 1/|x| if |x| > 1, and |x| otherwise, i.e. min(|x|, 1/|x|) */
/* Assignation */
- mpfr_set_ui (arctgt, 0, GMP_RNDN);
+ MPFR_SET_ZERO (arctgt);
twopoweri = 1;
for (i = 0; i <= n0; i++)
{
- mpfr_mul_2ui (tmp, sk, twopoweri, GMP_RNDN);
-
/* Calculation of trunc(tmp) --> mpz */
+ mpfr_mul_2ui (tmp, sk, twopoweri, GMP_RNDN);
mpfr_trunc (tmp, tmp);
exptol = mpfr_get_z_exp (ukz, tmp);
/* since the s_k are decreasing (see algorithms.tex),
@@ -195,10 +265,10 @@ mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
inexact = mpfr_set4 (atan, arctgt, rnd_mode, sign);
- mpfr_clear (sk);
- mpfr_clear (tmp2);
- mpfr_clear (tmp);
mpfr_clear (arctgt);
+ mpfr_clear (tmp);
+ mpfr_clear (tmp2);
+ mpfr_clear (sk);
mpz_clear (ukz);