summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-17 13:20:13 +0000
committerpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-17 13:20:13 +0000
commit39c7d106d8d50ef58ada11ab07b351e12db9269b (patch)
treefa5ad663072a7184bf2afaa56efee8f86fdf0ccc
parent3fc99916bf870e4a7f8a7eaf9389ae7025e27297 (diff)
downloadmpfr-39c7d106d8d50ef58ada11ab07b351e12db9269b.tar.gz
Optimize mpfr_cos.
From 3363 / 21663.99 / 79727 to 3139 / 18920.58 / 69624 (opteron). git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3152 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r--cos.c92
1 files changed, 48 insertions, 44 deletions
diff --git a/cos.c b/cos.c
index b3fe6ddbe..eb1b55f6b 100644
--- a/cos.c
+++ b/cos.c
@@ -22,12 +22,51 @@ MA 02111-1307, USA. */
#include <stdio.h>
#include "mpfr-impl.h"
-static int mpfr_cos2_aux _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr));
+/* s <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
+ Assumes |r| < 1.
+ Returns the index l0 of the last term (-1)^l r^l/(2l)!.
+ The absolute error on s is at most 2 * l0 * 2^(-m).
+*/
+static int
+mpfr_cos2_aux (mpfr_ptr s, mpfr_srcptr r)
+{
+ unsigned int l, b = 2;
+ mp_exp_t prec, m = MPFR_PREC(s);
+ mpfr_t t;
+
+ MPFR_ASSERTD (MPFR_GET_EXP (r) <= 0);
+
+ mpfr_init2 (t, m);
+ MPFR_SET_ONE (t);
+ mpfr_set (s, t, GMP_RNDN);
+
+ for (l = 1; MPFR_GET_EXP (t) + m >= 0; l++)
+ {
+ mpfr_mul (t, t, r, GMP_RNDU); /* err <= (3l-1) ulp */
+ mpfr_div_ui (t, t, (2*l-1)*(2*l), GMP_RNDU); /* err <= 3l ulp */
+ if (l % 2 == 0)
+ mpfr_add1 (s, s, t, GMP_RNDD);
+ else
+ mpfr_sub1 (s, s, t, GMP_RNDD);
+ MPFR_ASSERTD (MPFR_GET_EXP (s) == 0); /* check 1/2 <= s < 1 */
+ /* err(s) <= l * 2^(-m) */
+ if (MPFR_UNLIKELY(3 * l > (1U << b)))
+ b++;
+ /* now 3l <= 2^b, we want 3l*ulp(t) <= 2^(-m)
+ i.e. b+EXP(t)-PREC(t) <= -m */
+ prec = m + MPFR_GET_EXP (t) + b;
+ if (MPFR_LIKELY(prec >= MPFR_PREC_MIN))
+ mpfr_prec_round (t, prec, GMP_RNDN);
+ }
+ mpfr_clear (t);
+
+ return l;
+}
int
mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
{
- int K0, K, precy, m, k, l;
+ mp_prec_t K0, K, precy, m, k, l;
int inexact;
mpfr_t r, s;
mp_limb_t *rp, *sp;
@@ -54,7 +93,11 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
precy = MPFR_PREC(y);
K0 = __gmpfr_isqrt(precy / 2); /* Need K + log2(precy/K) extra bits */
- m = precy + 3 * (K0 + 2 * MAX(MPFR_GET_EXP (x), 0)) + 3;
+ m = precy + 3 * K0 + 3;
+ if (MPFR_GET_EXP (x) >= 0)
+ m += 5*MPFR_GET_EXP (x);
+ else
+ m += -MPFR_GET_EXP (x);
TMP_MARK(marker);
sm = (m + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
@@ -97,11 +140,12 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
m += cancel - exps;
cancel = exps;
}
+
sm = (m + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
MPFR_TMP_INIT(rp, r, m, sm);
MPFR_TMP_INIT(sp, s, m, sm);
}
-
+
MPFR_SAVE_EXPO_FREE (expo);
inexact = mpfr_set (y, s, rnd_mode);
/* FIXME: Dont' need check range? */
@@ -111,43 +155,3 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
return inexact;
}
-/* s <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
- Assumes |r| < 1.
- Returns the index l0 of the last term (-1)^l r^l/(2l)!.
- The absolute error on s is at most 2 * l0 * 2^(-m).
-*/
-static int
-mpfr_cos2_aux (mpfr_ptr s, mpfr_srcptr r)
-{
- unsigned int l, b = 2;
- long int prec, m = MPFR_PREC(s);
- mpfr_t t;
-
- MPFR_ASSERTD (MPFR_GET_EXP (r) <= 0);
-
- mpfr_init2 (t, m);
- MPFR_SET_ONE (t);
- mpfr_set (s, t, GMP_RNDN);
-
- for (l = 1; MPFR_GET_EXP (t) + m >= 0; l++)
- {
- mpfr_mul (t, t, r, GMP_RNDU); /* err <= (3l-1) ulp */
- mpfr_div_ui (t, t, (2*l-1)*(2*l), GMP_RNDU); /* err <= 3l ulp */
- if (l % 2 == 0)
- mpfr_add (s, s, t, GMP_RNDD);
- else
- mpfr_sub (s, s, t, GMP_RNDD);
- MPFR_ASSERTD (MPFR_GET_EXP (s) == 0); /* check 1/2 <= s < 1 */
- /* err(s) <= l * 2^(-m) */
- if (MPFR_UNLIKELY(3 * l > (1U << b)))
- b++;
- /* now 3l <= 2^b, we want 3l*ulp(t) <= 2^(-m)
- i.e. b+EXP(t)-PREC(t) <= -m */
- prec = m + MPFR_GET_EXP (t) + b;
- if (MPFR_LIKELY(prec >= MPFR_PREC_MIN))
- mpfr_prec_round (t, prec, GMP_RNDN);
- }
- mpfr_clear (t);
-
- return l;
-}