diff options
author | pelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4> | 2004-12-17 13:20:13 +0000 |
---|---|---|
committer | pelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4> | 2004-12-17 13:20:13 +0000 |
commit | 39c7d106d8d50ef58ada11ab07b351e12db9269b (patch) | |
tree | fa5ad663072a7184bf2afaa56efee8f86fdf0ccc | |
parent | 3fc99916bf870e4a7f8a7eaf9389ae7025e27297 (diff) | |
download | mpfr-39c7d106d8d50ef58ada11ab07b351e12db9269b.tar.gz |
Optimize mpfr_cos.
From 3363 / 21663.99 / 79727 to 3139 / 18920.58 / 69624 (opteron).
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3152 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r-- | cos.c | 92 |
1 files changed, 48 insertions, 44 deletions
@@ -22,12 +22,51 @@ MA 02111-1307, USA. */ #include <stdio.h> #include "mpfr-impl.h" -static int mpfr_cos2_aux _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr)); +/* s <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ... + Assumes |r| < 1. + Returns the index l0 of the last term (-1)^l r^l/(2l)!. + The absolute error on s is at most 2 * l0 * 2^(-m). +*/ +static int +mpfr_cos2_aux (mpfr_ptr s, mpfr_srcptr r) +{ + unsigned int l, b = 2; + mp_exp_t prec, m = MPFR_PREC(s); + mpfr_t t; + + MPFR_ASSERTD (MPFR_GET_EXP (r) <= 0); + + mpfr_init2 (t, m); + MPFR_SET_ONE (t); + mpfr_set (s, t, GMP_RNDN); + + for (l = 1; MPFR_GET_EXP (t) + m >= 0; l++) + { + mpfr_mul (t, t, r, GMP_RNDU); /* err <= (3l-1) ulp */ + mpfr_div_ui (t, t, (2*l-1)*(2*l), GMP_RNDU); /* err <= 3l ulp */ + if (l % 2 == 0) + mpfr_add1 (s, s, t, GMP_RNDD); + else + mpfr_sub1 (s, s, t, GMP_RNDD); + MPFR_ASSERTD (MPFR_GET_EXP (s) == 0); /* check 1/2 <= s < 1 */ + /* err(s) <= l * 2^(-m) */ + if (MPFR_UNLIKELY(3 * l > (1U << b))) + b++; + /* now 3l <= 2^b, we want 3l*ulp(t) <= 2^(-m) + i.e. b+EXP(t)-PREC(t) <= -m */ + prec = m + MPFR_GET_EXP (t) + b; + if (MPFR_LIKELY(prec >= MPFR_PREC_MIN)) + mpfr_prec_round (t, prec, GMP_RNDN); + } + mpfr_clear (t); + + return l; +} int mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) { - int K0, K, precy, m, k, l; + mp_prec_t K0, K, precy, m, k, l; int inexact; mpfr_t r, s; mp_limb_t *rp, *sp; @@ -54,7 +93,11 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) precy = MPFR_PREC(y); K0 = __gmpfr_isqrt(precy / 2); /* Need K + log2(precy/K) extra bits */ - m = precy + 3 * (K0 + 2 * MAX(MPFR_GET_EXP (x), 0)) + 3; + m = precy + 3 * K0 + 3; + if (MPFR_GET_EXP (x) >= 0) + m += 5*MPFR_GET_EXP (x); + else + m += -MPFR_GET_EXP (x); TMP_MARK(marker); sm = (m + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; @@ -97,11 +140,12 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) m += cancel - exps; cancel = exps; } + sm = (m + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; MPFR_TMP_INIT(rp, r, m, sm); MPFR_TMP_INIT(sp, s, m, sm); } - + MPFR_SAVE_EXPO_FREE (expo); inexact = mpfr_set (y, s, rnd_mode); /* FIXME: Dont' need check range? */ @@ -111,43 +155,3 @@ mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) return inexact; } -/* s <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ... - Assumes |r| < 1. - Returns the index l0 of the last term (-1)^l r^l/(2l)!. - The absolute error on s is at most 2 * l0 * 2^(-m). -*/ -static int -mpfr_cos2_aux (mpfr_ptr s, mpfr_srcptr r) -{ - unsigned int l, b = 2; - long int prec, m = MPFR_PREC(s); - mpfr_t t; - - MPFR_ASSERTD (MPFR_GET_EXP (r) <= 0); - - mpfr_init2 (t, m); - MPFR_SET_ONE (t); - mpfr_set (s, t, GMP_RNDN); - - for (l = 1; MPFR_GET_EXP (t) + m >= 0; l++) - { - mpfr_mul (t, t, r, GMP_RNDU); /* err <= (3l-1) ulp */ - mpfr_div_ui (t, t, (2*l-1)*(2*l), GMP_RNDU); /* err <= 3l ulp */ - if (l % 2 == 0) - mpfr_add (s, s, t, GMP_RNDD); - else - mpfr_sub (s, s, t, GMP_RNDD); - MPFR_ASSERTD (MPFR_GET_EXP (s) == 0); /* check 1/2 <= s < 1 */ - /* err(s) <= l * 2^(-m) */ - if (MPFR_UNLIKELY(3 * l > (1U << b))) - b++; - /* now 3l <= 2^b, we want 3l*ulp(t) <= 2^(-m) - i.e. b+EXP(t)-PREC(t) <= -m */ - prec = m + MPFR_GET_EXP (t) + b; - if (MPFR_LIKELY(prec >= MPFR_PREC_MIN)) - mpfr_prec_round (t, prec, GMP_RNDN); - } - mpfr_clear (t); - - return l; -} |