summaryrefslogtreecommitdiff
path: root/src/sin_cos.c
diff options
context:
space:
mode:
authorvlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2010-08-17 09:10:13 +0000
committervlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2010-08-17 09:10:13 +0000
commitc9583bdfe064e1069828e518533f7bc29a8fdddb (patch)
tree2400842d4095628b8486fbeabaf7bc7b8af4ed02 /src/sin_cos.c
parent50ac5b5985174201c7fa6e20496cd2b096107001 (diff)
downloadmpfr-c9583bdfe064e1069828e518533f7bc29a8fdddb.tar.gz
Source reorganization. In short:
* Added directories and moved related files into them: - src for the MPFR source files (to build the library). - doc for documentation files (except INSTALL, README...). - tools for various tools (scripts) and mbench. - tune for tuneup-related source files. - other for other source files (not distributed in tarballs). Existing directories: - tests for the source files of the test suite (make check). - examples for examples. - m4 for m4 files. * Renamed configure.in to configure.ac. * Added/updated Makefile.am files where needed. * Updated acinclude.m4 and configure.ac (AC_CONFIG_FILES line). * Updated the documentation (INSTALL, README, doc/README.dev and doc/mpfr.texi). * Updated NEWS and TODO. * Updated the scripts now in tools. The following script was used: #!/usr/bin/env zsh svn mkdir doc other src tools tune svn mv ${${(M)$(sed -n '/libmpfr_la_SOURCES/,/[^\]$/p' \ Makefile.am):#*.[ch]}:#get_patches.c} mparam_h.in \ round_raw_generic.c jyn_asympt.c src svn mv mbench check_inits_clears coverage get_patches.sh mpfrlint \ nightly-test update-patchv update-version tools svn mv bidimensional_sample.c speed.c tuneup.c tune svn mv *.{c,h} other svn mv FAQ.html README.dev algorithm* faq.xsl fdl.texi mpfr.texi \ update-faq doc svn mv configure.in configure.ac svn cp Makefile.am src/Makefile.am svn rm replace_all [Modifying some files, see above] svn add doc/Makefile.am svn add tune/Makefile.am git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7087 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sin_cos.c')
-rw-r--r--src/sin_cos.c662
1 files changed, 662 insertions, 0 deletions
diff --git a/src/sin_cos.c b/src/sin_cos.c
new file mode 100644
index 000000000..fd3f56577
--- /dev/null
+++ b/src/sin_cos.c
@@ -0,0 +1,662 @@
+/* mpfr_sin_cos -- sine and cosine of a floating-point number
+
+Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Contributed by the Arenaire and Caramel projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+
+#define INEXPOS(y) ((y) == 0 ? 0 : (((y) > 0) ? 1 : 2))
+#define INEX(y,z) (INEXPOS(y) | (INEXPOS(z) << 2))
+
+/* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
+ ie, iff x = 0 */
+int
+mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
+{
+ mpfr_prec_t prec, m;
+ int neg, reduce;
+ mpfr_t c, xr;
+ mpfr_srcptr xx;
+ mpfr_exp_t err, expx;
+ int inexy, inexz;
+ MPFR_ZIV_DECL (loop);
+ MPFR_SAVE_EXPO_DECL (expo);
+
+ MPFR_ASSERTN (y != z);
+
+ if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
+ {
+ if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
+ {
+ MPFR_SET_NAN (y);
+ MPFR_SET_NAN (z);
+ MPFR_RET_NAN;
+ }
+ else /* x is zero */
+ {
+ MPFR_ASSERTD (MPFR_IS_ZERO (x));
+ MPFR_SET_ZERO (y);
+ MPFR_SET_SAME_SIGN (y, x);
+ /* y = 0, thus exact, but z is inexact in case of underflow
+ or overflow */
+ inexy = 0; /* y is exact */
+ inexz = mpfr_set_ui (z, 1, rnd_mode);
+ return INEX(inexy,inexz);
+ }
+ }
+
+ MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
+ ("sin[%#R]=%R cos[%#R]=%R", y, y, z, z));
+
+ MPFR_SAVE_EXPO_MARK (expo);
+
+ prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
+ m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
+ expx = MPFR_GET_EXP (x);
+
+ /* When x is close to 0, say 2^(-k), then there is a cancellation of about
+ 2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
+ to compute sin(x) directly. VL: This is partly done by using
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
+ functions. Moreover, any overflow on m is avoided. */
+ if (expx < 0)
+ {
+ /* Warning: in case y = x, and the first call to
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
+ we will have clobbered the original value of x.
+ The workaround is to first compute z = cos(x) in that case, since
+ y and z are different. */
+ if (y != x)
+ /* y and x differ, thus we can safely try to compute y first */
+ {
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
+ y, x, -2 * expx, 2, 0, rnd_mode,
+ { inexy = _inexact;
+ goto small_input; });
+ if (0)
+ {
+ small_input:
+ /* we can go here only if we can round sin(x) */
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
+ z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
+ { inexz = _inexact;
+ MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
+ goto end; });
+ }
+
+ /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
+ calls failed */
+ }
+ else /* y and x are the same variable: try to compute z first, which
+ necessarily differs */
+ {
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
+ z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
+ { inexz = _inexact;
+ goto small_input2; });
+ if (0)
+ {
+ small_input2:
+ /* we can go here only if we can round cos(x) */
+ MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
+ y, x, -2 * expx, 2, 0, rnd_mode,
+ { inexy = _inexact;
+ MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
+ goto end; });
+ }
+ }
+ m += 2 * (-expx);
+ }
+
+ if (prec >= MPFR_SINCOS_THRESHOLD)
+ {
+ MPFR_SAVE_EXPO_FREE (expo);
+ return mpfr_sincos_fast (y, z, x, rnd_mode);
+ }
+
+ mpfr_init (c);
+ mpfr_init (xr);
+
+ MPFR_ZIV_INIT (loop, m);
+ for (;;)
+ {
+ /* the following is copied from sin.c */
+ if (expx >= 2) /* reduce the argument */
+ {
+ reduce = 1;
+ mpfr_set_prec (c, expx + m - 1);
+ mpfr_set_prec (xr, m);
+ mpfr_const_pi (c, MPFR_RNDN);
+ mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
+ mpfr_remainder (xr, x, c, MPFR_RNDN);
+ mpfr_div_2ui (c, c, 1, MPFR_RNDN);
+ if (MPFR_SIGN (xr) > 0)
+ mpfr_sub (c, c, xr, MPFR_RNDZ);
+ else
+ mpfr_add (c, c, xr, MPFR_RNDZ);
+ if (MPFR_IS_ZERO(xr)
+ || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
+ || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
+ goto next_step;
+ xx = xr;
+ }
+ else /* the input argument is already reduced */
+ {
+ reduce = 0;
+ xx = x;
+ }
+
+ neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
+ mpfr_set_prec (c, m);
+ mpfr_cos (c, xx, MPFR_RNDZ);
+ /* If no argument reduction was performed, the error is at most ulp(c),
+ otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
+ ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
+ case. */
+ if (reduce == 0)
+ err = m;
+ else
+ err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
+ if (!mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
+ MPFR_PREC (z) + (rnd_mode == MPFR_RNDN)))
+ goto next_step;
+
+ /* we can't set z now, because in case z = x, and the mpfr_can_round()
+ call below fails, we will have clobbered the input */
+ mpfr_set_prec (xr, MPFR_PREC(c));
+ mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
+ mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
+ 2^(5-m) if reduce=1, and by 2^(2-m)
+ otherwise */
+ mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
+ is 1, and 2^(3-m) otherwise */
+ mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
+ 2^(6-m-Exp(c)) if reduce=1, and
+ 2^(3-m-Exp(c)) otherwise */
+ err = 3 + 3 * reduce - MPFR_GET_EXP (c);
+ if (neg)
+ MPFR_CHANGE_SIGN (c);
+
+ /* the absolute error on c is at most 2^(err-m), which we must put
+ in the form 2^(EXP(c)-err). */
+ err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
+ if (mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
+ MPFR_PREC (y) + (rnd_mode == MPFR_RNDN)))
+ break;
+ /* check for huge cancellation */
+ if (err < (mpfr_exp_t) MPFR_PREC (y))
+ m += MPFR_PREC (y) - err;
+ /* Check if near 1 */
+ if (MPFR_GET_EXP (c) == 1
+ && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
+ m += m;
+
+ next_step:
+ MPFR_ZIV_NEXT (loop, m);
+ mpfr_set_prec (c, m);
+ }
+ MPFR_ZIV_FREE (loop);
+
+ inexy = mpfr_set (y, c, rnd_mode);
+ inexz = mpfr_set (z, xr, rnd_mode);
+
+ mpfr_clear (c);
+ mpfr_clear (xr);
+
+ end:
+ MPFR_SAVE_EXPO_FREE (expo);
+ mpfr_check_range (y, inexy, rnd_mode);
+ mpfr_check_range (z, inexz, rnd_mode);
+ MPFR_RET (INEX(inexy,inexz));
+}
+
+/*************** asymptotically fast implementation below ********************/
+
+/* truncate Q from R to at most prec bits.
+ Return the number of truncated bits.
+ */
+static mpfr_prec_t
+reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
+{
+ mpfr_prec_t l = mpz_sizeinbase (R, 2);
+
+ l = (l > prec) ? l - prec : 0;
+ mpz_fdiv_q_2exp (Q, R, l);
+ return l;
+}
+
+/* truncate S and C so that the smaller has prec bits.
+ Return the number of truncated bits.
+ */
+static unsigned long
+reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
+{
+ unsigned long ls = mpz_sizeinbase (S, 2);
+ unsigned long lc = mpz_sizeinbase (C, 2);
+ unsigned long l;
+
+ l = (ls < lc) ? ls : lc; /* smaller length */
+ l = (l > prec) ? l - prec : 0;
+ mpz_fdiv_q_2exp (S, S, l);
+ mpz_fdiv_q_2exp (C, C, l);
+ return l;
+}
+
+/* return in S0/Q0 a rational approximation of sin(X) with absolute error
+ bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
+ and in C0/Q0 a rational approximation of cos(X), with relative error
+ bounded by 9*2^(-prec) (and also absolute error, since
+ |cos(X)| <= 1).
+ We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
+ We use the following binary splitting formula:
+ P(a,b) = (-p)^(b-a)
+ Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
+ T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
+
+ Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
+ We do not store the factor 2^r in Q().
+
+ Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
+
+ Return l such that Q0 has to be multiplied by 2^l.
+
+ Assumes prec >= 10.
+*/
+static unsigned long
+sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
+ mpfr_prec_t prec)
+{
+ mpz_t T[GMP_NUMB_BITS], Q[GMP_NUMB_BITS], ptoj[GMP_NUMB_BITS], pp;
+ mpfr_prec_t log2_nb_terms[GMP_NUMB_BITS], mult[GMP_NUMB_BITS];
+ mpfr_prec_t accu[GMP_NUMB_BITS], size_ptoj[GMP_NUMB_BITS];
+ mpfr_prec_t prec_i_have, r0 = r;
+ unsigned long alloc, i, j, k;
+ mpfr_prec_t l;
+
+ if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
+ {
+ mpz_set_ui (Q0, 1);
+ mpz_set_ui (S0, 1);
+ mpz_set_ui (C0, 1);
+ return 0;
+ }
+
+ /* check that X=p/2^r <= 1/2 */
+ MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
+
+ mpz_init (pp);
+
+ /* normalize p (non-zero here) */
+ l = mpz_scan1 (p, 0);
+ mpz_fdiv_q_2exp (pp, p, l); /* p = pp * 2^l */
+ mpz_mul (pp, pp, pp);
+ r = 2 * (r - l); /* x^2 = (p/2^r0)^2 = pp / 2^r */
+
+ /* now p is odd */
+ alloc = 2;
+ mpz_init_set_ui (T[0], 6);
+ mpz_init_set_ui (Q[0], 6);
+ mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
+ mpz_init (T[1]);
+ mpz_init (Q[1]);
+ mpz_init (ptoj[1]);
+ mpz_mul (ptoj[1], pp, pp); /* ptoj[1] = pp^2 */
+ size_ptoj[1] = mpz_sizeinbase (ptoj[1], 2);
+
+ mpz_mul_2exp (T[0], T[0], r);
+ mpz_sub (T[0], T[0], pp); /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
+ log2_nb_terms[0] = 1;
+
+ /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
+ mult[0] = r - mpz_sizeinbase (pp, 2) + r0 - mpz_sizeinbase (p, 2);
+ /* we have x^3 < 1/2^mult[0] */
+
+ for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
+ {
+ /* i is even here */
+ /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
+ we have already summed terms of index < i
+ in S[0]/Q[0], ..., S[k]/Q[k] */
+ k ++;
+ if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
+ {
+ alloc ++;
+ mpz_init (T[k+1]);
+ mpz_init (Q[k+1]);
+ mpz_init (ptoj[k+1]);
+ mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
+ size_ptoj[k+1] = mpz_sizeinbase (ptoj[k+1], 2);
+ }
+ /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
+ then Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
+ which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
+ Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
+ log2_nb_terms[k] = 1;
+ mpz_set_ui (Q[k], (2 * i + 2) * (2 * i + 3));
+ mpz_mul_2exp (T[k], Q[k], r);
+ mpz_sub (T[k], T[k], pp);
+ mpz_mul_ui (Q[k], Q[k], (2 * i) * (2 * i + 1));
+ /* the next term of the series is divided by Q[k] and multiplied
+ by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
+ mult[k] = mpz_sizeinbase (Q[k], 2) + 2 * r - size_ptoj[1] - 1;
+ /* the absolute contribution of the next term is 1/2^accu[k] */
+ accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
+ prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
+ j = (i + 2) / 2;
+ l = 1;
+ while ((j & 1) == 0) /* combine and reduce */
+ {
+ mpz_mul (T[k], T[k], ptoj[l]);
+ mpz_mul (T[k-1], T[k-1], Q[k]);
+ mpz_mul_2exp (T[k-1], T[k-1], r << l);
+ mpz_add (T[k-1], T[k-1], T[k]);
+ mpz_mul (Q[k-1], Q[k-1], Q[k]);
+ log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
+ is a power of 2 by construction */
+ prec_i_have = mpz_sizeinbase (Q[k], 2);
+ mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
+ accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
+ prec_i_have = accu[k-1];
+ l ++;
+ j >>= 1;
+ k --;
+ }
+ }
+
+ /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
+ here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
+ l = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
+ while (k > 0)
+ {
+ j = log2_nb_terms[k-1];
+ mpz_mul (T[k], T[k], ptoj[j]);
+ mpz_mul (T[k-1], T[k-1], Q[k]);
+ l += 1 << log2_nb_terms[k];
+ mpz_mul_2exp (T[k-1], T[k-1], r * l);
+ mpz_add (T[k-1], T[k-1], T[k]);
+ mpz_mul (Q[k-1], Q[k-1], Q[k]);
+ k--;
+ }
+
+ l = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
+ /* at this point T[0]/(2^l*Q[0]) is an approximation of sin(x) where the 1st
+ neglected term has contribution < 1/2^prec, thus since the series has
+ alternate signs, the error is < 1/2^prec */
+
+ /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
+ which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
+ [up to a power of two] */
+ l += reduce (Q0, Q[0], prec);
+ l -= reduce (T[0], T[0], prec);
+ /* multiply by x = p/2^l */
+ mpz_mul (S0, T[0], p);
+ l -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
+ /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
+ |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
+ |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
+
+ mpz_clear (pp);
+ for (j = 0; j < alloc; j ++)
+ {
+ mpz_clear (T[j]);
+ mpz_clear (Q[j]);
+ mpz_clear (ptoj[j]);
+ }
+
+ /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
+ = sqrt(Q0^2*2^(2l)-S0^2)/Q0.
+ Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
+ then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
+ = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
+ = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
+ [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
+
+ Since we truncate the square root, we get:
+ sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
+ = Q*sqrt(cos(X)^2-eps1)+eps2
+ = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
+ = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
+ = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
+ since |Q| >= 2^(prec-1) */
+ /* we assume that Q0*2^l >= 2^(prec-1) */
+ MPFR_ASSERTN(l + mpz_sizeinbase (Q0, 2) >= prec);
+ mpz_mul (C0, Q0, Q0);
+ mpz_mul_2exp (C0, C0, 2 * l);
+ mpz_submul (C0, S0, S0);
+ mpz_sqrt (C0, C0);
+
+ return l;
+}
+
+/* Put in s and c approximations of sin(x) and cos(x) respectively.
+ Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
+ Return err such that the relative error is bounded by 2^err ulps.
+*/
+static int
+sincos_aux (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
+{
+ mpfr_prec_t prec_s, sh;
+ mpz_t Q, S, C, Q2, S2, C2, y;
+ mpfr_t x2;
+ unsigned long l, l2, j, err;
+
+ MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
+
+ prec_s = MPFR_PREC(s);
+
+ mpfr_init2 (x2, MPFR_PREC(x));
+ mpz_init (Q);
+ mpz_init (S);
+ mpz_init (C);
+ mpz_init (Q2);
+ mpz_init (S2);
+ mpz_init (C2);
+ mpz_init (y);
+
+ mpfr_set (x2, x, MPFR_RNDN); /* exact */
+ mpz_set_ui (Q, 1);
+ l = 0;
+ mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
+ mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
+
+ /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
+ S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
+ 'sh-1' is the number of already shifted bits in x2.
+ */
+
+ for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
+ {
+ if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
+ {
+ l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
+ l2 += sh - 1;
+ mpz_set_ui (Q2, 1);
+ mpz_set_ui (C2, 1);
+ mpz_mul_2exp (C2, C2, l2);
+ mpfr_set_ui (x2, 0, MPFR_RNDN);
+ }
+ else
+ {
+ /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
+ mpfr_mul_2exp (x2, x2, sh, MPFR_RNDN); /* exact */
+ mpfr_get_z (y, x2, MPFR_RNDZ); /* round towards zero: now
+ 0 <= x2 < 2^sh, thus
+ 0 <= x2/2^(sh-1) < 2^(1-sh) */
+ if (mpz_cmp_ui (y, 0) == 0)
+ continue;
+ mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
+ l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
+ /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
+ and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
+ }
+ if (sh == 1) /* S=0, C=1 */
+ {
+ l = l2;
+ mpz_swap (Q, Q2);
+ mpz_swap (S, S2);
+ mpz_swap (C, C2);
+ }
+ else
+ {
+ /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
+ a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
+ s <- t - d - e, c <- e - d */
+ mpz_add (y, S, C); /* a */
+ mpz_mul (C, C, C2); /* e */
+ mpz_add (C2, C2, S2); /* b */
+ mpz_mul (S2, S, S2); /* d */
+ mpz_mul (y, y, C2); /* a*b */
+ mpz_sub (S, y, S2); /* t - d */
+ mpz_sub (S, S, C); /* t - d - e */
+ mpz_sub (C, C, S2); /* e - d */
+ mpz_mul (Q, Q, Q2);
+ /* after j loops, the error is <= (11j-2)*2^(prec_s) */
+ l += l2;
+ /* reduce Q to prec_s bits */
+ l += reduce (Q, Q, prec_s);
+ /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
+ l -= reduce2 (S, C, prec_s);
+ }
+ }
+
+ j = 11 * j;
+ for (err = 0; j > 1; j = (j + 1) / 2, err ++);
+
+ mpfr_set_z (s, S, MPFR_RNDN);
+ mpfr_div_z (s, s, Q, MPFR_RNDN);
+ mpfr_div_2exp (s, s, l, MPFR_RNDN);
+
+ mpfr_set_z (c, C, MPFR_RNDN);
+ mpfr_div_z (c, c, Q, MPFR_RNDN);
+ mpfr_div_2exp (c, c, l, MPFR_RNDN);
+
+ mpz_clear (Q);
+ mpz_clear (S);
+ mpz_clear (C);
+ mpz_clear (Q2);
+ mpz_clear (S2);
+ mpz_clear (C2);
+ mpz_clear (y);
+ mpfr_clear (x2);
+ return err;
+}
+
+/* Assumes x is neither NaN, +/-Inf, nor +/- 0.
+ One of s and c might be NULL, in which case the corresponding value is
+ not computed.
+ Assumes s differs from c.
+ */
+int
+mpfr_sincos_fast (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd)
+{
+ int inexs, inexc;
+ mpfr_t x_red, ts, tc;
+ mpfr_prec_t w;
+ mpfr_exp_t err, errs, errc;
+ MPFR_ZIV_DECL (loop);
+
+ MPFR_ASSERTN(s != c);
+ if (s == NULL)
+ w = MPFR_PREC(c);
+ else if (c == NULL)
+ w = MPFR_PREC(s);
+ else
+ w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
+ w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
+ mpfr_init2 (ts, w);
+ mpfr_init2 (tc, w);
+
+ MPFR_ZIV_INIT (loop, w);
+ for (;;)
+ {
+ /* if 0 < x <= Pi/4, we can call sincos_aux directly */
+ if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
+ {
+ err = sincos_aux (ts, tc, x, MPFR_RNDN);
+ }
+ /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
+ else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
+ {
+ mpfr_init2 (x_red, MPFR_PREC(x));
+ mpfr_neg (x_red, x, rnd); /* exact */
+ err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
+ mpfr_neg (ts, ts, MPFR_RNDN);
+ mpfr_clear (x_red);
+ }
+ else /* argument reduction is needed */
+ {
+ long q;
+ mpfr_t pi;
+ int neg = 0;
+
+ mpfr_init2 (x_red, w);
+ mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
+ mpfr_const_pi (pi, MPFR_RNDN);
+ mpfr_div_2exp (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
+ mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
+ /* x = q * (Pi/2 + eps1) + x_red + eps2,
+ where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
+ and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
+ Since |q| <= x/(Pi/2) <= |x|, we have
+ q*|eps1| <= 2^(-w), thus
+ |x - q * Pi/2 - x_red| <= 2^(1-w) */
+ /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
+ if (MPFR_IS_NEG(x_red))
+ {
+ mpfr_neg (x_red, x_red, MPFR_RNDN);
+ neg = 1;
+ }
+ err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
+ err ++; /* to take into account the argument reduction */
+ if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
+ mpfr_neg (ts, ts, MPFR_RNDN);
+ if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
+ {
+ mpfr_neg (ts, ts, MPFR_RNDN);
+ mpfr_neg (tc, tc, MPFR_RNDN);
+ }
+ if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
+ {
+ mpfr_neg (ts, ts, MPFR_RNDN);
+ mpfr_swap (ts, tc);
+ }
+ mpfr_clear (x_red);
+ mpfr_clear (pi);
+ }
+ /* adjust errors with respect to absolute values */
+ errs = err - MPFR_EXP(ts);
+ errc = err - MPFR_EXP(tc);
+ if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
+ (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
+ break;
+ MPFR_ZIV_NEXT (loop, w);
+ mpfr_set_prec (ts, w);
+ mpfr_set_prec (tc, w);
+ }
+ MPFR_ZIV_FREE (loop);
+
+ inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
+ inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
+
+ mpfr_clear (ts);
+ mpfr_clear (tc);
+ return INEX(inexs,inexc);
+}