diff options
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | ai.c | 357 | ||||
-rw-r--r-- | ai2.c | 347 | ||||
-rw-r--r-- | bidimensional_sample.c | 146 | ||||
-rw-r--r-- | mparam_h.in | 9 | ||||
-rw-r--r-- | mpfr-impl.h | 3 | ||||
-rw-r--r-- | tuneup.c | 711 |
7 files changed, 1033 insertions, 542 deletions
diff --git a/Makefile.am b/Makefile.am index a7c5a088c..0f35893d4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -56,7 +56,7 @@ round_near_x.c constant.c abort_prec_max.c stack_interface.c lngamma.c \ zeta_ui.c set_d64.c get_d64.c jn.c yn.c rem1.c get_patches.c add_d.c \ sub_d.c d_sub.c mul_d.c div_d.c d_div.c li2.c rec_sqrt.c min_prec.c \ buildopt.c digamma.c bernoulli.c isregular.c set_flt.c get_flt.c \ -scale2.c set_z_exp.c ai.c ai2.c gammaonethird.c +scale2.c set_z_exp.c ai.c gammaonethird.c libmpfr_la_LIBADD = @LIBOBJS@ @@ -23,9 +23,6 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" -#define MPFR_SMALL_PRECISION 32 - - /* Reminder and notations: ----------------------- @@ -50,7 +47,7 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., /* Airy function Ai evaluated by the most naive algorithm */ int -mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) +mpfr_ai1 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) { MPFR_ZIV_DECL (loop); MPFR_SAVE_EXPO_DECL (expo); @@ -120,11 +117,11 @@ mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) mpfr_sqrt (tmp_sp, tmp_sp, MPFR_RNDU); /* tmp_sp ~ x^3/2 */ /* 0.96179669392597567 >~ 2/3 * log2(e). See algorithms.tex */ - mpfr_set_str(tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU); + mpfr_set_str (tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU); mpfr_mul (tmp2_sp, tmp_sp, tmp2_sp, MPFR_RNDU); /* cond represents the number of lost bits in the evaluation of the sum */ - if ( (MPFR_IS_ZERO(x)) || (MPFR_GET_EXP (x) <= 0) ) + if ( (MPFR_IS_ZERO (x)) || (MPFR_GET_EXP (x) <= 0) ) cond = 0; else cond = mpfr_get_ui (tmp2_sp, MPFR_RNDU) - (MPFR_GET_EXP (x)-1)/4 - 1; @@ -132,7 +129,7 @@ mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) /* The variable assumed_exponent is used to store the maximal assumed */ /* exponent of Ai(x). More precisely, we assume that |Ai(x)| will be */ /* greater than 2^{-assumed_exponent}. */ - if (MPFR_IS_ZERO(x)) + if (MPFR_IS_ZERO (x)) assumed_exponent = 2; else { @@ -281,3 +278,349 @@ mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) MPFR_SAVE_EXPO_FREE (expo); return mpfr_check_range (y, r, rnd); } + + +/* Airy function Ai evaluated by Smith algorithm */ +int +mpfr_ai2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) +{ + MPFR_ZIV_DECL (loop); + MPFR_SAVE_EXPO_DECL (expo); + mpfr_prec_t wprec; /* working precision */ + mpfr_prec_t prec; /* target precision */ + mpfr_prec_t err; /* used to estimate the evaluation error */ + mpfr_prec_t correctBits; /* estimates the number of correct bits*/ + unsigned long int i, j, L, t; + unsigned long int cond; /* condition number of the series */ + unsigned long int assumed_exponent; /* used as a lowerbound of |EXP(Ai(x))| */ + int r; /* returned ternary value */ + mpfr_t s; /* used to store the partial sum */ + mpfr_t u0, u1; + mpfr_t *z; /* used to store the (x^3j) */ + mpfr_t result; + mpfr_t tmp_sp, tmp2_sp; /* small precision variables */ + unsigned long int x3u; /* used to store ceil (x^3) */ + mpfr_t temp1, temp2; + int test0, test1; + + /* Logging */ + MPFR_LOG_FUNC ( ("x[%#R]=%R rnd=%d", x, x, rnd), ("y[%#R]=%R", y, y) ); + + /* Special cases */ + if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) + { + if (MPFR_IS_NAN (x)) + { + MPFR_SET_NAN (y); + MPFR_RET_NAN; + } + else if (MPFR_IS_INF (x)) + return mpfr_set_ui (y, 0, rnd); + } + + /* Save current exponents range */ + MPFR_SAVE_EXPO_MARK (expo); + + /* FIXME: underflow for large values of |x| */ + + + /* Set initial precision */ + /* See the analysis for the naive evaluation */ + + /* We begin with 11 guard bits */ + prec = MPFR_PREC (y) + 11; + MPFR_ZIV_INIT (loop, prec); + + mpfr_init2 (tmp_sp, MPFR_SMALL_PRECISION); + mpfr_init2 (tmp2_sp, MPFR_SMALL_PRECISION); + mpfr_abs (tmp_sp, x, MPFR_RNDU); + mpfr_pow_ui (tmp_sp, tmp_sp, 3, MPFR_RNDU); + mpfr_sqrt (tmp_sp, tmp_sp, MPFR_RNDU); /* tmp_sp ~ x^3/2 */ + + /* 0.96179669392597567 >~ 2/3 * log2(e). See algorithms.tex */ + mpfr_set_str (tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU); + mpfr_mul (tmp2_sp, tmp_sp, tmp2_sp, MPFR_RNDU); + + /* cond represents the number of lost bits in the evaluation of the sum */ + if ( (MPFR_IS_ZERO (x)) || (MPFR_GET_EXP (x) <= 0) ) + cond = 0; + else + cond = mpfr_get_ui (tmp2_sp, MPFR_RNDU) - (MPFR_GET_EXP (x) - 1)/4 - 1; + + /* This variable is used to store the maximal assumed exponent of */ + /* Ai (x). More precisely, we assume that |Ai (x)| will be greater than */ + /* 2^{-assumedExp}. */ + if (MPFR_IS_ZERO (x)) + assumed_exponent = 2; + else + { + if (MPFR_IS_POS (x)) + { + if (MPFR_GET_EXP (x) <= 0) + assumed_exponent = 3; + else + assumed_exponent = (2 + (MPFR_GET_EXP (x)/4 + 1) + + mpfr_get_ui (tmp2_sp, MPFR_RNDU)); + } + /* We do not know Ai (x) yet */ + /* We cover the case when EXP (Ai (x))>=-10 */ + else + assumed_exponent = 10; + } + + wprec = prec + MPFR_INT_CEIL_LOG2 (prec) + 6 + cond + assumed_exponent; + + /* We assume that the truncation rank will be ~ prec */ + L = __gmpfr_isqrt (prec); + MPFR_LOG_MSG (("size of blocks L = %lu\n", L)); + + z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t) ); + MPFR_ASSERTN (z != NULL); + for (j=0; j<=L; j++) + mpfr_init (z[j]); + + mpfr_init (s); + mpfr_init (u0); mpfr_init (u1); + mpfr_init (result); + mpfr_init (temp1); + mpfr_init (temp2); + + /* ZIV loop */ + for (;;) + { + MPFR_LOG_MSG (("working precision: %Pu\n", wprec)); + + for (j=0; j<=L; j++) + mpfr_set_prec (z[j], wprec); + mpfr_set_prec (s, wprec); + mpfr_set_prec (u0, wprec); mpfr_set_prec (u1, wprec); + mpfr_set_prec (result, wprec); + + mpfr_set_ui (u0, 1, MPFR_RNDN); + mpfr_set (u1, x, MPFR_RNDN); + + mpfr_set_ui (z[0], 1, MPFR_RNDU); + mpfr_sqr (z[1], u1, MPFR_RNDU); + mpfr_mul (z[1], z[1], x, (MPFR_IS_POS (x) ? MPFR_RNDU : MPFR_RNDD) ); + + if (MPFR_IS_NEG (x)) + MPFR_CHANGE_SIGN (z[1]); + x3u = mpfr_get_ui (z[1], MPFR_RNDU); /* x3u >= ceil (x^3) */ + if (MPFR_IS_NEG (x)) + MPFR_CHANGE_SIGN (z[1]); + + for (j=2; j<=L ;j++) + { + if (j%2 == 0) + mpfr_sqr (z[j], z[j/2], MPFR_RNDN); + else + mpfr_mul (z[j], z[j-1], z[1], MPFR_RNDN); + } + + mpfr_gamma_one_and_two_third (temp1, temp2, wprec); + mpfr_set_ui (u0, 9, MPFR_RNDN); + mpfr_cbrt (u0, u0, MPFR_RNDN); + mpfr_mul (u0, u0, temp2, MPFR_RNDN); + mpfr_ui_div (u0, 1, u0 , MPFR_RNDN); /* u0 = 1/( Gamma (2/3)*9^(1/3) ) */ + + mpfr_set_ui (u1, 3, MPFR_RNDN); + mpfr_cbrt (u1, u1, MPFR_RNDN); + mpfr_mul (u1, u1, temp1, MPFR_RNDN); + mpfr_neg (u1, u1, MPFR_RNDN); + mpfr_div (u1, x, u1, MPFR_RNDN); /* u1 = -x/(Gamma (1/3)*3^(1/3)) */ + + mpfr_set_ui (result, 0, MPFR_RNDN); + t = 0; + + /* Evaluation of the series by Smith' method */ + for (i=0; ; i++) + { + t += 3 * L; + + /* k = 0 */ + t -= 3; + mpfr_set (s, z[L-1], MPFR_RNDN); + for (j=L-2; ; j--) + { + t -= 3; + mpfr_div_ui2 (s, s, (t+2), (t+3), MPFR_RNDN); + mpfr_add (s, s, z[j], MPFR_RNDN); + if (j==0) + break; + } + mpfr_mul (s, s, u0, MPFR_RNDN); + mpfr_add (result, result, s, MPFR_RNDN); + + mpfr_mul (u0, u0, z[L], MPFR_RNDN); + for (j=0; j<=L-1; j++) + { + mpfr_div_ui2 (u0, u0, (t + 2), (t + 3), MPFR_RNDN); + t += 3; + } + + t++; + + /* k = 1 */ + t -= 3; + mpfr_set (s, z[L-1], MPFR_RNDN); + for (j=L-2; ; j--) + { + t -= 3; + mpfr_div_ui2 (s, s, (t + 2), (t + 3), MPFR_RNDN); + mpfr_add (s, s, z[j], MPFR_RNDN); + if (j==0) + break; + } + mpfr_mul (s, s, u1, MPFR_RNDN); + mpfr_add (result, result, s, MPFR_RNDN); + + mpfr_mul (u1, u1, z[L], MPFR_RNDN); + for (j=0; j<=L-1; j++) + { + mpfr_div_ui2 (u1, u1, (t + 2), (t + 3), MPFR_RNDN); + t += 3; + } + + t++; + + /* k = 2 */ + t++; + + /* End of the loop over k */ + t -= 3; + + test0 = MPFR_IS_ZERO (u0) || + (MPFR_GET_EXP (u0) + (mp_exp_t)prec + 4 <= MPFR_GET_EXP (result)); + test1 = MPFR_IS_ZERO (u1) || + (MPFR_GET_EXP (u1) + (mp_exp_t)prec + 4 <= MPFR_GET_EXP (result)); + + if ( test0 && test1 && (x3u <= (t + 2) * (t + 3) / 2) ) + break; + } + + MPFR_LOG_MSG (("Truncation rank: %lu\n", t)); + + err = (5 + MPFR_INT_CEIL_LOG2 (L+1) + MPFR_INT_CEIL_LOG2 (i+1) + + cond - MPFR_GET_EXP (result)); + + /* err is the number of bits lost due to the evaluation error */ + /* wprec-(prec+1): number of bits lost due to the approximation error */ + MPFR_LOG_MSG (("Roundoff error: %Pu\n", err)); + MPFR_LOG_MSG (("Approxim error: %Pu\n", wprec - prec - 1)); + + if (wprec < err+1) + correctBits = 0; + else + { + if (wprec < err+prec+1) + correctBits = wprec - err - 1; + else + correctBits = prec; + } + + if (MPFR_LIKELY (MPFR_CAN_ROUND (result, correctBits, + MPFR_PREC (y), rnd))) + break; + + for (j=0; j<=L; j++) + mpfr_clear (z[j]); + (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t)); + L = __gmpfr_isqrt (t); + MPFR_LOG_MSG (("size of blocks L = %lu\n", L)); + z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t)); + MPFR_ASSERTN (z != NULL); + for (j=0; j<=L; j++) + mpfr_init (z[j]); + + if (correctBits == 0) + { + assumed_exponent *= 2; + MPFR_LOG_MSG (("Not a single bit correct (assumed_exponent=%lu)\n", + assumed_exponent)); + wprec = prec + 6 + MPFR_INT_CEIL_LOG2 (t) + cond + assumed_exponent; + } + else + { + if (correctBits < prec) + { /* The precision was badly chosen */ + MPFR_LOG_MSG (("Bad assumption on the exponent of Ai (x)", 0)); + MPFR_LOG_MSG ((" (E=%ld)\n", (long) (MPFR_GET_EXP (result)))); + wprec = prec + err + 1; + } + else + { /* We are really in a bad case of the TMD */ + MPFR_ZIV_NEXT (loop, prec); + + /* We update wprec */ + /* We assume that t will not be multiplied by more than 4 */ + wprec = (prec + (MPFR_INT_CEIL_LOG2 (t) + 2) + 6 + cond + - MPFR_GET_EXP (result)); + } + } + } /* End of ZIV loop */ + + MPFR_ZIV_FREE (loop); + MPFR_SAVE_EXPO_FREE (expo); + + r = mpfr_set (y, result, rnd); + + mpfr_clear (tmp_sp); + mpfr_clear (tmp2_sp); + for (j=0; j<=L; j++) + mpfr_clear (z[j]); + (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t)); + + mpfr_clear (s); + mpfr_clear (u0); mpfr_clear (u1); + mpfr_clear (result); + mpfr_clear (temp1); + mpfr_clear (temp2); + + return r; +} + +/* We consider that the boundary between the area where the naive method + should preferably be used and the area where Smith' method should preferably + be used has the following form: + it is a triangle defined by two lines (one for the negative values of x, and + one for the positive values of x) crossing at x=0. + + More precisely, + + * If x<0 and MPFR_AI_THRESHOLD1*x + MPFR_AI_THRESHOLD2*prec > MPFR_AI_SCALE, + use Smith' algorithm; + * If x>0 and MPFR_AI_THRESHOLD3*x + MPFR_AI_THRESHOLD2*prec > MPFR_AI_SCALE, + use Smith' algorithm; + * otherwise, use the naive method. +*/ + +#define MPFR_AI_SCALE 1048576 + +int +mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) +{ + mpfr_t temp1, temp2; + int ternary; + mpfr_init2 (temp1, MPFR_SMALL_PRECISION); + mpfr_init2 (temp2, MPFR_SMALL_PRECISION); + + mpfr_set (temp1, x, MPFR_SMALL_PRECISION); + mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); + mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (y), MPFR_RNDN); + + if (MPFR_IS_NEG (x)) + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); + else + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); + + mpfr_add (temp1, temp1, temp2, MPFR_RNDN); + + if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) + ternary = mpfr_ai2 (y, x, rnd); + else + ternary = mpfr_ai1 (y, x, rnd); + + mpfr_clear (temp1); + mpfr_clear (temp2); + return ternary; +} diff --git a/ai2.c b/ai2.c deleted file mode 100644 index cb0515bdc..000000000 --- a/ai2.c +++ /dev/null @@ -1,347 +0,0 @@ -/* mpfr_ai2 -- implementation of Airy Ai function by Smith algorithm. - -Copyright 2010 Free Software Foundation, Inc. -Contributed by the Arenaire and Cacao projects, INRIA. - -This file is part of the GNU MPFR Library. - -The GNU MPFR Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -The GNU MPFR Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see -http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#define MPFR_NEED_LONGLONG_H -#include "mpfr-impl.h" - -#define SMALL_PRECISION 32 - -/* Reminder and notations: - ----------------------- - - Ai is the solution of: - / y'' - x*y = 0 - { Ai (0) = 1/ ( 9^(1/3) * Gamma (2/3) ) - \ Ai' (0) = -1/ ( 3^(1/3) * Gamma (1/3) ) - - Series development: - Ai (x) = sum (a_i*x^i) - = sum (t_i) - - Recurrences: - a_(i+3) = a_i / ((i+2)*(i+3)) - t_(i+3) = t_i * x^3 / ((i+2)*(i+3)) - - Values: - a_0 = Ai (0) ~ 0.355 - a_1 = Ai' (0) ~ -0.259 -*/ - - -/* Airy function Ai evaluated by Smith algorithm */ -int -mpfr_ai2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) -{ - MPFR_ZIV_DECL (loop); - MPFR_SAVE_EXPO_DECL (expo); - mpfr_prec_t wprec; /* working precision */ - mpfr_prec_t prec; /* target precision */ - mpfr_prec_t err; /* used to estimate the evaluation error */ - mpfr_prec_t correctBits; /* estimates the number of correct bits*/ - unsigned long int i, j, L, t; - unsigned long int cond; /* condition number of the series */ - unsigned long int assumed_exponent; /* used as a lowerbound of |EXP(Ai(x))| */ - int r; /* returned ternary value */ - mpfr_t s; /* used to store the partial sum */ - mpfr_t u0, u1; - mpfr_t *z; /* used to store the (x^3j) */ - mpfr_t result; - mpfr_t tmp_sp, tmp2_sp; /* small precision variables */ - unsigned long int x3u; /* used to store ceil (x^3) */ - mpfr_t temp1, temp2; - int test0, test1; - - /* Logging */ - MPFR_LOG_FUNC ( ("x[%#R]=%R rnd=%d", x, x, rnd), ("y[%#R]=%R", y, y) ); - - /* Special cases */ - if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) - { - if (MPFR_IS_NAN (x)) - { - MPFR_SET_NAN (y); - MPFR_RET_NAN; - } - else if (MPFR_IS_INF (x)) - return mpfr_set_ui (y, 0, rnd); - } - - /* Save current exponents range */ - MPFR_SAVE_EXPO_MARK (expo); - - /* FIXME: underflow for large values of |x| */ - - - /* Set initial precision */ - /* See the analysis for the naive evaluation */ - - /* We begin with 11 guard bits */ - prec = MPFR_PREC (y) + 11; - MPFR_ZIV_INIT (loop, prec); - - mpfr_init2 (tmp_sp, SMALL_PRECISION); - mpfr_init2 (tmp2_sp, SMALL_PRECISION); - mpfr_abs (tmp_sp, x, MPFR_RNDU); - mpfr_pow_ui (tmp_sp, tmp_sp, 3, MPFR_RNDU); - mpfr_sqrt (tmp_sp, tmp_sp, MPFR_RNDU); /* tmp_sp ~ x^3/2 */ - - /* 0.96179669392597567 >~ 2/3 * log2(e). See algorithms.tex */ - mpfr_set_str(tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU); - mpfr_mul (tmp2_sp, tmp_sp, tmp2_sp, MPFR_RNDU); - - /* cond represents the number of lost bits in the evaluation of the sum */ - if ( (MPFR_IS_ZERO(x)) || (MPFR_GET_EXP (x) <= 0) ) - cond = 0; - else - cond = mpfr_get_ui (tmp2_sp, MPFR_RNDU) - (MPFR_GET_EXP (x) - 1)/4 - 1; - - /* This variable is used to store the maximal assumed exponent of */ - /* Ai (x). More precisely, we assume that |Ai (x)| will be greater than */ - /* 2^{-assumedExp}. */ - if (MPFR_IS_ZERO(x)) - assumed_exponent = 2; - else - { - if (MPFR_IS_POS (x)) - { - if (MPFR_GET_EXP (x) <= 0) - assumed_exponent = 3; - else - assumed_exponent = (2 + (MPFR_GET_EXP (x)/4 + 1) - + mpfr_get_ui (tmp2_sp, MPFR_RNDU)); - } - /* We do not know Ai (x) yet */ - /* We cover the case when EXP (Ai (x))>=-10 */ - else - assumed_exponent = 10; - } - - wprec = prec + MPFR_INT_CEIL_LOG2 (prec) + 6 + cond + assumed_exponent; - - /* We assume that the truncation rank will be ~ prec */ - L = __gmpfr_isqrt (prec); - MPFR_LOG_MSG (("size of blocks L = %lu\n", L)); - - z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t) ); - MPFR_ASSERTN (z != NULL); - for (j=0; j<=L; j++) - mpfr_init (z[j]); - - mpfr_init (s); - mpfr_init (u0); mpfr_init (u1); - mpfr_init (result); - mpfr_init (temp1); - mpfr_init (temp2); - - /* ZIV loop */ - for (;;) - { - MPFR_LOG_MSG (("working precision: %Pu\n", wprec)); - - for (j=0; j<=L; j++) - mpfr_set_prec (z[j], wprec); - mpfr_set_prec (s, wprec); - mpfr_set_prec (u0, wprec); mpfr_set_prec (u1, wprec); - mpfr_set_prec (result, wprec); - - mpfr_set_ui (u0, 1, MPFR_RNDN); - mpfr_set (u1, x, MPFR_RNDN); - - mpfr_set_ui (z[0], 1, MPFR_RNDU); - mpfr_sqr (z[1], u1, MPFR_RNDU); - mpfr_mul (z[1], z[1], x, (MPFR_IS_POS (x) ? MPFR_RNDU : MPFR_RNDD) ); - - if (MPFR_IS_NEG (x)) - MPFR_CHANGE_SIGN (z[1]); - x3u = mpfr_get_ui (z[1], MPFR_RNDU); /* x3u >= ceil (x^3) */ - if (MPFR_IS_NEG (x)) - MPFR_CHANGE_SIGN (z[1]); - - for (j=2; j<=L ;j++) - { - if (j%2 == 0) - mpfr_sqr (z[j], z[j/2], MPFR_RNDN); - else - mpfr_mul (z[j], z[j-1], z[1], MPFR_RNDN); - } - - mpfr_gamma_one_and_two_third (temp1, temp2, wprec); - mpfr_set_ui (u0, 9, MPFR_RNDN); - mpfr_cbrt (u0, u0, MPFR_RNDN); - mpfr_mul (u0, u0, temp2, MPFR_RNDN); - mpfr_ui_div (u0, 1, u0 , MPFR_RNDN); /* u0 = 1/( Gamma (2/3)*9^(1/3) ) */ - - mpfr_set_ui (u1, 3, MPFR_RNDN); - mpfr_cbrt (u1, u1, MPFR_RNDN); - mpfr_mul (u1, u1, temp1, MPFR_RNDN); - mpfr_neg (u1, u1, MPFR_RNDN); - mpfr_div (u1, x, u1, MPFR_RNDN); /* u1 = -x/(Gamma (1/3)*3^(1/3)) */ - - mpfr_set_ui (result, 0, MPFR_RNDN); - t = 0; - - /* Evaluation of the series by Smith' method */ - for (i=0; ; i++) - { - t += 3 * L; - - /* k = 0 */ - t -= 3; - mpfr_set (s, z[L-1], MPFR_RNDN); - for (j=L-2; ; j--) - { - t -= 3; - mpfr_div_ui2 (s, s, (t+2), (t+3), MPFR_RNDN); - mpfr_add (s, s, z[j], MPFR_RNDN); - if (j==0) - break; - } - mpfr_mul (s, s, u0, MPFR_RNDN); - mpfr_add (result, result, s, MPFR_RNDN); - - mpfr_mul (u0, u0, z[L], MPFR_RNDN); - for (j=0; j<=L-1; j++) - { - mpfr_div_ui2 (u0, u0, (t + 2), (t + 3), MPFR_RNDN); - t += 3; - } - - t++; - - /* k = 1 */ - t -= 3; - mpfr_set (s, z[L-1], MPFR_RNDN); - for (j=L-2; ; j--) - { - t -= 3; - mpfr_div_ui2 (s, s, (t + 2), (t + 3), MPFR_RNDN); - mpfr_add (s, s, z[j], MPFR_RNDN); - if (j==0) - break; - } - mpfr_mul (s, s, u1, MPFR_RNDN); - mpfr_add (result, result, s, MPFR_RNDN); - - mpfr_mul (u1, u1, z[L], MPFR_RNDN); - for (j=0; j<=L-1; j++) - { - mpfr_div_ui2 (u1, u1, (t + 2), (t + 3), MPFR_RNDN); - t += 3; - } - - t++; - - /* k = 2 */ - t++; - - /* End of the loop over k */ - t -= 3; - - test0 = MPFR_IS_ZERO (u0) || - (MPFR_GET_EXP (u0) + (mpfr_exp_t)prec + 4 <= MPFR_GET_EXP (result)); - test1 = MPFR_IS_ZERO (u1) || - (MPFR_GET_EXP (u1) + (mpfr_exp_t)prec + 4 <= MPFR_GET_EXP (result)); - - if ( test0 && test1 && (x3u <= (t + 2) * (t + 3) / 2) ) - break; - } - - MPFR_LOG_MSG (("Truncation rank: %lu\n", t)); - - err = (5 + MPFR_INT_CEIL_LOG2 (L+1) + MPFR_INT_CEIL_LOG2 (i+1) - + cond - MPFR_GET_EXP (result)); - - /* err is the number of bits lost due to the evaluation error */ - /* wprec-(prec+1): number of bits lost due to the approximation error */ - MPFR_LOG_MSG (("Roundoff error: %Pu\n", err)); - MPFR_LOG_MSG (("Approxim error: %Pu\n", wprec - prec - 1)); - - if (wprec < err+1) - correctBits = 0; - else - { - if (wprec < err+prec+1) - correctBits = wprec - err - 1; - else - correctBits = prec; - } - - if (MPFR_LIKELY (MPFR_CAN_ROUND (result, correctBits, - MPFR_PREC (y), rnd))) - break; - - for (j=0; j<=L; j++) - mpfr_clear (z[j]); - (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t)); - L = __gmpfr_isqrt (t); - MPFR_LOG_MSG (("size of blocks L = %lu\n", L)); - z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t)); - MPFR_ASSERTN (z != NULL); - for (j=0; j<=L; j++) - mpfr_init (z[j]); - - if (correctBits == 0) - { - assumed_exponent *= 2; - MPFR_LOG_MSG (("Not a single bit correct (assumed_exponent=%lu)\n", - assumed_exponent)); - wprec = prec + 6 + MPFR_INT_CEIL_LOG2 (t) + cond + assumed_exponent; - } - else - { - if (correctBits < prec) - { /* The precision was badly chosen */ - MPFR_LOG_MSG (("Bad assumption on the exponent of Ai (x)", 0)); - MPFR_LOG_MSG ((" (E=%ld)\n", (long) (MPFR_GET_EXP (result)))); - wprec = prec + err + 1; - } - else - { /* We are really in a bad case of the TMD */ - MPFR_ZIV_NEXT (loop, prec); - - /* We update wprec */ - /* We assume that t will not be multiplied by more than 4 */ - wprec = (prec + (MPFR_INT_CEIL_LOG2 (t) + 2) + 6 + cond - - MPFR_GET_EXP (result)); - } - } - } /* End of ZIV loop */ - - MPFR_ZIV_FREE (loop); - MPFR_SAVE_EXPO_FREE (expo); - - r = mpfr_set (y, result, rnd); - - mpfr_clear (tmp_sp); - mpfr_clear (tmp2_sp); - for (j=0; j<=L; j++) - mpfr_clear (z[j]); - (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t)); - - mpfr_clear (s); - mpfr_clear (u0); mpfr_clear (u1); - mpfr_clear (result); - mpfr_clear (temp1); - mpfr_clear (temp2); - - return r; -} diff --git a/bidimensional_sample.c b/bidimensional_sample.c index 3c5de3f63..82b55bdfd 100644 --- a/bidimensional_sample.c +++ b/bidimensional_sample.c @@ -8,8 +8,6 @@ #define _PROTO __GMP_PROTO #include "speed.h" -#define SMALL_PRECISION 32 - /* Let f be a function for which we have several implementations f1, f2... */ /* We wish to have a quick overview of which implementation is the best */ /* in function of the point x where f(x) is computed and of the prectision */ @@ -99,7 +97,7 @@ void generate_2D_sample (FILE *output, struct speed_params2D param) } - mpfr_init2 (temp, SMALL_PRECISION); + mpfr_init2 (temp, MPFR_SMALL_PRECISION); /* The precision is sampled from min_prec to max_prec with */ /* approximately nb_points_prec points. If logarithmic_scale_prec */ @@ -158,6 +156,7 @@ void generate_2D_sample (FILE *output, struct speed_params2D param) prec = (double)param.min_prec; while (prec <= param.max_prec) { + printf ("prec = %d\n", (int)prec); if (param.logarithmic_scale_x == 0) mpfr_set_d (temp, param.min_x, MPFR_RNDU); else if (param.logarithmic_scale_x == -1) @@ -297,32 +296,145 @@ timing_exp3 (struct speed_params *s) SPEED_MPFR_FUNC_2D (mpfr_exp_3); } +double +timing_ai1 (struct speed_params *s) +{ + SPEED_MPFR_FUNC_2D (mpfr_ai); +} + +double +timing_ai2 (struct speed_params *s) +{ + SPEED_MPFR_FUNC_2D (mpfr_ai2); +} + +/* These functions are for testing purpose only */ +/* They are used to draw which method is actually used */ + +#include "ai.c" +double +virtual_timing_ai1 (struct speed_params *s) +{ + double t; + unsigned i; + mpfr_t w, x; + mp_size_t size; + mpfr_t temp1, temp2; + + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); + + size = (s->size-1)/GMP_NUMB_BITS+1; + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_TMP_INIT1 (s->xp, x, s->size); + MPFR_SET_EXP (x, (mpfr_exp_t) s->r); + if (s->align_xp == 2) MPFR_SET_NEG (x); + + mpfr_init2 (w, s->size); + speed_starttime (); + i = s->reps; + + mpfr_init2 (temp1, MPFR_SMALL_PRECISION); + mpfr_init2 (temp2, MPFR_SMALL_PRECISION); + + mpfr_set (temp1, x, MPFR_SMALL_PRECISION); + mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); + mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); + + if (MPFR_IS_NEG (x)) + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); + else + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); + + mpfr_add (temp1, temp1, temp2, MPFR_RNDN); + + if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) + t = 1000.; + else + t = 1.; + + mpfr_clear (temp1); + mpfr_clear (temp2); + + return t; +} + +double +virtual_timing_ai2 (struct speed_params *s) +{ + double t; + unsigned i; + mpfr_t w, x; + mp_size_t size; + mpfr_t temp1, temp2; + + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); + + size = (s->size-1)/GMP_NUMB_BITS+1; + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_TMP_INIT1 (s->xp, x, s->size); + MPFR_SET_EXP (x, (mpfr_exp_t) s->r); + if (s->align_xp == 2) MPFR_SET_NEG (x); + + mpfr_init2 (w, s->size); + speed_starttime (); + i = s->reps; + + mpfr_init2 (temp1, MPFR_SMALL_PRECISION); + mpfr_init2 (temp2, MPFR_SMALL_PRECISION); + + mpfr_set (temp1, x, MPFR_SMALL_PRECISION); + mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); + mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); + + if (MPFR_IS_NEG (x)) + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); + else + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); + + mpfr_add (temp1, temp1, temp2, MPFR_RNDN); + + if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) + t = 1.; + else + t = 1000.; + + mpfr_clear (temp1); + mpfr_clear (temp2); + + return t; +} + int main (void) { - char filename[256] = "tune.dat"; FILE *output; struct speed_params2D param; - double (*speed_funcs[4]) (struct speed_params *s); - speed_funcs[0] = timing_exp1; - speed_funcs[1] = timing_exp2; - speed_funcs[2] = timing_exp3; - speed_funcs[3] = NULL; + double (*speed_funcs[3]) (struct speed_params *s); + + /* char filename[256] = "virtual_timing_ai.dat"; */ + /* speed_funcs[0] = virtual_timing_ai1; */ + /* speed_funcs[1] = virtual_timing_ai2; */ + + char filename[256] = "airy.dat"; + speed_funcs[0] = timing_ai1; + speed_funcs[1] = timing_ai2; + speed_funcs[2] = NULL; output = fopen (filename, "w"); if (output == NULL) { fprintf (stderr, "Can't open file '%s' for writing.\n", filename); abort (); } - - param.min_x = -13.; - param.max_x = 27.; + param.min_x = -80; + param.max_x = 60; param.min_prec = 50; - param.max_prec = 100000; - param.nb_points_x = 400; - param.nb_points_prec = 400; - param.logarithmic_scale_x = -1; - param.logarithmic_scale_prec = 1; + param.max_prec = 1500; + param.nb_points_x = 200; + param.nb_points_prec = 200; + param.logarithmic_scale_x = 0; + param.logarithmic_scale_prec = 0; param.speed_funcs = speed_funcs; generate_2D_sample (output, param); diff --git a/mparam_h.in b/mparam_h.in index 600933a32..9000a9217 100644 --- a/mparam_h.in +++ b/mparam_h.in @@ -1420,3 +1420,12 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., #ifndef MPFR_SINCOS_THRESHOLD # define MPFR_SINCOS_THRESHOLD 30000 /* bits */ #endif +#ifndef MPFR_AI_THRESHOLD1 +# define MPFR_AI_THRESHOLD1 -13107 +#endif +#ifndef MPFR_AI_THRESHOLD2 +# define MPFR_AI_THRESHOLD2 1311 +#endif +#ifndef MPFR_AI_THRESHOLD3 +# define MPFR_AI_THRESHOLD3 19661 +#endif
\ No newline at end of file diff --git a/mpfr-impl.h b/mpfr-impl.h index 6338e07fb..de62e58e0 100644 --- a/mpfr-impl.h +++ b/mpfr-impl.h @@ -377,6 +377,9 @@ __MPFR_DECLSPEC extern const mpfr_t __gmpfr_four; ****************** double macros ********************* ******************************************************/ +/* Precision used for lower precision computations */ +#define MPFR_SMALL_PRECISION 32 + /* Definition of constants */ #define LOG2 0.69314718055994528622 /* log(2) rounded to zero on 53 bits */ #define ALPHA 4.3191365662914471407 /* a+2 = a*log(a), rounded to +infinity */ @@ -36,115 +36,166 @@ int verbose; s->xp : Mantissa of first input s->yp : mantissa of second input */ -#define SPEED_MPFR_FUNC(mean_fun) do { \ - unsigned i; \ - mp_ptr wp; \ - double t; \ - mpfr_t w, x; \ - mp_size_t size; \ - MPFR_TMP_DECL (marker); \ - \ - SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ - SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ - MPFR_TMP_MARK (marker); \ - \ - size = (s->size-1)/GMP_NUMB_BITS+1; \ - s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ - MPFR_TMP_INIT1 (s->xp, x, s->size); \ - MPFR_SET_EXP (x, 0); \ - \ - MPFR_TMP_INIT (wp, w, s->size, size); \ - \ - speed_operand_src (s, s->xp, size); \ - speed_operand_dst (s, wp, size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - mean_fun (w, x, MPFR_RNDN); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - MPFR_TMP_FREE (marker); \ - return t; \ -} while (0) +#define SPEED_MPFR_FUNC(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */ -#define SPEED_MPFR_FUNC2(mean_fun) do { \ - unsigned i; \ - mp_ptr vp, wp; \ - double t; \ - mpfr_t v, w, x; \ - mp_size_t size; \ - MPFR_TMP_DECL (marker); \ - \ - SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ - SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ - MPFR_TMP_MARK (marker); \ - \ - size = (s->size-1)/GMP_NUMB_BITS+1; \ - s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ - MPFR_TMP_INIT1 (s->xp, x, s->size); \ - MPFR_SET_EXP (x, 0); \ - \ - MPFR_TMP_INIT (vp, v, s->size, size); \ - MPFR_TMP_INIT (wp, w, s->size, size); \ - \ - speed_operand_src (s, s->xp, size); \ - speed_operand_dst (s, vp, size); \ - speed_operand_dst (s, wp, size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - mean_fun (v, w, x, MPFR_RNDN); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - MPFR_TMP_FREE (marker); \ - return t; \ -} while (0) - -#define SPEED_MPFR_OP(mean_fun) do { \ - unsigned i; \ - mp_ptr wp; \ - double t; \ - mpfr_t w, x, y; \ - mp_size_t size; \ - MPFR_TMP_DECL (marker); \ - \ - SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ - SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ - MPFR_TMP_MARK (marker); \ - \ - size = (s->size-1)/GMP_NUMB_BITS+1; \ - s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ - MPFR_TMP_INIT1 (s->xp, x, s->size); \ - MPFR_SET_EXP (x, 0); \ - s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \ - MPFR_TMP_INIT1 (s->yp, y, s->size); \ - MPFR_SET_EXP (y, 0); \ - \ - MPFR_TMP_INIT (wp, w, s->size, size); \ - \ - speed_operand_src (s, s->xp, size); \ - speed_operand_src (s, s->yp, size); \ - speed_operand_dst (s, wp, size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - mean_fun (w, x, y, MPFR_RNDN); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - MPFR_TMP_FREE (marker); \ - return t; \ -} while (0) +#define SPEED_MPFR_FUNC2(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr vp, wp; \ + double t; \ + mpfr_t v, w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + \ + MPFR_TMP_INIT (vp, v, s->size, size); \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, vp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (v, w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) + +#define SPEED_MPFR_OP(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x, y; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->yp, y, s->size); \ + MPFR_SET_EXP (y, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_src (s, s->yp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, y, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) +/* s->size: precision of both input and output + s->xp : Mantissa of first input + s->r : exponent + s->align_xp : sign (1 means positive, 2 means negative) +*/ +#define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, s->r); \ + if (s->align_xp == 2) MPFR_SET_NEG (x); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) /* First we include all the functions we want to tune inside this program. We can't use the GNU MPFR library since the thresholds are fixed macros. */ @@ -154,7 +205,9 @@ mpfr_prec_t mpfr_exp_2_threshold; #undef MPFR_EXP_2_THRESHOLD #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold #include "exp_2.c" -static double speed_mpfr_exp_2 (struct speed_params *s) { +static double +speed_mpfr_exp_2 (struct speed_params *s) +{ SPEED_MPFR_FUNC (mpfr_exp_2); } @@ -163,7 +216,9 @@ mpfr_prec_t mpfr_exp_threshold; #undef MPFR_EXP_THRESHOLD #define MPFR_EXP_THRESHOLD mpfr_exp_threshold #include "exp.c" -static double speed_mpfr_exp (struct speed_params *s) { +static double +speed_mpfr_exp (struct speed_params *s) +{ SPEED_MPFR_FUNC (mpfr_exp); } @@ -173,7 +228,9 @@ mpfr_prec_t mpfr_sincos_threshold; #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold #include "sin_cos.c" #include "cos.c" -static double speed_mpfr_sincos (struct speed_params *s) { +static double +speed_mpfr_sincos (struct speed_params *s) +{ SPEED_MPFR_FUNC2 (mpfr_sin_cos); } @@ -182,7 +239,9 @@ mpfr_prec_t mpfr_mul_threshold; #undef MPFR_MUL_THRESHOLD #define MPFR_MUL_THRESHOLD mpfr_mul_threshold #include "mul.c" -static double speed_mpfr_mul (struct speed_params *s) { +static double +speed_mpfr_mul (struct speed_params *s) +{ SPEED_MPFR_OP (mpfr_mul); } @@ -192,7 +251,8 @@ static double speed_mpfr_mul (struct speed_params *s) { * Common functions (inspired by GMP function) * ************************************************/ static int -analyze_data (double *dat, int ndat) { +analyze_data (double *dat, int ndat) +{ double x, min_x; int j, min_j; @@ -217,9 +277,10 @@ analyze_data (double *dat, int ndat) { #define THRESHOLD_WINDOW 16 #define THRESHOLD_FINAL_WINDOW 128 -static double domeasure (mpfr_prec_t *threshold, - double (*func) (struct speed_params *), - mpfr_prec_t p) +static double +domeasure (mpfr_prec_t *threshold, + double (*func) (struct speed_params *), + mpfr_prec_t p) { struct speed_params s; mp_size_t size; @@ -262,6 +323,81 @@ static double domeasure (mpfr_prec_t *threshold, return d; } +/* Performs measures when both the precision and the point of evaluation + shall vary. s.yp is ignored and not initialized. + It assumes that func depends on three thresholds with a boundary of the + form threshold1*x + threshold2*p = some scaling factor, if x<0, + and threshold3*x + threshold2*p = some scaling factor, if x>=0. +*/ +static double +domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3, + double (*func) (struct speed_params *), + mpfr_prec_t p, + mpfr_t x) +{ + struct speed_params s; + mp_size_t size; + double t1, t2, d; + mpfr_t xtmp; + + if (MPFR_IS_SINGULAR (x)) + { + mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n"); + abort (); + } + if (MPFR_IS_NEG (x)) + s.align_xp = 2; + else + s.align_xp = 1; + + s.align_yp = s.align_wp = 64; + s.size = p; + size = (p - 1)/GMP_NUMB_BITS+1; + + mpfr_init2 (xtmp, p); + mpn_random (xtmp->_mpfr_d, size); + xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_SET_EXP (xtmp, -53); + mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN); + mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */ + /* where perturb ~ 2^(-53) is */ + /* randomly chosen. */ + s.xp = xtmp->_mpfr_d; + s.r = MPFR_GET_EXP (xtmp); + + *threshold1 = 0; + *threshold2 = 0; + *threshold3 = 0; + t1 = speed_measure (func, &s); + if (t1 == -1.0) + { + fprintf (stderr, "Failed to measure function 1!\n"); + abort (); + } + + if (MPFR_IS_NEG (x)) + *threshold1 = INT_MIN; + else + *threshold3 = INT_MAX; + *threshold2 = INT_MAX; + t2 = speed_measure (func, &s); + if (t2 == -1.0) + { + fprintf (stderr, "Failed to measure function 2!\n"); + abort (); + } + + /* t1 is the time of the first algo (used for low prec) */ + if (t2 >= t1) + d = (t2 - t1) / t2; + else + d = (t2 - t1) / t1; + /* d > 0 if we have to use algo 1. + d < 0 if we have to use algo 2 */ + mpfr_clear (xtmp); + return d; +} + /* Tune a function with a simple THRESHOLD The function doesn't depend on another threshold. It assumes that it uses algo1 if p < THRESHOLD @@ -282,37 +418,41 @@ tune_simple_func (mpfr_prec_t *threshold, /* first look for a lower bound within 10% */ pmin = p = pstart; d = domeasure (threshold, func, pmin); - if (d < 0.0) { - if (verbose) - printf ("Oops: even for %lu, algo 2 seems to be faster!\n", - (unsigned long) pmin); - *threshold = MPFR_PREC_MIN; - return; - } + if (d < 0.0) + { + if (verbose) + printf ("Oops: even for %lu, algo 2 seems to be faster!\n", + (unsigned long) pmin); + *threshold = MPFR_PREC_MIN; + return; + } if (d >= 1.00) - for (;;) { + for (;;) + { + d = domeasure (threshold, func, pmin); + if (d < 1.00) + break; + p = pmin; + pmin += pmin/2; + } + pmin = p; + for (;;) + { d = domeasure (threshold, func, pmin); - if (d < 1.00) + if (d < 0.10) break; - p = pmin; - pmin += pmin/2; + pmin += GMP_NUMB_BITS; } - pmin = p; - for (;;) { - d = domeasure (threshold, func, pmin); - if (d < 0.10) - break; - pmin += GMP_NUMB_BITS; - } /* then look for an upper bound within 20% */ pmax = pmin * 2; - for (;;) { - d = domeasure (threshold, func, pmax); - if (d < -0.20) - break; - pmax += pmin / 2; /* don't increase too rapidly */ - } + for (;;) + { + d = domeasure (threshold, func, pmax); + if (d < -0.20) + break; + pmax += pmin / 2; /* don't increase too rapidly */ + } /* The threshold is between pmin and pmax. Affine them */ try = 0; @@ -322,14 +462,15 @@ tune_simple_func (mpfr_prec_t *threshold, if (verbose) printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); p = (pmin + pmax) / 2; - for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) { - measure[i] = domeasure (threshold, func, - p+(i-THRESHOLD_WINDOW/2)*pstep); - if (measure[i] > 0) - numpos ++; - else if (measure[i] < 0) - numneg ++; - } + for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) + { + measure[i] = domeasure (threshold, func, + p+(i-THRESHOLD_WINDOW/2)*pstep); + if (measure[i] > 0) + numpos ++; + else if (measure[i] < 0) + numneg ++; + } if (numpos > numneg) /* We use more often algo 1 than algo 2 */ pmin = p - THRESHOLD_WINDOW/2*pstep; @@ -337,12 +478,13 @@ tune_simple_func (mpfr_prec_t *threshold, pmax = p + THRESHOLD_WINDOW/2*pstep; else /* numpos == numneg ... */ - if (++ try > 2) { - *threshold = p; - if (verbose) - printf ("Quick find: %lu\n", *threshold); - return ; - } + if (++ try > 2) + { + *threshold = p; + if (verbose) + printf ("Quick find: %lu\n", *threshold); + return ; + } } /* Final tune... */ @@ -357,6 +499,154 @@ tune_simple_func (mpfr_prec_t *threshold, return; } +/* Tune a function which behavior depends on both p and x, + in a given direction. + It assumes that for (x,p) close to zero, algo1 is used + and algo2 is used when (x,p) is far from zero. + If algo2 is better for low prec, and algo1 better for high prec, + the behaviour of this function is undefined. + This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp) + until it finds a point on the boundary. It returns ell. + */ +static void +tune_simple_func_in_some_direction (long int *threshold1, + long int *threshold2, + long int *threshold3, + double (*func) (struct speed_params *), + mpfr_prec_t pstart, + int dirx, int dirp, + mpfr_t xres, mpfr_prec_t *pres) +{ + double measure[THRESHOLD_FINAL_WINDOW+1]; + double d; + mpfr_prec_t pstep; + int i, numpos, numneg, try; + mpfr_prec_t pmin, pmax, p; + mpfr_t xmin, xmax, x; + mpfr_t ratio; + + mpfr_init2 (ratio, MPFR_SMALL_PRECISION); + mpfr_set_si (ratio, dirx, MPFR_RNDN); + mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN); + + mpfr_init2 (xmin, MPFR_SMALL_PRECISION); + mpfr_init2 (xmax, MPFR_SMALL_PRECISION); + mpfr_init2 (x, MPFR_SMALL_PRECISION); + + /* first look for a lower bound within 10% */ + pmin = p = pstart; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + mpfr_set (x, xmin, MPFR_RNDN); + + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 0.0) + { + if (verbose) + printf ("Oops: even for %lu, algo 2 seems to be faster!\n", + (unsigned long) pmin); + *pres = MPFR_PREC_MIN; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return; + } + if (d >= 1.00) + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 1.00) + break; + p = pmin; + mpfr_set (x, xmin, MPFR_RNDN); + pmin += pmin/2; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + pmin = p; + mpfr_set (xmin, x, MPFR_RNDN); + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 0.10) + break; + pmin += GMP_NUMB_BITS; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + + /* then look for an upper bound within 20% */ + pmax = pmin * 2; + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax); + if (d < -0.20) + break; + pmax += pmin / 2; /* don't increase too rapidly */ + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + } + + /* The threshold is between pmin and pmax. Affine them */ + try = 0; + while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW) + { + pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1); + if (verbose) + printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); + p = (pmin + pmax) / 2; + mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN); + for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) + { + *pres = p+(i-THRESHOLD_WINDOW/2)*pstep; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + measure[i] = domeasure2 (threshold1, threshold2, threshold3, + func, *pres, xres); + if (measure[i] > 0) + numpos ++; + else if (measure[i] < 0) + numneg ++; + } + if (numpos > numneg) + { + /* We use more often algo 1 than algo 2 */ + pmin = p - THRESHOLD_WINDOW/2*pstep; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + else if (numpos < numneg) + { + pmax = p + THRESHOLD_WINDOW/2*pstep; + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + } + else + /* numpos == numneg ... */ + if (++ try > 2) + { + *pres = p; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + if (verbose) + printf ("Quick find: %lu\n", *pres); + mpfr_clear (ratio); + mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return ; + } + } + + /* Final tune... */ + if (verbose) + printf ("Finalizing in [%lu, %lu]... ", pmin, pmax); + for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++) + { + *pres = pmin+i; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + measure[i] = domeasure2 (threshold1, threshold2, threshold3, + func, *pres, xres); + } + i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); + *pres = pmin + i; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + if (verbose) + printf ("%lu\n", *pres); + mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return; +} + /************************************ * Tune Mulders' mulhigh function * ************************************/ @@ -371,10 +661,15 @@ tune_simple_func (mpfr_prec_t *threshold, #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE #include "mulders.c" -static double speed_mpfr_mulhigh (struct speed_params *s) { +static double +speed_mpfr_mulhigh (struct speed_params *s) +{ SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n); } -static double speed_mpfr_sqrhigh (struct speed_params *s) { + +static double +speed_mpfr_sqrhigh (struct speed_params *s) +{ SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n); } @@ -519,6 +814,29 @@ tune_sqr_mulders (FILE *f) } /******************************************************* + * Tuning functions for mpfr_ai * + *******************************************************/ + +long int mpfr_ai_threshold1; +long int mpfr_ai_threshold2; +long int mpfr_ai_threshold3; +#undef MPFR_AI_THRESHOLD1 +#define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1 +#undef MPFR_AI_THRESHOLD2 +#define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2 +#undef MPFR_AI_THRESHOLD3 +#define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3 + +#include "ai.c" + +static double +speed_mpfr_ai (struct speed_params *s) +{ + SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai); +} + + +/******************************************************* * Tune all the threshold of MPFR * * Warning: tune the function in their dependent order!* *******************************************************/ @@ -528,26 +846,30 @@ all (const char *filename) FILE *f; time_t start_time, end_time; struct tm *tp; + mpfr_t x1, x2, x3, tmp1, tmp2; + mpfr_prec_t p1, p2, p3; f = fopen (filename, "w"); - if (f == NULL) { - fprintf (stderr, "Can't open file '%s' for writing.\n", filename); - abort (); - } + if (f == NULL) + { + fprintf (stderr, "Can't open file '%s' for writing.\n", filename); + abort (); + } speed_time_init (); - if (verbose) { - printf ("Using: %s\n", speed_time_string); - printf ("speed_precision %d", speed_precision); - if (speed_unittime == 1.0) - printf (", speed_unittime 1 cycle"); - else - printf (", speed_unittime %.2e secs", speed_unittime); - if (speed_cycletime == 1.0 || speed_cycletime == 0.0) - printf (", CPU freq unknown\n"); - else - printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime); - } + if (verbose) + { + printf ("Using: %s\n", speed_time_string); + printf ("speed_precision %d", speed_precision); + if (speed_unittime == 1.0) + printf (", speed_unittime 1 cycle"); + else + printf (", speed_unittime %.2e secs", speed_unittime); + if (speed_cycletime == 1.0 || speed_cycletime == 0.0) + printf (", CPU freq unknown\n"); + else + printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime); + } time (&start_time); tp = localtime (&start_time); @@ -616,6 +938,55 @@ all (const char *filename) fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n", (unsigned long) mpfr_sincos_threshold); + /* Tune mpfr_ai */ + if (verbose) + printf ("Tuning mpfr_ai...\n"); + mpfr_init2 (x1, MPFR_SMALL_PRECISION); + mpfr_init2 (x2, MPFR_SMALL_PRECISION); + mpfr_init2 (x3, MPFR_SMALL_PRECISION); + mpfr_init2 (tmp1, MPFR_SMALL_PRECISION); + mpfr_init2 (tmp2, MPFR_SMALL_PRECISION); + + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + -60, 200, x1, &p1); + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + -20, 500, x2, &p2); + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + 40, 200, x3, &p3); + + mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN); + mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN); + mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN); + mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN); + + mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN); + mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN); + mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); + mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN); + + mpfr_sub (tmp2, x2, x1, MPFR_RNDN); + mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); + mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN); + + mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN); + mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN); + mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN); + mpfr_div (tmp1, tmp1, x3, MPFR_RNDN); + mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN); + + fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld\n", mpfr_ai_threshold1); + fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2); + fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3); + + mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3); + mpfr_clear (tmp1); mpfr_clear (tmp2); + /* End of tuning */ time (&end_time); fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n", |