diff options
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | doc/algorithms.tex | 2 | ||||
-rw-r--r-- | src/mpc-impl.h | 3 | ||||
-rw-r--r-- | src/pow_si.c | 15 | ||||
-rw-r--r-- | src/pow_ui.c | 67 | ||||
-rw-r--r-- | src/pow_z.c | 38 |
6 files changed, 72 insertions, 56 deletions
@@ -1,5 +1,6 @@ Changes in version 0.8.3: - - Speed-up of mpc_pow_z when the exponent fits in an unsigned long + - Speed-up of mpc_pow_si through binary exponentiation, and of + mpc_pow_z when the exponent fits in a long Changes in version 0.8.2: - Speed-up of mpc_pow_ui through binary exponentiation diff --git a/doc/algorithms.tex b/doc/algorithms.tex index 4284ef6..6ca7454 100644 --- a/doc/algorithms.tex +++ b/doc/algorithms.tex @@ -1647,7 +1647,7 @@ on the real part and of on the imaginary part of the result. If we further assume that $(n-1) 2^{-p} \leq 1$, then -$(1 + 2^{-p})^{n-1} - 1 \leq (2 n - 2) 2^{-p}$, +$(1 + 2^{-p})^{n-1} - 1 \leq 2 (n - 1) 2^{-p}$, because $(1+\varepsilon)^m-1 = \exp(m \log(1+\varepsilon)) - 1 \leq \exp(\varepsilon m) - 1 \leq 2 \varepsilon m$ as long as $\varepsilon m \leq 1$. This gives the simplified bounds diff --git a/src/mpc-impl.h b/src/mpc-impl.h index 41a899f..e1e8444 100644 --- a/src/mpc-impl.h +++ b/src/mpc-impl.h @@ -136,8 +136,9 @@ do { \ extern "C" { #endif -__MPC_DECLSPEC int mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t)); +__MPC_DECLSPEC int mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t)); __MPC_DECLSPEC int mpc_mul_karatsuba __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t)); +__MPC_DECLSPEC int mpc_pow_usi __MPC_PROTO ((mpc_ptr, mpc_srcptr, unsigned long, int, mpc_rnd_t)); __MPC_DECLSPEC char* mpc_alloc_str __MPC_PROTO ((size_t)); __MPC_DECLSPEC char* mpc_realloc_str __MPC_PROTO ((char*, size_t, size_t)); __MPC_DECLSPEC void mpc_free_str __MPC_PROTO ((char*)); diff --git a/src/pow_si.c b/src/pow_si.c index c7f3957..4b16245 100644 --- a/src/pow_si.c +++ b/src/pow_si.c @@ -19,22 +19,13 @@ along with the MPC Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <limits.h> /* for CHAR_BIT */ #include "mpc-impl.h" int mpc_pow_si (mpc_ptr z, mpc_srcptr x, long y, mpc_rnd_t rnd) { if (y >= 0) - return mpc_pow_ui (z, x, (unsigned long) y, rnd); - else { - mpc_t yy; - int inex; - - mpc_init3 (yy, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN); - mpc_set_si (yy, y, MPC_RNDNN); /* exact */ - inex = mpc_pow (z, x, yy, rnd); - mpc_clear (yy); - return inex; - } + return mpc_pow_usi (z, x, (unsigned long) y, 1, rnd); + else + return mpc_pow_usi (z, x, (unsigned long) (-y), -1, rnd); } diff --git a/src/pow_ui.c b/src/pow_ui.c index 1dd1fa0..c484500 100644 --- a/src/pow_ui.c +++ b/src/pow_ui.c @@ -23,12 +23,17 @@ MA 02111-1307, USA. */ #include "mpc-impl.h" static int -mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) +mpc_pow_usi_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign, + mpc_rnd_t rnd) { int inex; mpc_t t; + mpc_init3 (t, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN); - mpc_set_ui (t, y, MPC_RNDNN); /* exact */ + if (sign > 0) + mpc_set_ui (t, y, MPC_RNDNN); /* exact */ + else + mpc_set_si (t, - (signed long) y, MPC_RNDNN); inex = mpc_pow (z, x, t, rnd); mpc_clear (t); @@ -37,7 +42,9 @@ mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) int -mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) +mpc_pow_usi (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign, + mpc_rnd_t rnd) + /* computes z = x^(sign*y) */ { int inex; mpc_t t, x3; @@ -46,10 +53,20 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) int has3; /* non-zero if y has '11' in its binary representation */ int loop, done; + /* let mpc_pow deal with special values */ if (!mpc_fin_p (x) || mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM(x)) || y == 0) - /* let mpc_pow deal with special cases */ - return mpc_pow_ui_naive (z, x, y, rnd); + return mpc_pow_usi_naive (z, x, y, sign, rnd); + /* easy special cases */ + else if (y == 1) { + if (sign > 0) + return mpc_set (z, x, rnd); + else + return mpc_ui_div (z, 1ul, x, rnd); + } + else if (y == 2 && sign > 0) + return mpc_sqr (z, x, rnd); + /* let mpc_pow treat potential over- and underflows */ else { mpfr_exp_t exp_r = mpfr_get_exp (MPC_RE (x)), exp_i = mpfr_get_exp (MPC_IM (x)); @@ -58,19 +75,14 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) || MPC_MAX (-exp_r, -exp_i) > (-mpfr_get_emin ()) / (mpfr_exp_t) y /* heuristic for underflow */ ) - return mpc_pow_ui_naive (z, x, y, rnd); + return mpc_pow_usi_naive (z, x, y, sign, rnd); } - if (y == 1) - return mpc_set (z, x, rnd); - else if (y == 2) - return mpc_sqr (z, x, rnd); - for (l = 0, u = y, has3 = u&3; u > 3; l ++, u >>= 1, has3 |= u&3); /* l>0 is the number of bits of y, minus 2, thus y has bits: y_{l+1} y_l y_{l-1} ... y_1 y_0 */ l0 = l + 2; - p = MPC_MAX_PREC(z) + l0 + 32; /* ensures that 2^{-p} (y-1) <= 1 below */ + p = MPC_MAX_PREC(z) + l0 + 32; /* l0 ensures that y*2^{-p} <= 1 below */ mpc_init2 (t, p); if (has3) mpc_init2 (x3, p); @@ -98,28 +110,27 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) mpc_mul (t, t, x, MPC_RNDNN); } } + if (sign < 0) + mpc_ui_div (t, 1ul, t, MPC_RNDNN); - /* the absolute error on the real and imaginary parts is bounded - by |x|^y (|1+2^{-p}|^{y-1}-1) [see algorithms.tex]. - For em <= 1, (1+e)^m - 1 <= 2em since - (1+e)^m - 1 = exp(m*log(1+e))-1 <= exp(em)-1 <= 2em for em <= 1. - We apply this for e=2^{-p} and m=y-1, thus the absolute error is - bounded by |x|^y 2^{1-p} (y-1) < 2^{l0+1-p} */ if (mpfr_zero_p (MPC_RE(t)) || mpfr_zero_p (MPC_IM(t))) { - inex = mpc_pow_ui_naive (z, x, y, rnd); + inex = mpc_pow_usi_naive (z, x, y, sign, rnd); /* since mpfr_get_exp() is not defined for zero */ done = 1; } else { + /* see error bound in algorithms.tex; we use y<2^l0 instead of y-1 + also when sign>0 */ mpfr_exp_t diff; mpfr_prec_t er, ei; + diff = mpfr_get_exp (MPC_RE(t)) - mpfr_get_exp (MPC_IM(t)); /* the factor on the real part is 2+2^(-diff+2) <= 4 for diff >= 1 - and <= 2^(-diff+3) for diff <= 0 */ - er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 4; + and < 2^(-diff+3) for diff <= 0 */ + er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 3; /* the factor on the imaginary part is 2+2^(diff+2) <= 4 for diff <= -1 - and <= 2^(diff+3) for diff >= 0 */ - ei = (diff <= -1) ? l0 + 3 : l0 + diff + 4; + and < 2^(diff+3) for diff >= 0 */ + ei = (diff <= -1) ? l0 + 3 : l0 + diff + 3; if (mpfr_can_round (MPC_RE(t), p - er, GMP_RNDZ, GMP_RNDZ, MPFR_PREC(MPC_RE(z)) + (MPC_RND_RE(rnd) == GMP_RNDN)) && mpfr_can_round (MPC_IM(t), p - ei, GMP_RNDZ, GMP_RNDZ, @@ -136,7 +147,8 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) l = l0 - 2; } else { - inex = mpc_pow_ui_naive (z, x, y, rnd); + /* stop the loop and use mpc_pow */ + inex = mpc_pow_usi_naive (z, x, y, sign, rnd); done = 1; } } @@ -148,3 +160,10 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) return inex; } + + +int +mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd) +{ + return mpc_pow_usi (z, x, y, 1, rnd); +} diff --git a/src/pow_z.c b/src/pow_z.c index 9b769fe..dcc0530 100644 --- a/src/pow_z.c +++ b/src/pow_z.c @@ -1,6 +1,6 @@ /* mpc_pow_z -- Raise a complex number to an integer power. -Copyright (C) 2009, 2010 Paul Zimmermann +Copyright (C) 2009, 2010 Paul Zimmermann, Andreas Enge This file is part of the MPC Library. @@ -24,21 +24,25 @@ MA 02111-1307, USA. */ int mpc_pow_z (mpc_ptr z, mpc_srcptr x, mpz_srcptr y, mpc_rnd_t rnd) { - mpc_t yy; - int inex; - size_t n = mpz_sizeinbase (y, 2); - - /* if y fits in an unsigned long or long, call the corresponding functions, - which are supposed to be more efficient */ - if (mpz_fits_ulong_p (y)) - return mpc_pow_ui (z, x, mpz_get_ui (y), rnd); - else if (mpz_fits_slong_p (y)) - return mpc_pow_si (z, x, mpz_get_si (y), rnd); - - mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN); - mpc_set_z (yy, y, MPC_RNDNN); /* exact */ - inex = mpc_pow (z, x, yy, rnd); - mpc_clear (yy); - return inex; + mpc_t yy; + int inex; + size_t n = mpz_sizeinbase (y, 2); + + /* if y fits in an unsigned long or long, call the corresponding functions, + which are supposed to be more efficient */ + if (mpz_cmp_ui (y, 0ul) >= 0) { + if (mpz_fits_ulong_p (y)) + return mpc_pow_usi (z, x, mpz_get_ui (y), 1, rnd); + } + else { + if (mpz_fits_slong_p (y)) + return mpc_pow_usi (z, x, (unsigned long) (-mpz_get_si (y)), -1, rnd); + } + + mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN); + mpc_set_z (yy, y, MPC_RNDNN); /* exact */ + inex = mpc_pow (z, x, yy, rnd); + mpc_clear (yy); + return inex; } |