summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorenge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2010-06-17 17:40:38 +0000
committerenge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2010-06-17 17:40:38 +0000
commit8f669fde281f3512784a5c53e3c8a1a87e5c283d (patch)
tree80af8ef84f59bcb51d8a51720b64edfc699035e5
parentcdae67dc9bf52fd118eff5ea82462fef46691143 (diff)
downloadmpc-8f669fde281f3512784a5c53e3c8a1a87e5c283d.tar.gz
unified computation of pow_ui and pow_si in a function pow_usi, thereby
applying binary exponentiation in the case of negative exponent git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@788 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r--NEWS3
-rw-r--r--doc/algorithms.tex2
-rw-r--r--src/mpc-impl.h3
-rw-r--r--src/pow_si.c15
-rw-r--r--src/pow_ui.c67
-rw-r--r--src/pow_z.c38
6 files changed, 72 insertions, 56 deletions
diff --git a/NEWS b/NEWS
index c7049f4..bcc5dbe 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
Changes in version 0.8.3:
- - Speed-up of mpc_pow_z when the exponent fits in an unsigned long
+ - Speed-up of mpc_pow_si through binary exponentiation, and of
+ mpc_pow_z when the exponent fits in a long
Changes in version 0.8.2:
- Speed-up of mpc_pow_ui through binary exponentiation
diff --git a/doc/algorithms.tex b/doc/algorithms.tex
index 4284ef6..6ca7454 100644
--- a/doc/algorithms.tex
+++ b/doc/algorithms.tex
@@ -1647,7 +1647,7 @@ on the real part and of
on the imaginary part of the result.
If we further assume that $(n-1) 2^{-p} \leq 1$, then
-$(1 + 2^{-p})^{n-1} - 1 \leq (2 n - 2) 2^{-p}$,
+$(1 + 2^{-p})^{n-1} - 1 \leq 2 (n - 1) 2^{-p}$,
because $(1+\varepsilon)^m-1 = \exp(m \log(1+\varepsilon)) - 1
\leq \exp(\varepsilon m) - 1 \leq 2 \varepsilon m$ as long as
$\varepsilon m \leq 1$. This gives the simplified bounds
diff --git a/src/mpc-impl.h b/src/mpc-impl.h
index 41a899f..e1e8444 100644
--- a/src/mpc-impl.h
+++ b/src/mpc-impl.h
@@ -136,8 +136,9 @@ do { \
extern "C" {
#endif
-__MPC_DECLSPEC int mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
+__MPC_DECLSPEC int mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
__MPC_DECLSPEC int mpc_mul_karatsuba __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
+__MPC_DECLSPEC int mpc_pow_usi __MPC_PROTO ((mpc_ptr, mpc_srcptr, unsigned long, int, mpc_rnd_t));
__MPC_DECLSPEC char* mpc_alloc_str __MPC_PROTO ((size_t));
__MPC_DECLSPEC char* mpc_realloc_str __MPC_PROTO ((char*, size_t, size_t));
__MPC_DECLSPEC void mpc_free_str __MPC_PROTO ((char*));
diff --git a/src/pow_si.c b/src/pow_si.c
index c7f3957..4b16245 100644
--- a/src/pow_si.c
+++ b/src/pow_si.c
@@ -19,22 +19,13 @@ along with the MPC Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
-#include <limits.h> /* for CHAR_BIT */
#include "mpc-impl.h"
int
mpc_pow_si (mpc_ptr z, mpc_srcptr x, long y, mpc_rnd_t rnd)
{
if (y >= 0)
- return mpc_pow_ui (z, x, (unsigned long) y, rnd);
- else {
- mpc_t yy;
- int inex;
-
- mpc_init3 (yy, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN);
- mpc_set_si (yy, y, MPC_RNDNN); /* exact */
- inex = mpc_pow (z, x, yy, rnd);
- mpc_clear (yy);
- return inex;
- }
+ return mpc_pow_usi (z, x, (unsigned long) y, 1, rnd);
+ else
+ return mpc_pow_usi (z, x, (unsigned long) (-y), -1, rnd);
}
diff --git a/src/pow_ui.c b/src/pow_ui.c
index 1dd1fa0..c484500 100644
--- a/src/pow_ui.c
+++ b/src/pow_ui.c
@@ -23,12 +23,17 @@ MA 02111-1307, USA. */
#include "mpc-impl.h"
static int
-mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+mpc_pow_usi_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign,
+ mpc_rnd_t rnd)
{
int inex;
mpc_t t;
+
mpc_init3 (t, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN);
- mpc_set_ui (t, y, MPC_RNDNN); /* exact */
+ if (sign > 0)
+ mpc_set_ui (t, y, MPC_RNDNN); /* exact */
+ else
+ mpc_set_si (t, - (signed long) y, MPC_RNDNN);
inex = mpc_pow (z, x, t, rnd);
mpc_clear (t);
@@ -37,7 +42,9 @@ mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
int
-mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+mpc_pow_usi (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign,
+ mpc_rnd_t rnd)
+ /* computes z = x^(sign*y) */
{
int inex;
mpc_t t, x3;
@@ -46,10 +53,20 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
int has3; /* non-zero if y has '11' in its binary representation */
int loop, done;
+ /* let mpc_pow deal with special values */
if (!mpc_fin_p (x) || mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM(x))
|| y == 0)
- /* let mpc_pow deal with special cases */
- return mpc_pow_ui_naive (z, x, y, rnd);
+ return mpc_pow_usi_naive (z, x, y, sign, rnd);
+ /* easy special cases */
+ else if (y == 1) {
+ if (sign > 0)
+ return mpc_set (z, x, rnd);
+ else
+ return mpc_ui_div (z, 1ul, x, rnd);
+ }
+ else if (y == 2 && sign > 0)
+ return mpc_sqr (z, x, rnd);
+ /* let mpc_pow treat potential over- and underflows */
else {
mpfr_exp_t exp_r = mpfr_get_exp (MPC_RE (x)),
exp_i = mpfr_get_exp (MPC_IM (x));
@@ -58,19 +75,14 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
|| MPC_MAX (-exp_r, -exp_i) > (-mpfr_get_emin ()) / (mpfr_exp_t) y
/* heuristic for underflow */
)
- return mpc_pow_ui_naive (z, x, y, rnd);
+ return mpc_pow_usi_naive (z, x, y, sign, rnd);
}
- if (y == 1)
- return mpc_set (z, x, rnd);
- else if (y == 2)
- return mpc_sqr (z, x, rnd);
-
for (l = 0, u = y, has3 = u&3; u > 3; l ++, u >>= 1, has3 |= u&3);
/* l>0 is the number of bits of y, minus 2, thus y has bits:
y_{l+1} y_l y_{l-1} ... y_1 y_0 */
l0 = l + 2;
- p = MPC_MAX_PREC(z) + l0 + 32; /* ensures that 2^{-p} (y-1) <= 1 below */
+ p = MPC_MAX_PREC(z) + l0 + 32; /* l0 ensures that y*2^{-p} <= 1 below */
mpc_init2 (t, p);
if (has3)
mpc_init2 (x3, p);
@@ -98,28 +110,27 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
mpc_mul (t, t, x, MPC_RNDNN);
}
}
+ if (sign < 0)
+ mpc_ui_div (t, 1ul, t, MPC_RNDNN);
- /* the absolute error on the real and imaginary parts is bounded
- by |x|^y (|1+2^{-p}|^{y-1}-1) [see algorithms.tex].
- For em <= 1, (1+e)^m - 1 <= 2em since
- (1+e)^m - 1 = exp(m*log(1+e))-1 <= exp(em)-1 <= 2em for em <= 1.
- We apply this for e=2^{-p} and m=y-1, thus the absolute error is
- bounded by |x|^y 2^{1-p} (y-1) < 2^{l0+1-p} */
if (mpfr_zero_p (MPC_RE(t)) || mpfr_zero_p (MPC_IM(t))) {
- inex = mpc_pow_ui_naive (z, x, y, rnd);
+ inex = mpc_pow_usi_naive (z, x, y, sign, rnd);
/* since mpfr_get_exp() is not defined for zero */
done = 1;
}
else {
+ /* see error bound in algorithms.tex; we use y<2^l0 instead of y-1
+ also when sign>0 */
mpfr_exp_t diff;
mpfr_prec_t er, ei;
+
diff = mpfr_get_exp (MPC_RE(t)) - mpfr_get_exp (MPC_IM(t));
/* the factor on the real part is 2+2^(-diff+2) <= 4 for diff >= 1
- and <= 2^(-diff+3) for diff <= 0 */
- er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 4;
+ and < 2^(-diff+3) for diff <= 0 */
+ er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 3;
/* the factor on the imaginary part is 2+2^(diff+2) <= 4 for diff <= -1
- and <= 2^(diff+3) for diff >= 0 */
- ei = (diff <= -1) ? l0 + 3 : l0 + diff + 4;
+ and < 2^(diff+3) for diff >= 0 */
+ ei = (diff <= -1) ? l0 + 3 : l0 + diff + 3;
if (mpfr_can_round (MPC_RE(t), p - er, GMP_RNDZ, GMP_RNDZ,
MPFR_PREC(MPC_RE(z)) + (MPC_RND_RE(rnd) == GMP_RNDN))
&& mpfr_can_round (MPC_IM(t), p - ei, GMP_RNDZ, GMP_RNDZ,
@@ -136,7 +147,8 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
l = l0 - 2;
}
else {
- inex = mpc_pow_ui_naive (z, x, y, rnd);
+ /* stop the loop and use mpc_pow */
+ inex = mpc_pow_usi_naive (z, x, y, sign, rnd);
done = 1;
}
}
@@ -148,3 +160,10 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
return inex;
}
+
+
+int
+mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+{
+ return mpc_pow_usi (z, x, y, 1, rnd);
+}
diff --git a/src/pow_z.c b/src/pow_z.c
index 9b769fe..dcc0530 100644
--- a/src/pow_z.c
+++ b/src/pow_z.c
@@ -1,6 +1,6 @@
/* mpc_pow_z -- Raise a complex number to an integer power.
-Copyright (C) 2009, 2010 Paul Zimmermann
+Copyright (C) 2009, 2010 Paul Zimmermann, Andreas Enge
This file is part of the MPC Library.
@@ -24,21 +24,25 @@ MA 02111-1307, USA. */
int
mpc_pow_z (mpc_ptr z, mpc_srcptr x, mpz_srcptr y, mpc_rnd_t rnd)
{
- mpc_t yy;
- int inex;
- size_t n = mpz_sizeinbase (y, 2);
-
- /* if y fits in an unsigned long or long, call the corresponding functions,
- which are supposed to be more efficient */
- if (mpz_fits_ulong_p (y))
- return mpc_pow_ui (z, x, mpz_get_ui (y), rnd);
- else if (mpz_fits_slong_p (y))
- return mpc_pow_si (z, x, mpz_get_si (y), rnd);
-
- mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN);
- mpc_set_z (yy, y, MPC_RNDNN); /* exact */
- inex = mpc_pow (z, x, yy, rnd);
- mpc_clear (yy);
- return inex;
+ mpc_t yy;
+ int inex;
+ size_t n = mpz_sizeinbase (y, 2);
+
+ /* if y fits in an unsigned long or long, call the corresponding functions,
+ which are supposed to be more efficient */
+ if (mpz_cmp_ui (y, 0ul) >= 0) {
+ if (mpz_fits_ulong_p (y))
+ return mpc_pow_usi (z, x, mpz_get_ui (y), 1, rnd);
+ }
+ else {
+ if (mpz_fits_slong_p (y))
+ return mpc_pow_usi (z, x, (unsigned long) (-mpz_get_si (y)), -1, rnd);
+ }
+
+ mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN);
+ mpc_set_z (yy, y, MPC_RNDNN); /* exact */
+ inex = mpc_pow (z, x, yy, rnd);
+ mpc_clear (yy);
+ return inex;
}