6 files changed, 72 insertions, 56 deletions
diff --git a/NEWS b/NEWS
index c7049f4..bcc5dbe 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
 Changes in version 0.8.3:
-  - Speed-up of mpc_pow_z when the exponent fits in an unsigned long
+  - Speed-up of mpc_pow_si through binary exponentiation, and of
+    mpc_pow_z when the exponent fits in a long
 
 Changes in version 0.8.2:
   - Speed-up of mpc_pow_ui through binary exponentiation
diff --git a/doc/algorithms.tex b/doc/algorithms.tex
index 4284ef6..6ca7454 100644
--- a/doc/algorithms.tex
+++ b/doc/algorithms.tex
@@ -1647,7 +1647,7 @@ on the real part and of
 on the imaginary part of the result.
 
 If we further assume that $(n-1) 2^{-p} \leq 1$, then
-$(1 + 2^{-p})^{n-1} - 1 \leq (2 n - 2) 2^{-p}$,
+$(1 + 2^{-p})^{n-1} - 1 \leq 2 (n - 1) 2^{-p}$,
 because $(1+\varepsilon)^m-1 = \exp(m \log(1+\varepsilon)) - 1
 \leq \exp(\varepsilon m) - 1 \leq 2 \varepsilon m$ as long as
 $\varepsilon m \leq 1$. This gives the simplified bounds
diff --git a/src/mpc-impl.h b/src/mpc-impl.h
index 41a899f..e1e8444 100644
--- a/src/mpc-impl.h
+++ b/src/mpc-impl.h
@@ -136,8 +136,9 @@ do {                                                            \
 extern "C" {
 #endif
 
-__MPC_DECLSPEC int  mpc_mul_naive     __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
+__MPC_DECLSPEC int  mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
 __MPC_DECLSPEC int  mpc_mul_karatsuba __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t));
+__MPC_DECLSPEC int  mpc_pow_usi __MPC_PROTO ((mpc_ptr, mpc_srcptr, unsigned long, int, mpc_rnd_t));
 __MPC_DECLSPEC char* mpc_alloc_str __MPC_PROTO ((size_t));
 __MPC_DECLSPEC char* mpc_realloc_str __MPC_PROTO ((char*, size_t, size_t));
 __MPC_DECLSPEC void mpc_free_str __MPC_PROTO ((char*));
diff --git a/src/pow_si.c b/src/pow_si.c
index c7f3957..4b16245 100644
--- a/src/pow_si.c
+++ b/src/pow_si.c
@@ -19,22 +19,13 @@ along with the MPC Library; see the file COPYING.LIB.  If not, write to
 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 MA 02111-1307, USA. */
 
-#include <limits.h> /* for CHAR_BIT */
 #include "mpc-impl.h"
 
 int
 mpc_pow_si (mpc_ptr z, mpc_srcptr x, long y, mpc_rnd_t rnd)
 {
    if (y >= 0)
-     return mpc_pow_ui (z, x, (unsigned long) y, rnd);
-   else {
-      mpc_t yy;
-      int inex;
-
-      mpc_init3 (yy, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN);
-      mpc_set_si (yy, y, MPC_RNDNN);   /* exact */
-      inex = mpc_pow (z, x, yy, rnd);
-      mpc_clear (yy);
-      return inex;
-   }
+     return mpc_pow_usi (z, x, (unsigned long) y, 1, rnd);
+   else
+     return mpc_pow_usi (z, x, (unsigned long) (-y), -1, rnd);
 }
diff --git a/src/pow_ui.c b/src/pow_ui.c
index 1dd1fa0..c484500 100644
--- a/src/pow_ui.c
+++ b/src/pow_ui.c
@@ -23,12 +23,17 @@ MA 02111-1307, USA. */
 #include "mpc-impl.h"
 
 static int
-mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+mpc_pow_usi_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign,
+   mpc_rnd_t rnd)
 {
    int inex;
    mpc_t t;
+
    mpc_init3 (t, sizeof (unsigned long) * CHAR_BIT, MPFR_PREC_MIN);
-   mpc_set_ui (t, y, MPC_RNDNN);   /* exact */
+   if (sign > 0)
+      mpc_set_ui (t, y, MPC_RNDNN); /* exact */
+   else
+      mpc_set_si (t, - (signed long) y, MPC_RNDNN);
    inex = mpc_pow (z, x, t, rnd);
    mpc_clear (t);
 
@@ -37,7 +42,9 @@ mpc_pow_ui_naive (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
 
 
 int
-mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+mpc_pow_usi (mpc_ptr z, mpc_srcptr x, unsigned long y, int sign,
+   mpc_rnd_t rnd)
+   /* computes z = x^(sign*y) */
 {
    int inex;
    mpc_t t, x3;
@@ -46,10 +53,20 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
    int has3; /* non-zero if y has '11' in its binary representation */
    int loop, done;
 
+   /* let mpc_pow deal with special values */
    if (!mpc_fin_p (x) || mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM(x))
        || y == 0)
-      /* let mpc_pow deal with special cases */
-      return mpc_pow_ui_naive (z, x, y, rnd);
+      return mpc_pow_usi_naive (z, x, y, sign, rnd);
+   /* easy special cases */
+   else if (y == 1) {
+      if (sign > 0)
+         return mpc_set (z, x, rnd);
+      else
+         return mpc_ui_div (z, 1ul, x, rnd);
+   }
+   else if (y == 2 && sign > 0)
+      return mpc_sqr (z, x, rnd);
+   /* let mpc_pow treat potential over- and underflows */
    else {
       mpfr_exp_t exp_r = mpfr_get_exp (MPC_RE (x)),
                  exp_i = mpfr_get_exp (MPC_IM (x));
@@ -58,19 +75,14 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
           || MPC_MAX (-exp_r, -exp_i) > (-mpfr_get_emin ()) / (mpfr_exp_t) y
              /* heuristic for underflow */
          )
-         return mpc_pow_ui_naive (z, x, y, rnd);
+         return mpc_pow_usi_naive (z, x, y, sign, rnd);
    }
 
-   if (y == 1)
-      return mpc_set (z, x, rnd);
-   else if (y == 2)
-      return mpc_sqr (z, x, rnd);
-
    for (l = 0, u = y, has3 = u&3; u > 3; l ++, u >>= 1, has3 |= u&3);
       /* l>0 is the number of bits of y, minus 2, thus y has bits:
          y_{l+1} y_l y_{l-1} ... y_1 y_0 */
    l0 = l + 2;
-   p = MPC_MAX_PREC(z) + l0 + 32; /* ensures that 2^{-p} (y-1) <= 1 below */
+   p = MPC_MAX_PREC(z) + l0 + 32; /* l0 ensures that y*2^{-p} <= 1 below */
    mpc_init2 (t, p);
    if (has3)
       mpc_init2 (x3, p);
@@ -98,28 +110,27 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
                mpc_mul (t, t, x, MPC_RNDNN);
          }
       }
+      if (sign < 0)
+         mpc_ui_div (t, 1ul, t, MPC_RNDNN);
 
-      /* the absolute error on the real and imaginary parts is bounded
-         by |x|^y (|1+2^{-p}|^{y-1}-1) [see algorithms.tex].
-         For em <= 1, (1+e)^m - 1 <= 2em since
-         (1+e)^m - 1 = exp(m*log(1+e))-1 <= exp(em)-1 <= 2em for em <= 1.
-         We apply this for e=2^{-p} and m=y-1, thus the absolute error is
-         bounded by |x|^y 2^{1-p} (y-1) < 2^{l0+1-p} */
       if (mpfr_zero_p (MPC_RE(t)) || mpfr_zero_p (MPC_IM(t))) {
-         inex = mpc_pow_ui_naive (z, x, y, rnd);
+         inex = mpc_pow_usi_naive (z, x, y, sign, rnd);
             /* since mpfr_get_exp() is not defined for zero */
          done = 1;
       }
       else {
+         /* see error bound in algorithms.tex; we use y<2^l0 instead of y-1
+            also when sign>0                                                */
          mpfr_exp_t diff;
          mpfr_prec_t er, ei;
+
          diff = mpfr_get_exp (MPC_RE(t)) - mpfr_get_exp (MPC_IM(t));
          /* the factor on the real part is 2+2^(-diff+2) <= 4 for diff >= 1
-            and <= 2^(-diff+3) for diff <= 0 */
-         er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 4;
+            and < 2^(-diff+3) for diff <= 0 */
+         er = (diff >= 1) ? l0 + 3 : l0 + (-diff) + 3;
          /* the factor on the imaginary part is 2+2^(diff+2) <= 4 for diff <= -1
-            and <= 2^(diff+3) for diff >= 0 */
-         ei = (diff <= -1) ? l0 + 3 : l0 + diff + 4;
+            and < 2^(diff+3) for diff >= 0 */
+         ei = (diff <= -1) ? l0 + 3 : l0 + diff + 3;
          if (mpfr_can_round (MPC_RE(t), p - er, GMP_RNDZ, GMP_RNDZ,
                               MPFR_PREC(MPC_RE(z)) + (MPC_RND_RE(rnd) == GMP_RNDN))
                && mpfr_can_round (MPC_IM(t), p - ei, GMP_RNDZ, GMP_RNDZ,
@@ -136,7 +147,8 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
             l = l0 - 2;
          }
          else {
-            inex = mpc_pow_ui_naive (z, x, y, rnd);
+            /* stop the loop and use mpc_pow */
+            inex = mpc_pow_usi_naive (z, x, y, sign, rnd);
             done = 1;
          }
       }
@@ -148,3 +160,10 @@ mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
 
    return inex;
 }
+
+
+int
+mpc_pow_ui (mpc_ptr z, mpc_srcptr x, unsigned long y, mpc_rnd_t rnd)
+{
+  return mpc_pow_usi (z, x, y, 1, rnd);
+}
diff --git a/src/pow_z.c b/src/pow_z.c
index 9b769fe..dcc0530 100644
--- a/src/pow_z.c
+++ b/src/pow_z.c
@@ -1,6 +1,6 @@
 /* mpc_pow_z -- Raise a complex number to an integer power.
 
-Copyright (C) 2009, 2010 Paul Zimmermann
+Copyright (C) 2009, 2010 Paul Zimmermann, Andreas Enge
 
 This file is part of the MPC Library.
 
@@ -24,21 +24,25 @@ MA 02111-1307, USA. */
 int
 mpc_pow_z (mpc_ptr z, mpc_srcptr x, mpz_srcptr y, mpc_rnd_t rnd)
 {
-  mpc_t yy;
-  int inex;
-  size_t n = mpz_sizeinbase (y, 2);
-
-  /* if y fits in an unsigned long or long, call the corresponding functions,
-     which are supposed to be more efficient */
-  if (mpz_fits_ulong_p (y))
-    return mpc_pow_ui (z, x, mpz_get_ui (y), rnd);
-  else if (mpz_fits_slong_p (y))
-    return mpc_pow_si (z, x, mpz_get_si (y), rnd);
-
-  mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN);
-  mpc_set_z (yy, y, MPC_RNDNN);   /* exact */
-  inex = mpc_pow (z, x, yy, rnd);
-  mpc_clear (yy);
-  return inex;
+   mpc_t yy;
+   int inex;
+   size_t n = mpz_sizeinbase (y, 2);
+
+   /* if y fits in an unsigned long or long, call the corresponding functions,
+      which are supposed to be more efficient */
+   if (mpz_cmp_ui (y, 0ul) >= 0) {
+      if (mpz_fits_ulong_p (y))
+         return mpc_pow_usi (z, x, mpz_get_ui (y), 1, rnd);
+   }
+   else {
+      if (mpz_fits_slong_p (y))
+         return mpc_pow_usi (z, x, (unsigned long) (-mpz_get_si (y)), -1, rnd);
+   }
+
+   mpc_init3 (yy, (n < MPFR_PREC_MIN) ? MPFR_PREC_MIN : n, MPFR_PREC_MIN);
+   mpc_set_z (yy, y, MPC_RNDNN);   /* exact */
+   inex = mpc_pow (z, x, yy, rnd);
+   mpc_clear (yy);
+   return inex;
 }