1 files changed, 11 insertions, 3 deletions
diff --git a/mpf/pow_ui.c b/mpf/pow_ui.c
index 558addc4b..6d0528bed 100644
--- a/mpf/pow_ui.c
+++ b/mpf/pow_ui.c
@@ -32,6 +32,11 @@ see https://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 #include "longlong.h"
 
+/* This uses a plain right-to-left square-and-multiply algorithm.
+
+   FIXME: When popcount(e) is not too small, it would probably speed things up
+   to use a k-ary sliding window algorithm.  */
+
 void
 mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
 {
@@ -50,9 +55,11 @@ mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
   count_leading_zeros (cnt, (mp_limb_t) e);
   cnt = GMP_LIMB_BITS - 1 - cnt;
 
-  /* Use a temp for all intermediate result.  We might want to add an exponent
-     derived # of bits here too, e.g.  ((cnt >> GMP_LIMB_BITS/2) != 0).  */
-  mpf_init2 (t, mpf_get_prec (r) + GMP_LIMB_BITS);
+  /* Increase computation precision as a function of the exponent.  Adding
+     log2(popcount(e) + log2(e)) bits should be sufficient, but we add log2(e),
+     i.e. much more.  With mpf's rounding of precision to whole limbs, this
+     will be excessive only when limbs are artificially small.  */
+  mpf_init2 (t, mpf_get_prec (r) + cnt);
 
   mpf_set (t, b);		/* consume most significant bit */
   while (--cnt > 0)
@@ -62,6 +69,7 @@ mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
 	mpf_mul (t, t, b);
     }
 
+  /* Do the last iteration specially in order to save a copy operation.  */
   if (e & 1)
     {
       mpf_mul (t, t, t);