From ea5ae4cf3b6d33e8b41d67d5bbf31efd5fe02199 Mon Sep 17 00:00:00 2001
From: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>
Date: Tue, 10 Jun 2008 08:47:11 +0000
Subject: the O(l^{1/2}) method to evaluate power series is due to Paterson and
 Stockmeyer and not Brent/Kung

git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@5386 280ebfd0-de03-0410-8827-d642c229c3f4
---
 algorithms.bib | 21 +++++++++++++++++++++
 algorithms.tex |  6 +++++-
 exp_2.c        | 10 +++++-----
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/algorithms.bib b/algorithms.bib
index d18a620eb..51e6c0535 100644
--- a/algorithms.bib
+++ b/algorithms.bib
@@ -192,3 +192,24 @@ url         = {http://www.inria.fr/rrrt/rr-5852.html},
   pages =	 {200--202},
   month =	 {April}
 }
+
+@Article{PaSt73,
+  author = 	 {Michael S. Paterson and Larry J. Stockmeyer},
+  title = 	 {On the Number of Nonscalar Multiplications Necessary to
+                  Evaluate Polynomials},
+  journal = 	 sicomp,
+  year = 	 1973,
+  volume =	 2,
+  number =	 1,
+  pages =	 {60--66}
+}
+
+@Article{Smith91,
+  author = 	 {David M. Smith},
+  title = 	 {Algorithm 693. A {F}ORTRAN Package for Floating-Point Multiple-Precision Arithmetic},
+  journal = 	 toms,
+  year = 	 1991,
+  volume =	 17,
+  number =	 2,
+  pages =	 {273--283}
+}
diff --git a/algorithms.tex b/algorithms.tex
index eea074b42..e88cf912f 100644
--- a/algorithms.tex
+++ b/algorithms.tex
@@ -1129,11 +1129,15 @@ value of $k$ is about $n^{1/2}$, giving a complexity of $\O(n^{1/2} M(n))$.
 This is what is implemented in {\tt mpfr\_exp2\_aux}.
 
 If we use a baby step/giant step approach, the Taylor series
-can be evaluated in $\O(l^{1/2})$ operations,
+can be evaluated in $\O(l^{1/2})$ nonscalar multiplications 
+--- i.e., with both operands of full $n$-bit size --- as described in
+\cite{PaSt73},
 thus the evaluation requires $(n/k)^{1/2} + k$ multiplications,
 and the optimal $k$ is now about $n^{1/3}$,
 giving a total complexity of $\O(n^{1/3} M(n))$.
 This is implemented in the function {\tt mpfr\_exp2\_aux2}.
+(Note: the algorithm from Paterson and Stockmeyer was rediscovered by Smith,
+who named it ``concurrent series'' in \cite{Smith91}.)
 
 \subsection{The error function}
 
diff --git a/exp_2.c b/exp_2.c
index 103268007..1a6b893b3 100644
--- a/exp_2.c
+++ b/exp_2.c
@@ -1,5 +1,5 @@
 /* mpfr_exp_2 -- exponential of a floating-point number
-                using Brent's algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
+                 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
 
 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
 Contributed by the Arenaire and Cacao projects, INRIA.
@@ -75,8 +75,8 @@ mpz_normalize2 (mpz_t rop, mpz_t z, mp_exp_t expz, mp_exp_t target)
 
 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
    where x = n*log(2)+(2^K)*r
-   together with Brent-Kung O(t^(1/2)) algorithm for the evaluation of
-   power series. The resulting complexity is O(n^(1/3)*M(n)).
+   together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
+   evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
 */
 int
 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
@@ -168,7 +168,7 @@ mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
       /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
       l = (precy < MPFR_EXP_2_THRESHOLD)
         ? mpfr_exp2_aux (ss, r, q, &exps)      /* naive method */
-        : mpfr_exp2_aux2 (ss, r, q, &exps);    /* Brent/Kung method */
+        : mpfr_exp2_aux2 (ss, r, q, &exps);    /* Paterson/Stockmeyer method */
 
       MPFR_LOG_MSG (("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q));
 
@@ -282,7 +282,7 @@ mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
 }
 
 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
-   using Brent/Kung method with O(sqrt(l)) multiplications.
+   using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
    Return l.
    Uses m multiplications of full size and 2l/m of decreasing size,
    i.e. a total equivalent to about m+l/m full multiplications,
-- 
cgit v1.2.1