algorithms.tex: improve explanations in section "generic error of division ",

use rounding away from zero in the analysis of tan algorithm. tan.c: rewrite code according to algorithm described in algorithm.tex. git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@154 211d60ee-9f03-0410-a15a-8952a2c7a4e4
author: thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4> 2008-06-25 12:36:16 +0000
committer: thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4> 2008-06-25 12:36:16 +0000
commit: 3ae400f3825b869ee1fdca4cf13888366119ffe0 (patch)
tree: eebca032cf11a6feb37cad6dcc033f575f030dea
parent: 92f431b9650ad6562e5856f64ff94537e8f7ad9e (diff)
download: mpc-3ae400f3825b869ee1fdca4cf13888366119ffe0.tar.gz
2 files changed, 119 insertions, 74 deletions
diff --git a/doc/algorithms.tex b/doc/algorithms.tex
index 4985321..113760e 100644
--- a/doc/algorithms.tex
+++ b/doc/algorithms.tex
@@ -15,6 +15,7 @@
 \DeclareMathOperator{\pinf}{\bigtriangleup}
 \DeclareMathOperator{\minf}{\bigtriangledown}
 \DeclareMathOperator{\N}{\mathcal N}
+\DeclareMathOperator{\A}{\mathcal A}
 
 \title {MPC: Algorithms and Error Analysis}
 \author {Andreas Enge \and Philippe Th\'eveny}
@@ -43,15 +44,15 @@ $\widetilde{z_1}$ and $\widetilde{z_2}$ respectively. For $n=1, 2$, let
 $\widetilde{z_n} = \widetilde{x_n} + i \widetilde{y_n}$ and $z_n = x_n + i
 y_n$, let $k_{R,n}$ and $k_{I,n}$ the upper bound coefficients: $\error(x_n)
 \leq k_{R,n} \Ulp(x_n)$ and $\error(y_n) \leq k_{I,n} \Ulp(y_n)$.  Note that
-we always have $x_n.c_{R,n}^- \leq \widetilde{x_n} \leq x_n.c_{R,n}^+$ and
-$y_n.c_{I,n}^- \leq \widetilde{y_n} \leq y_n.c_{I,n}^+$, with $c_{\cdot,n}^- =
-1-k_{\cdot,n}2^{-p}$ and $c_{\cdot,n}^+ = 1+k_{\cdot,n}2^{1-p}$. When $z_n$ is
-obtained by rounding $\widetilde{z_n}$ to the precision $p$ (we note
-$z_n=\circ(\widetilde{z_n})$), we have another inequalities: $\frac{1}{2}|x_n|
-\leq |\widetilde{x_n}| \leq 2|x_n|$; if the rounding mode is rounding towards
-plus infinity: $\widetilde{x_n} \leq x_n$, and if the rounding mode is toward
-minus infinity: $x_n \leq \widetilde{x_n}$ (similar relations hold for the
-imaginary part).
+we always have $|x_n|.c_{R,n}^- \leq |\widetilde{x_n}| \leq |x_n|.c_{R,n}^+$
+and $|y_n|.c_{I,n}^- \leq |\widetilde{y_n}| \leq |y_n|.c_{I,n}^+$, with
+$c_{\cdot,n}^- = 1-k_{\cdot,n}2^{1-p}$ and $c_{\cdot,n}^+ =
+1+k_{\cdot,n}2^{1-p}$. When $z_n$ is obtained by rounding $\widetilde{z_n}$ to
+the precision $p$ (we note $z_n=\circ(\widetilde{z_n})$), we have another
+inequalities: $\frac{1}{2}|x_n| \leq |\widetilde{x_n}| \leq 2|x_n|$; if the
+rounding mode is rounding towards plus infinity: $\widetilde{x_n} \leq x_n$,
+and if the rounding mode is toward minus infinity: $x_n \leq \widetilde{x_n}$
+(similar relations hold for the imaginary part).
 
 Let $z=x+iy$ the result of the operation $z_1\diamond z_2$ rounded to the
 precision of $z$ (we note $z=\circ(z_1 \diamond z_2)$). Let $c_R$ the bound
@@ -62,7 +63,7 @@ the imaginary part, \emph{mutatis mutandis}).
 
 \subsection {Generic error of addition/substraction}
 
-Let
+Using the notations of the introductory paragraphs above, let
 \[
 z=\circ(z_1+z_2).
 \]
@@ -96,7 +97,7 @@ simpler expression
 
 \subsection {Generic error of multiplication}
 
-Let
+Using the notations of the introductory paragraphs above, let
 \[
 z=\circ(z_1\times z_2).
 \]
@@ -166,7 +167,7 @@ $\delta' = \Exp(y_1x_2)-\Exp(y) \leq \Exp(y_1)+\Exp(x_2)-\Exp(y)$.
 
 \subsection {Generic error of division}\label{generic:div}
 
-Let
+Using the notations of the introductory paragraphs above, let
 \[
 z=\circ(\frac{z_1}{z_2}).
 \]
@@ -182,15 +183,16 @@ c&=x_2^2+y_2^2
 \end{align*}
 then the error of the real part is
 \[
-\error(x) = |x-\frac{A}{C}| \leq |x-\frac{a}{c}| + |\frac{a}{c}-\frac{A}{C}|.
+\error(x) = \left|x-\frac{A}{C}\right| \leq \left|x-\frac{a}{c}\right| +
+\left|\frac{a}{c}-\frac{A}{C}\right|.
 \]
 The first term of the right hand side is the rounding error:
 \[
-|x-\frac{a}{c}|\leq c_R \Ulp(x).
+\left|x-\frac{a}{c}\right|\leq c_R \Ulp(x).
 \]
 The second term can be bounded as follows:
 \[
-|\frac{a}{c}-\frac{A}{C}| \leq \frac{1}{c} |a-A|
+\left|\frac{a}{c}-\frac{A}{C}\right| \leq \frac{1}{c} |a-A|
 +\left|\frac{A}{C}\right|\frac{1}{c}|c-C|.
 \]
 We have
@@ -220,7 +222,10 @@ and we can show in a similar way that
 |y_1y_2-\widetilde{y_1}\widetilde{y_2}| \leq \left( (1+c_{I,1})k_{I,2}
 +(1+c_{I,2})k_{I,1} \right)2^{d'} \Ulp(a),
 \]
-where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have
+where $d'=\Exp(y_1y_2)-\Exp(a)$. Because $x$ is $a/c$ rounded to the working
+precision, we have $\Ulp(a/c) \leq \Ulp(x)$ and, as $c$ is positive,
+$\Ulp(a)/c \leq 2\Ulp(a/c)$. Thus $\Ulp(a)/c \leq 2\Ulp(x)$. Using the
+preceding inequalities we have
 \[
 \frac{1}{c}|a-A| \leq \left[
 \left((1+c_{R,1})k_{R,2}+(1+c_{R,2})k_{R,1}\right)2^{1+d}
@@ -229,30 +234,34 @@ where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have
 \]
 In addition, we have
 \[
-|c-C| \leq |x_2^2-\widetilde{x_2}^2| + |y_2-\widetilde{y_2}^2|,
+|c-C| \leq \left|x_2^2-\widetilde{x_2}^2\right| +
+\left|y_2^2-\widetilde{y_2}^2\right|,
 \]
 with
 \begin{align*}
-|x_2^2-\widetilde{x_2}^2| &\leq |x_2+\widetilde{x_2}||x_2-\widetilde{x_2}|\\
-&\leq (1+c_{R,2})|x_2|k_{R,2} \Ulp(x_2)\\
-&\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2),
+\left|x_2^2-\widetilde{x_2}^2\right| &= \left|x_2+\widetilde{x_2}\right|
+\left|x_2-\widetilde{x_2}\right|\\ &\leq (1+c_{R,2})|x_2|k_{R,2}
+\Ulp(x_2)\\ &\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2),
 \end{align*}
 we can show in a similar way that
 \[
-|y_2^2-\widetilde{y_2}^2| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2).
+\left|y_2^2-\widetilde{y_2}^2\right| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2).
 \]
 But $\Ulp(x_2^2) \leq \Ulp(x_2^2+y_2^2) = \Ulp(c)$, and the same relation
 holds for $\Ulp(y_2^2)$, thus
 \[
 |c-C| \leq 2\left[(1+c_{R,2})k_{R,2}+(1+c_{I,2})k_{I,2}\right]\Ulp(c);
 \]
-note that
+let $c_{R,2}^-$ and $c_{I,2}^-$ two positive numbers such that $c_{R,2}^-|x_2|
+\leq \left|\widetilde{x_2}\right|$ and $c_{I,2}^-|y_2| \leq
+\left|\widetilde{y_2}\right|$, then
 \[
 \left|\frac{A}{C}\right| \leq
 \frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
 \left|\frac{a}{c}\right|,
 \]
-thus we have
+and, noticing that $\left|\frac{a}{c}\right|\frac{1}{c}\Ulp(c) \leq 2\Ulp(x)$,
+we have
 \[
 \left|\frac{A}{C}\right|\frac{1}{c}|c-C| \leq
 4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
@@ -274,7 +283,7 @@ is bounded in the following way
 An analog process gives
 \begin{equation*}
   \begin{split}
-    \error(y) &\leq [c_R\\
+    \error(y) &\leq [c_I\\
     &\quad+\left((1+c_{I,1})k_{R,2}+(1+c_{R,2})k_{I,1}\right)2^{1+\delta}
     +\left((1+c_{R,1})k_{I,2}+(1+c_{I,2})k_{R,1}\right)2^{1+\delta'}\\
     &\quad+4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
@@ -285,20 +294,22 @@ An analog process gives
 with $\delta=\Exp(y_1x_2)-\Exp(b)$ and $\delta'=\Exp(x_1y_2)-\Exp(b)$.
 
 Note that, when $z_1$ and $z_2$ are the rounded values of $\widetilde{z_1}$
-and $\widetilde{z_2}$ respectively, with rounding away from zero, we have the
-much simpler inequalities:
+and $\widetilde{z_2}$ respectively, with rounding away from zero, then we can
+substitute 1 for $k_{R,n}$, $c_{R,n}$, $k_{I,n}$, $c_{I,n}$ (with $n=1,2$),
+$\frac{1}{2}$ for $c_{R,2}^-$, $c_{I,2}^-$, giving the much simpler
+inequalities:
 \begin{align*}
 \error(x) &\leq [c_R + 2^{3+d} + 2^{3+d'} + 2^6] \Ulp(x)\\
 \error(y) &\leq [c_I + 2^{3+\delta} + 2^{3+\delta'} + 2^6] \Ulp(y).
 \end{align*}
-If the rounding mode is rounding to nearest, we have:
+If the rounding mode is rounding to nearest, then we can substitute
+$\frac{1}{2}$ for $k_{R,n}$, $k_{I,n}$, $c_{R,2}^-$, $c_{I,2}^-$, 2 for
+$c_{R,n}$, $c_{I,n}$ and we have:
 \begin{align*}
-\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 48] \Ulp(x)
-\\
-\error(x) 
-&\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 48] \Ulp(y),
+\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 192] \Ulp(x)\\
+\error(x) &\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 192]
+\Ulp(y).
 \end{align*}
-note that the preceding equations still hold.
 
 At last, let us remark that $x_1x_2$ and $y_1y_2$ cannot have the same
 sign if $y_1x_2$ and $-x_1y_2$ do have the same sign, thus there is
@@ -345,18 +356,18 @@ Let $z = x + i y$ with $x \neq 0$ and $y \neq 0$.
 
 We compute $\tan z$ as follows:
 \begin{align*}
-u &\leftarrow \N(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u))
+u &\leftarrow \A(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u))
 &\error(\Im(u)) &\leq 1 \Ulp(\Im(u))
 \\
-v &\leftarrow \N(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v))
+v &\leftarrow \A(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v))
 &\error(\Im(v)) &\leq 1 \Ulp(\Im(v))
 \\
-t &\leftarrow \N(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t))
+t &\leftarrow \A(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t))
 &\error(\Im(t)) &\leq k_I \Ulp(\Im(t))
 \end{align*}
-where $w_2 \leftarrow \N(w_1)$ means that the real and imaginary parts of
-$w_2$ are respectively the real and imaginary part of $w_1$ rounded to nearest
-to the working precision.
+where $w_2 \leftarrow \A(w_1)$ means that the real and imaginary parts of
+$w_2$ are respectively the real and imaginary part of $w_1$ rounded away from
+zero to the working precision.
 
 We know that $\Re(\frac{a+i b}{c+i d})=\frac{a c +b d}{c^2 + d^2}$ and
 $\Im(\frac{a+i b}{c+i d})=\frac{a d -b c}{c^2 + d^2}$, so in the special case
@@ -366,13 +377,13 @@ computation of the real part while it does never happen in the one of the
 imaginary part.  Then, using the generic error of the division (see
 \ref{generic:div}), we have
 \begin{align*}
-\error(\Re(t)) &\leq [1+3\times 2^{1+e_1}+3\times 2^{1+e_2} +48] \Ulp(\Re(t)),
+\error(\Re(t)) &\leq [1+2^{3+e_1}+2^{3+e_2}+2^6] \Ulp(\Re(t)),
 \\
-\error(\Im(t)) &\leq [1 + 3\times2 + 3\times 2 + 48] \Ulp(\Im(t)),
+\error(\Im(t)) &\leq [1+2^3+2^3+2^6] \Ulp(\Im(t)),
 \end{align*}
 where $e_1=\Exp(a c) -\Exp(a c+b d)$ and $e_2=\Exp(b d) -\Exp(a c+b d)$.  The
-second inequality shows that $2^6$ is suitable choice for $k_I$. As $|\sinh
-y|<\cosh y$ for every nonzero $y$, we have $b d<a c$, thus $e_2\leq e_1$. We
+second inequality shows that $2^7$ is suitable choice for $k_I$. As $|\sinh
+y|<\cosh y$ for every nonzero $y$, we have $bd<ac$, thus $e_2\leq e_1$. We
 know that $\Exp(\frac{a c+b d}{c^2+d^2})\leq \Exp(a c+b d) -\Exp(c^2+d^2)$,
 $\Exp(c^2+d^2)\geq2 \min(\Exp(c), \Exp(d))$, and $\Exp(ac) \leq \Exp(a) +
 \Exp(c)$, this gives an upper bound for $e_1$:
@@ -384,7 +395,9 @@ and a suitable value for $k_R$:
 \begin{equation*}
 k_R=\left\{
 \begin{array}{l l}
-  2^7 & \mbox{if $e < 3$;}
+  2^7 & \mbox{if $e < 2$;}
+  \\
+  2^8 & \mbox{if $e = 2$}
   \\
   2^{5 + e} & \mbox{else.}
 \end{array}
diff --git a/src/tan.c b/src/tan.c
index c8e8081..9a1f50b 100644
--- a/src/tan.c
+++ b/src/tan.c
@@ -27,7 +27,7 @@ MA 02111-1307, USA. */
 void
 mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
 {
-  mpc_t x, y, z;
+  mpc_t x, y;
   mp_prec_t prec;
   mp_exp_t err;
   int ok = 0;
@@ -146,12 +146,12 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
 
   /* tan(op) = sin(op) / cos(op).
 
-     We use the following algorithm
-     with rounding towards +infinity for all operations, and working precision w:
+     We use the following algorithm with rounding away from 0 for all
+     operations, and working precision w:
 
-     (1) x = N(sin(op))
-     (2) y = N(cos(op))
-     (3) z = N(x/y)
+     (1) x = A(sin(op))
+     (2) y = A(cos(op))
+     (3) z = A(x/y)
 
      the error on Im(z) is at most 81 ulp,
      the error on Re(z) is at most
@@ -169,53 +169,85 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
 
   mpc_init2 (x, 2);
   mpc_init2 (y, 2);
-  mpc_init2 (z, 2);
 
   err = 7;
 
   do
     {
       mp_exp_t k;
-      prec += mpc_ceil_log2 (prec) + err;
+      mp_exp_t exr, eyr, eyi, ezr;
+      int inex;
 
-      MPC_LOG_MSG ("loop prec=%ld", prec);
+      ok = 0;
 
+      /* FIXME: prevent addition overflow */
+      prec += mpc_ceil_log2 (prec) + err;
       mpc_set_prec (x, prec);
       mpc_set_prec (y, prec);
-      mpc_set_prec (z, prec);
-
-      mpc_sin (x, op, MPC_RNDUU);
-      mpc_cos (y, op, MPC_RNDUU);
-      mpc_div (z, x, y, MPC_RNDUU);
 
-      k = mpfr_get_exp (MPC_RE (x)) + mpfr_get_exp (MPC_RE (y))
-        - mpfr_get_exp (MPC_RE (z))
-        +2 * MAX (-mpfr_get_exp (MPC_RE (y)), -mpfr_get_exp (MPC_IM (y)));
-      err = k < 2 ? 7 : k == 2 ? 8 : 5 + k;
-
-      /* TODO: prove that overflow occurs only when the exact result has no
-         floating point representation. For the time being, do as if it were
-         true. */
-      ok = mpfr_inf_p (MPC_RE (z))
-        ||mpfr_can_round (MPC_RE(z), prec - err, GMP_RNDU, MPC_RND_RE(rnd),
-                          MPFR_PREC(MPC_RE(rop)));
+      MPC_LOG_MSG ("loop prec=%ld", prec);
 
-      if (ok) /* compute imaginary part */
+      /* rounding away from zero: except in the cases x=0 or y=0 (processed
+         above), sin x and cos y are never exact, so rounding away from 0 is
+         rounding towards 0 and adding one ulp to the absolute value */
+      mpc_sin (x, op, MPC_RNDZZ);
+      mpfr_signbit (MPC_RE (x)) ?
+        mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x));
+      mpfr_signbit (MPC_IM (x)) ?
+        mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x));
+      exr = mpfr_get_exp (MPC_RE (x));
+      mpc_cos (y, op, MPC_RNDZZ);
+      mpfr_signbit (MPC_RE (y)) ?
+        mpfr_nextbelow (MPC_RE (y)) : mpfr_nextabove (MPC_RE (y));
+      mpfr_signbit (MPC_IM (y)) ?
+        mpfr_nextbelow (MPC_IM (y)) : mpfr_nextabove (MPC_IM (y));
+      eyr = mpfr_get_exp (MPC_RE (y));
+      eyi = mpfr_get_exp (MPC_IM (y));
+
+      /* some parts of the quotient may be exact */
+      inex = mpc_div (x, x, y, MPC_RNDZZ);
+      /* OP is no pure real nor pure imaginary, so the real and imaginary
+         parts of its tangent cannot be null. */
+      if (mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM (x)))
+        {
+          err = prec; /* double precision */
+          continue;
+        }
+      if (MPC_INEX_RE (inex))
+        mpfr_signbit (MPC_RE (x)) ?
+          mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x));
+      if (MPC_INEX_IM (inex))
+        mpfr_signbit (MPC_IM (x)) ?
+          mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x));
+      ezr = mpfr_get_exp (MPC_RE (x));
+
+      /* FIXME: compute
+         k = Exp(Re(x))+Exp(Re(y))-2min{Exp(Re(y)), Exp(Im(y))}-Exp(Re(x/y))
+         avoiding overflow */
+      k = exr - ezr + MAX(-eyr, eyr - 2 * eyi);
+      err = k < 2 ? 7 : (k == 2 ? 8 : (5 + k));
+
+      /* Can the real part be rounded ? */
+      ok = mpfr_inf_p (MPC_RE (x))
+        || mpfr_can_round (MPC_RE(x), prec - err, GMP_RNDN,
+                           MPC_RND_RE(rnd), MPFR_PREC(MPC_RE(rop)));
+
+      if (ok)
         {
-          ok = mpfr_inf_p (MPC_RE (z))
-            || mpfr_can_round (MPC_IM(z), prec - 7, GMP_RNDU, MPC_RND_IM(rnd),
+          /* Can the imaginary part be rounded ? */
+          ok = mpfr_inf_p (MPC_IM (x))
+            || mpfr_can_round (MPC_IM(x), prec - 6, GMP_RNDN, MPC_RND_IM(rnd),
                                MPFR_PREC(MPC_IM(rop)));
         }
       MPC_LOG_MSG ("err: %ld", err);
-      MPC_LOG_VAR (z);
+      MPC_LOG_VAR (x);
     }
   while (ok == 0);
 
-  mpc_set (rop, z, rnd);
+  mpc_set (rop, x, rnd);
 
   MPC_LOG_VAR (rop);
 
   mpc_clear (x);
   mpc_clear (y);
-  mpc_clear (z);
 }
author	thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4>	2008-06-25 12:36:16 +0000
committer	thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4>	2008-06-25 12:36:16 +0000
commit	3ae400f3825b869ee1fdca4cf13888366119ffe0 (patch)
tree	eebca032cf11a6feb37cad6dcc033f575f030dea
parent	92f431b9650ad6562e5856f64ff94537e8f7ad9e (diff)
download	mpc-3ae400f3825b869ee1fdca4cf13888366119ffe0.tar.gz