summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorthevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2008-06-25 12:36:16 +0000
committerthevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2008-06-25 12:36:16 +0000
commit3ae400f3825b869ee1fdca4cf13888366119ffe0 (patch)
treeeebca032cf11a6feb37cad6dcc033f575f030dea
parent92f431b9650ad6562e5856f64ff94537e8f7ad9e (diff)
downloadmpc-3ae400f3825b869ee1fdca4cf13888366119ffe0.tar.gz
algorithms.tex: improve explanations in section "generic error of division ",
use rounding away from zero in the analysis of tan algorithm. tan.c: rewrite code according to algorithm described in algorithm.tex. git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@154 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r--doc/algorithms.tex99
-rw-r--r--src/tan.c94
2 files changed, 119 insertions, 74 deletions
diff --git a/doc/algorithms.tex b/doc/algorithms.tex
index 4985321..113760e 100644
--- a/doc/algorithms.tex
+++ b/doc/algorithms.tex
@@ -15,6 +15,7 @@
\DeclareMathOperator{\pinf}{\bigtriangleup}
\DeclareMathOperator{\minf}{\bigtriangledown}
\DeclareMathOperator{\N}{\mathcal N}
+\DeclareMathOperator{\A}{\mathcal A}
\title {MPC: Algorithms and Error Analysis}
\author {Andreas Enge \and Philippe Th\'eveny}
@@ -43,15 +44,15 @@ $\widetilde{z_1}$ and $\widetilde{z_2}$ respectively. For $n=1, 2$, let
$\widetilde{z_n} = \widetilde{x_n} + i \widetilde{y_n}$ and $z_n = x_n + i
y_n$, let $k_{R,n}$ and $k_{I,n}$ the upper bound coefficients: $\error(x_n)
\leq k_{R,n} \Ulp(x_n)$ and $\error(y_n) \leq k_{I,n} \Ulp(y_n)$. Note that
-we always have $x_n.c_{R,n}^- \leq \widetilde{x_n} \leq x_n.c_{R,n}^+$ and
-$y_n.c_{I,n}^- \leq \widetilde{y_n} \leq y_n.c_{I,n}^+$, with $c_{\cdot,n}^- =
-1-k_{\cdot,n}2^{-p}$ and $c_{\cdot,n}^+ = 1+k_{\cdot,n}2^{1-p}$. When $z_n$ is
-obtained by rounding $\widetilde{z_n}$ to the precision $p$ (we note
-$z_n=\circ(\widetilde{z_n})$), we have another inequalities: $\frac{1}{2}|x_n|
-\leq |\widetilde{x_n}| \leq 2|x_n|$; if the rounding mode is rounding towards
-plus infinity: $\widetilde{x_n} \leq x_n$, and if the rounding mode is toward
-minus infinity: $x_n \leq \widetilde{x_n}$ (similar relations hold for the
-imaginary part).
+we always have $|x_n|.c_{R,n}^- \leq |\widetilde{x_n}| \leq |x_n|.c_{R,n}^+$
+and $|y_n|.c_{I,n}^- \leq |\widetilde{y_n}| \leq |y_n|.c_{I,n}^+$, with
+$c_{\cdot,n}^- = 1-k_{\cdot,n}2^{1-p}$ and $c_{\cdot,n}^+ =
+1+k_{\cdot,n}2^{1-p}$. When $z_n$ is obtained by rounding $\widetilde{z_n}$ to
+the precision $p$ (we note $z_n=\circ(\widetilde{z_n})$), we have another
+inequalities: $\frac{1}{2}|x_n| \leq |\widetilde{x_n}| \leq 2|x_n|$; if the
+rounding mode is rounding towards plus infinity: $\widetilde{x_n} \leq x_n$,
+and if the rounding mode is toward minus infinity: $x_n \leq \widetilde{x_n}$
+(similar relations hold for the imaginary part).
Let $z=x+iy$ the result of the operation $z_1\diamond z_2$ rounded to the
precision of $z$ (we note $z=\circ(z_1 \diamond z_2)$). Let $c_R$ the bound
@@ -62,7 +63,7 @@ the imaginary part, \emph{mutatis mutandis}).
\subsection {Generic error of addition/substraction}
-Let
+Using the notations of the introductory paragraphs above, let
\[
z=\circ(z_1+z_2).
\]
@@ -96,7 +97,7 @@ simpler expression
\subsection {Generic error of multiplication}
-Let
+Using the notations of the introductory paragraphs above, let
\[
z=\circ(z_1\times z_2).
\]
@@ -166,7 +167,7 @@ $\delta' = \Exp(y_1x_2)-\Exp(y) \leq \Exp(y_1)+\Exp(x_2)-\Exp(y)$.
\subsection {Generic error of division}\label{generic:div}
-Let
+Using the notations of the introductory paragraphs above, let
\[
z=\circ(\frac{z_1}{z_2}).
\]
@@ -182,15 +183,16 @@ c&=x_2^2+y_2^2
\end{align*}
then the error of the real part is
\[
-\error(x) = |x-\frac{A}{C}| \leq |x-\frac{a}{c}| + |\frac{a}{c}-\frac{A}{C}|.
+\error(x) = \left|x-\frac{A}{C}\right| \leq \left|x-\frac{a}{c}\right| +
+\left|\frac{a}{c}-\frac{A}{C}\right|.
\]
The first term of the right hand side is the rounding error:
\[
-|x-\frac{a}{c}|\leq c_R \Ulp(x).
+\left|x-\frac{a}{c}\right|\leq c_R \Ulp(x).
\]
The second term can be bounded as follows:
\[
-|\frac{a}{c}-\frac{A}{C}| \leq \frac{1}{c} |a-A|
+\left|\frac{a}{c}-\frac{A}{C}\right| \leq \frac{1}{c} |a-A|
+\left|\frac{A}{C}\right|\frac{1}{c}|c-C|.
\]
We have
@@ -220,7 +222,10 @@ and we can show in a similar way that
|y_1y_2-\widetilde{y_1}\widetilde{y_2}| \leq \left( (1+c_{I,1})k_{I,2}
+(1+c_{I,2})k_{I,1} \right)2^{d'} \Ulp(a),
\]
-where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have
+where $d'=\Exp(y_1y_2)-\Exp(a)$. Because $x$ is $a/c$ rounded to the working
+precision, we have $\Ulp(a/c) \leq \Ulp(x)$ and, as $c$ is positive,
+$\Ulp(a)/c \leq 2\Ulp(a/c)$. Thus $\Ulp(a)/c \leq 2\Ulp(x)$. Using the
+preceding inequalities we have
\[
\frac{1}{c}|a-A| \leq \left[
\left((1+c_{R,1})k_{R,2}+(1+c_{R,2})k_{R,1}\right)2^{1+d}
@@ -229,30 +234,34 @@ where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have
\]
In addition, we have
\[
-|c-C| \leq |x_2^2-\widetilde{x_2}^2| + |y_2-\widetilde{y_2}^2|,
+|c-C| \leq \left|x_2^2-\widetilde{x_2}^2\right| +
+\left|y_2^2-\widetilde{y_2}^2\right|,
\]
with
\begin{align*}
-|x_2^2-\widetilde{x_2}^2| &\leq |x_2+\widetilde{x_2}||x_2-\widetilde{x_2}|\\
-&\leq (1+c_{R,2})|x_2|k_{R,2} \Ulp(x_2)\\
-&\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2),
+\left|x_2^2-\widetilde{x_2}^2\right| &= \left|x_2+\widetilde{x_2}\right|
+\left|x_2-\widetilde{x_2}\right|\\ &\leq (1+c_{R,2})|x_2|k_{R,2}
+\Ulp(x_2)\\ &\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2),
\end{align*}
we can show in a similar way that
\[
-|y_2^2-\widetilde{y_2}^2| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2).
+\left|y_2^2-\widetilde{y_2}^2\right| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2).
\]
But $\Ulp(x_2^2) \leq \Ulp(x_2^2+y_2^2) = \Ulp(c)$, and the same relation
holds for $\Ulp(y_2^2)$, thus
\[
|c-C| \leq 2\left[(1+c_{R,2})k_{R,2}+(1+c_{I,2})k_{I,2}\right]\Ulp(c);
\]
-note that
+let $c_{R,2}^-$ and $c_{I,2}^-$ two positive numbers such that $c_{R,2}^-|x_2|
+\leq \left|\widetilde{x_2}\right|$ and $c_{I,2}^-|y_2| \leq
+\left|\widetilde{y_2}\right|$, then
\[
\left|\frac{A}{C}\right| \leq
\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
\left|\frac{a}{c}\right|,
\]
-thus we have
+and, noticing that $\left|\frac{a}{c}\right|\frac{1}{c}\Ulp(c) \leq 2\Ulp(x)$,
+we have
\[
\left|\frac{A}{C}\right|\frac{1}{c}|c-C| \leq
4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
@@ -274,7 +283,7 @@ is bounded in the following way
An analog process gives
\begin{equation*}
\begin{split}
- \error(y) &\leq [c_R\\
+ \error(y) &\leq [c_I\\
&\quad+\left((1+c_{I,1})k_{R,2}+(1+c_{R,2})k_{I,1}\right)2^{1+\delta}
+\left((1+c_{R,1})k_{I,2}+(1+c_{I,2})k_{R,1}\right)2^{1+\delta'}\\
&\quad+4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2}
@@ -285,20 +294,22 @@ An analog process gives
with $\delta=\Exp(y_1x_2)-\Exp(b)$ and $\delta'=\Exp(x_1y_2)-\Exp(b)$.
Note that, when $z_1$ and $z_2$ are the rounded values of $\widetilde{z_1}$
-and $\widetilde{z_2}$ respectively, with rounding away from zero, we have the
-much simpler inequalities:
+and $\widetilde{z_2}$ respectively, with rounding away from zero, then we can
+substitute 1 for $k_{R,n}$, $c_{R,n}$, $k_{I,n}$, $c_{I,n}$ (with $n=1,2$),
+$\frac{1}{2}$ for $c_{R,2}^-$, $c_{I,2}^-$, giving the much simpler
+inequalities:
\begin{align*}
\error(x) &\leq [c_R + 2^{3+d} + 2^{3+d'} + 2^6] \Ulp(x)\\
\error(y) &\leq [c_I + 2^{3+\delta} + 2^{3+\delta'} + 2^6] \Ulp(y).
\end{align*}
-If the rounding mode is rounding to nearest, we have:
+If the rounding mode is rounding to nearest, then we can substitute
+$\frac{1}{2}$ for $k_{R,n}$, $k_{I,n}$, $c_{R,2}^-$, $c_{I,2}^-$, 2 for
+$c_{R,n}$, $c_{I,n}$ and we have:
\begin{align*}
-\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 48] \Ulp(x)
-\\
-\error(x)
-&\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 48] \Ulp(y),
+\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 192] \Ulp(x)\\
+\error(x) &\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 192]
+\Ulp(y).
\end{align*}
-note that the preceding equations still hold.
At last, let us remark that $x_1x_2$ and $y_1y_2$ cannot have the same
sign if $y_1x_2$ and $-x_1y_2$ do have the same sign, thus there is
@@ -345,18 +356,18 @@ Let $z = x + i y$ with $x \neq 0$ and $y \neq 0$.
We compute $\tan z$ as follows:
\begin{align*}
-u &\leftarrow \N(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u))
+u &\leftarrow \A(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u))
&\error(\Im(u)) &\leq 1 \Ulp(\Im(u))
\\
-v &\leftarrow \N(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v))
+v &\leftarrow \A(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v))
&\error(\Im(v)) &\leq 1 \Ulp(\Im(v))
\\
-t &\leftarrow \N(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t))
+t &\leftarrow \A(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t))
&\error(\Im(t)) &\leq k_I \Ulp(\Im(t))
\end{align*}
-where $w_2 \leftarrow \N(w_1)$ means that the real and imaginary parts of
-$w_2$ are respectively the real and imaginary part of $w_1$ rounded to nearest
-to the working precision.
+where $w_2 \leftarrow \A(w_1)$ means that the real and imaginary parts of
+$w_2$ are respectively the real and imaginary part of $w_1$ rounded away from
+zero to the working precision.
We know that $\Re(\frac{a+i b}{c+i d})=\frac{a c +b d}{c^2 + d^2}$ and
$\Im(\frac{a+i b}{c+i d})=\frac{a d -b c}{c^2 + d^2}$, so in the special case
@@ -366,13 +377,13 @@ computation of the real part while it does never happen in the one of the
imaginary part. Then, using the generic error of the division (see
\ref{generic:div}), we have
\begin{align*}
-\error(\Re(t)) &\leq [1+3\times 2^{1+e_1}+3\times 2^{1+e_2} +48] \Ulp(\Re(t)),
+\error(\Re(t)) &\leq [1+2^{3+e_1}+2^{3+e_2}+2^6] \Ulp(\Re(t)),
\\
-\error(\Im(t)) &\leq [1 + 3\times2 + 3\times 2 + 48] \Ulp(\Im(t)),
+\error(\Im(t)) &\leq [1+2^3+2^3+2^6] \Ulp(\Im(t)),
\end{align*}
where $e_1=\Exp(a c) -\Exp(a c+b d)$ and $e_2=\Exp(b d) -\Exp(a c+b d)$. The
-second inequality shows that $2^6$ is suitable choice for $k_I$. As $|\sinh
-y|<\cosh y$ for every nonzero $y$, we have $b d<a c$, thus $e_2\leq e_1$. We
+second inequality shows that $2^7$ is suitable choice for $k_I$. As $|\sinh
+y|<\cosh y$ for every nonzero $y$, we have $bd<ac$, thus $e_2\leq e_1$. We
know that $\Exp(\frac{a c+b d}{c^2+d^2})\leq \Exp(a c+b d) -\Exp(c^2+d^2)$,
$\Exp(c^2+d^2)\geq2 \min(\Exp(c), \Exp(d))$, and $\Exp(ac) \leq \Exp(a) +
\Exp(c)$, this gives an upper bound for $e_1$:
@@ -384,7 +395,9 @@ and a suitable value for $k_R$:
\begin{equation*}
k_R=\left\{
\begin{array}{l l}
- 2^7 & \mbox{if $e < 3$;}
+ 2^7 & \mbox{if $e < 2$;}
+ \\
+ 2^8 & \mbox{if $e = 2$}
\\
2^{5 + e} & \mbox{else.}
\end{array}
diff --git a/src/tan.c b/src/tan.c
index c8e8081..9a1f50b 100644
--- a/src/tan.c
+++ b/src/tan.c
@@ -27,7 +27,7 @@ MA 02111-1307, USA. */
void
mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
{
- mpc_t x, y, z;
+ mpc_t x, y;
mp_prec_t prec;
mp_exp_t err;
int ok = 0;
@@ -146,12 +146,12 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
/* tan(op) = sin(op) / cos(op).
- We use the following algorithm
- with rounding towards +infinity for all operations, and working precision w:
+ We use the following algorithm with rounding away from 0 for all
+ operations, and working precision w:
- (1) x = N(sin(op))
- (2) y = N(cos(op))
- (3) z = N(x/y)
+ (1) x = A(sin(op))
+ (2) y = A(cos(op))
+ (3) z = A(x/y)
the error on Im(z) is at most 81 ulp,
the error on Re(z) is at most
@@ -169,53 +169,85 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
mpc_init2 (x, 2);
mpc_init2 (y, 2);
- mpc_init2 (z, 2);
err = 7;
do
{
mp_exp_t k;
- prec += mpc_ceil_log2 (prec) + err;
+ mp_exp_t exr, eyr, eyi, ezr;
+ int inex;
- MPC_LOG_MSG ("loop prec=%ld", prec);
+ ok = 0;
+ /* FIXME: prevent addition overflow */
+ prec += mpc_ceil_log2 (prec) + err;
mpc_set_prec (x, prec);
mpc_set_prec (y, prec);
- mpc_set_prec (z, prec);
-
- mpc_sin (x, op, MPC_RNDUU);
- mpc_cos (y, op, MPC_RNDUU);
- mpc_div (z, x, y, MPC_RNDUU);
- k = mpfr_get_exp (MPC_RE (x)) + mpfr_get_exp (MPC_RE (y))
- - mpfr_get_exp (MPC_RE (z))
- +2 * MAX (-mpfr_get_exp (MPC_RE (y)), -mpfr_get_exp (MPC_IM (y)));
- err = k < 2 ? 7 : k == 2 ? 8 : 5 + k;
-
- /* TODO: prove that overflow occurs only when the exact result has no
- floating point representation. For the time being, do as if it were
- true. */
- ok = mpfr_inf_p (MPC_RE (z))
- ||mpfr_can_round (MPC_RE(z), prec - err, GMP_RNDU, MPC_RND_RE(rnd),
- MPFR_PREC(MPC_RE(rop)));
+ MPC_LOG_MSG ("loop prec=%ld", prec);
- if (ok) /* compute imaginary part */
+ /* rounding away from zero: except in the cases x=0 or y=0 (processed
+ above), sin x and cos y are never exact, so rounding away from 0 is
+ rounding towards 0 and adding one ulp to the absolute value */
+ mpc_sin (x, op, MPC_RNDZZ);
+ mpfr_signbit (MPC_RE (x)) ?
+ mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x));
+ mpfr_signbit (MPC_IM (x)) ?
+ mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x));
+ exr = mpfr_get_exp (MPC_RE (x));
+ mpc_cos (y, op, MPC_RNDZZ);
+ mpfr_signbit (MPC_RE (y)) ?
+ mpfr_nextbelow (MPC_RE (y)) : mpfr_nextabove (MPC_RE (y));
+ mpfr_signbit (MPC_IM (y)) ?
+ mpfr_nextbelow (MPC_IM (y)) : mpfr_nextabove (MPC_IM (y));
+ eyr = mpfr_get_exp (MPC_RE (y));
+ eyi = mpfr_get_exp (MPC_IM (y));
+
+ /* some parts of the quotient may be exact */
+ inex = mpc_div (x, x, y, MPC_RNDZZ);
+ /* OP is no pure real nor pure imaginary, so the real and imaginary
+ parts of its tangent cannot be null. */
+ if (mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM (x)))
+ {
+ err = prec; /* double precision */
+ continue;
+ }
+ if (MPC_INEX_RE (inex))
+ mpfr_signbit (MPC_RE (x)) ?
+ mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x));
+ if (MPC_INEX_IM (inex))
+ mpfr_signbit (MPC_IM (x)) ?
+ mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x));
+ ezr = mpfr_get_exp (MPC_RE (x));
+
+ /* FIXME: compute
+ k = Exp(Re(x))+Exp(Re(y))-2min{Exp(Re(y)), Exp(Im(y))}-Exp(Re(x/y))
+ avoiding overflow */
+ k = exr - ezr + MAX(-eyr, eyr - 2 * eyi);
+ err = k < 2 ? 7 : (k == 2 ? 8 : (5 + k));
+
+ /* Can the real part be rounded ? */
+ ok = mpfr_inf_p (MPC_RE (x))
+ || mpfr_can_round (MPC_RE(x), prec - err, GMP_RNDN,
+ MPC_RND_RE(rnd), MPFR_PREC(MPC_RE(rop)));
+
+ if (ok)
{
- ok = mpfr_inf_p (MPC_RE (z))
- || mpfr_can_round (MPC_IM(z), prec - 7, GMP_RNDU, MPC_RND_IM(rnd),
+ /* Can the imaginary part be rounded ? */
+ ok = mpfr_inf_p (MPC_IM (x))
+ || mpfr_can_round (MPC_IM(x), prec - 6, GMP_RNDN, MPC_RND_IM(rnd),
MPFR_PREC(MPC_IM(rop)));
}
MPC_LOG_MSG ("err: %ld", err);
- MPC_LOG_VAR (z);
+ MPC_LOG_VAR (x);
}
while (ok == 0);
- mpc_set (rop, z, rnd);
+ mpc_set (rop, x, rnd);
MPC_LOG_VAR (rop);
mpc_clear (x);
mpc_clear (y);
- mpc_clear (z);
}