diff options
author | thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2008-06-25 12:36:16 +0000 |
---|---|---|
committer | thevenyp <thevenyp@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2008-06-25 12:36:16 +0000 |
commit | 3ae400f3825b869ee1fdca4cf13888366119ffe0 (patch) | |
tree | eebca032cf11a6feb37cad6dcc033f575f030dea | |
parent | 92f431b9650ad6562e5856f64ff94537e8f7ad9e (diff) | |
download | mpc-3ae400f3825b869ee1fdca4cf13888366119ffe0.tar.gz |
algorithms.tex: improve explanations in section "generic error of division ",
use rounding away from zero in the analysis of tan algorithm.
tan.c: rewrite code according to algorithm described in algorithm.tex.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@154 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r-- | doc/algorithms.tex | 99 | ||||
-rw-r--r-- | src/tan.c | 94 |
2 files changed, 119 insertions, 74 deletions
diff --git a/doc/algorithms.tex b/doc/algorithms.tex index 4985321..113760e 100644 --- a/doc/algorithms.tex +++ b/doc/algorithms.tex @@ -15,6 +15,7 @@ \DeclareMathOperator{\pinf}{\bigtriangleup} \DeclareMathOperator{\minf}{\bigtriangledown} \DeclareMathOperator{\N}{\mathcal N} +\DeclareMathOperator{\A}{\mathcal A} \title {MPC: Algorithms and Error Analysis} \author {Andreas Enge \and Philippe Th\'eveny} @@ -43,15 +44,15 @@ $\widetilde{z_1}$ and $\widetilde{z_2}$ respectively. For $n=1, 2$, let $\widetilde{z_n} = \widetilde{x_n} + i \widetilde{y_n}$ and $z_n = x_n + i y_n$, let $k_{R,n}$ and $k_{I,n}$ the upper bound coefficients: $\error(x_n) \leq k_{R,n} \Ulp(x_n)$ and $\error(y_n) \leq k_{I,n} \Ulp(y_n)$. Note that -we always have $x_n.c_{R,n}^- \leq \widetilde{x_n} \leq x_n.c_{R,n}^+$ and -$y_n.c_{I,n}^- \leq \widetilde{y_n} \leq y_n.c_{I,n}^+$, with $c_{\cdot,n}^- = -1-k_{\cdot,n}2^{-p}$ and $c_{\cdot,n}^+ = 1+k_{\cdot,n}2^{1-p}$. When $z_n$ is -obtained by rounding $\widetilde{z_n}$ to the precision $p$ (we note -$z_n=\circ(\widetilde{z_n})$), we have another inequalities: $\frac{1}{2}|x_n| -\leq |\widetilde{x_n}| \leq 2|x_n|$; if the rounding mode is rounding towards -plus infinity: $\widetilde{x_n} \leq x_n$, and if the rounding mode is toward -minus infinity: $x_n \leq \widetilde{x_n}$ (similar relations hold for the -imaginary part). +we always have $|x_n|.c_{R,n}^- \leq |\widetilde{x_n}| \leq |x_n|.c_{R,n}^+$ +and $|y_n|.c_{I,n}^- \leq |\widetilde{y_n}| \leq |y_n|.c_{I,n}^+$, with +$c_{\cdot,n}^- = 1-k_{\cdot,n}2^{1-p}$ and $c_{\cdot,n}^+ = +1+k_{\cdot,n}2^{1-p}$. When $z_n$ is obtained by rounding $\widetilde{z_n}$ to +the precision $p$ (we note $z_n=\circ(\widetilde{z_n})$), we have another +inequalities: $\frac{1}{2}|x_n| \leq |\widetilde{x_n}| \leq 2|x_n|$; if the +rounding mode is rounding towards plus infinity: $\widetilde{x_n} \leq x_n$, +and if the rounding mode is toward minus infinity: $x_n \leq \widetilde{x_n}$ +(similar relations hold for the imaginary part). Let $z=x+iy$ the result of the operation $z_1\diamond z_2$ rounded to the precision of $z$ (we note $z=\circ(z_1 \diamond z_2)$). Let $c_R$ the bound @@ -62,7 +63,7 @@ the imaginary part, \emph{mutatis mutandis}). \subsection {Generic error of addition/substraction} -Let +Using the notations of the introductory paragraphs above, let \[ z=\circ(z_1+z_2). \] @@ -96,7 +97,7 @@ simpler expression \subsection {Generic error of multiplication} -Let +Using the notations of the introductory paragraphs above, let \[ z=\circ(z_1\times z_2). \] @@ -166,7 +167,7 @@ $\delta' = \Exp(y_1x_2)-\Exp(y) \leq \Exp(y_1)+\Exp(x_2)-\Exp(y)$. \subsection {Generic error of division}\label{generic:div} -Let +Using the notations of the introductory paragraphs above, let \[ z=\circ(\frac{z_1}{z_2}). \] @@ -182,15 +183,16 @@ c&=x_2^2+y_2^2 \end{align*} then the error of the real part is \[ -\error(x) = |x-\frac{A}{C}| \leq |x-\frac{a}{c}| + |\frac{a}{c}-\frac{A}{C}|. +\error(x) = \left|x-\frac{A}{C}\right| \leq \left|x-\frac{a}{c}\right| + +\left|\frac{a}{c}-\frac{A}{C}\right|. \] The first term of the right hand side is the rounding error: \[ -|x-\frac{a}{c}|\leq c_R \Ulp(x). +\left|x-\frac{a}{c}\right|\leq c_R \Ulp(x). \] The second term can be bounded as follows: \[ -|\frac{a}{c}-\frac{A}{C}| \leq \frac{1}{c} |a-A| +\left|\frac{a}{c}-\frac{A}{C}\right| \leq \frac{1}{c} |a-A| +\left|\frac{A}{C}\right|\frac{1}{c}|c-C|. \] We have @@ -220,7 +222,10 @@ and we can show in a similar way that |y_1y_2-\widetilde{y_1}\widetilde{y_2}| \leq \left( (1+c_{I,1})k_{I,2} +(1+c_{I,2})k_{I,1} \right)2^{d'} \Ulp(a), \] -where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have +where $d'=\Exp(y_1y_2)-\Exp(a)$. Because $x$ is $a/c$ rounded to the working +precision, we have $\Ulp(a/c) \leq \Ulp(x)$ and, as $c$ is positive, +$\Ulp(a)/c \leq 2\Ulp(a/c)$. Thus $\Ulp(a)/c \leq 2\Ulp(x)$. Using the +preceding inequalities we have \[ \frac{1}{c}|a-A| \leq \left[ \left((1+c_{R,1})k_{R,2}+(1+c_{R,2})k_{R,1}\right)2^{1+d} @@ -229,30 +234,34 @@ where $d'=\Exp(y_1y_2)-\Exp(a)$. Using the preceding inequalities we have \] In addition, we have \[ -|c-C| \leq |x_2^2-\widetilde{x_2}^2| + |y_2-\widetilde{y_2}^2|, +|c-C| \leq \left|x_2^2-\widetilde{x_2}^2\right| + +\left|y_2^2-\widetilde{y_2}^2\right|, \] with \begin{align*} -|x_2^2-\widetilde{x_2}^2| &\leq |x_2+\widetilde{x_2}||x_2-\widetilde{x_2}|\\ -&\leq (1+c_{R,2})|x_2|k_{R,2} \Ulp(x_2)\\ -&\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2), +\left|x_2^2-\widetilde{x_2}^2\right| &= \left|x_2+\widetilde{x_2}\right| +\left|x_2-\widetilde{x_2}\right|\\ &\leq (1+c_{R,2})|x_2|k_{R,2} +\Ulp(x_2)\\ &\leq 2(1+c_{R,2})k_{R,2}\Ulp(x_2^2), \end{align*} we can show in a similar way that \[ -|y_2^2-\widetilde{y_2}^2| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2). +\left|y_2^2-\widetilde{y_2}^2\right| \leq 2(1+c_{I,2})k_{I,2}\Ulp(y_2^2). \] But $\Ulp(x_2^2) \leq \Ulp(x_2^2+y_2^2) = \Ulp(c)$, and the same relation holds for $\Ulp(y_2^2)$, thus \[ |c-C| \leq 2\left[(1+c_{R,2})k_{R,2}+(1+c_{I,2})k_{I,2}\right]\Ulp(c); \] -note that +let $c_{R,2}^-$ and $c_{I,2}^-$ two positive numbers such that $c_{R,2}^-|x_2| +\leq \left|\widetilde{x_2}\right|$ and $c_{I,2}^-|y_2| \leq +\left|\widetilde{y_2}\right|$, then \[ \left|\frac{A}{C}\right| \leq \frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2} \left|\frac{a}{c}\right|, \] -thus we have +and, noticing that $\left|\frac{a}{c}\right|\frac{1}{c}\Ulp(c) \leq 2\Ulp(x)$, +we have \[ \left|\frac{A}{C}\right|\frac{1}{c}|c-C| \leq 4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2} @@ -274,7 +283,7 @@ is bounded in the following way An analog process gives \begin{equation*} \begin{split} - \error(y) &\leq [c_R\\ + \error(y) &\leq [c_I\\ &\quad+\left((1+c_{I,1})k_{R,2}+(1+c_{R,2})k_{I,1}\right)2^{1+\delta} +\left((1+c_{R,1})k_{I,2}+(1+c_{I,2})k_{R,1}\right)2^{1+\delta'}\\ &\quad+4\frac{c_{R,1}c_{R,2}+c_{I,1}c_{I,2}}{(c_{R,2}^-)^2+(c_{I,2}^-)^2} @@ -285,20 +294,22 @@ An analog process gives with $\delta=\Exp(y_1x_2)-\Exp(b)$ and $\delta'=\Exp(x_1y_2)-\Exp(b)$. Note that, when $z_1$ and $z_2$ are the rounded values of $\widetilde{z_1}$ -and $\widetilde{z_2}$ respectively, with rounding away from zero, we have the -much simpler inequalities: +and $\widetilde{z_2}$ respectively, with rounding away from zero, then we can +substitute 1 for $k_{R,n}$, $c_{R,n}$, $k_{I,n}$, $c_{I,n}$ (with $n=1,2$), +$\frac{1}{2}$ for $c_{R,2}^-$, $c_{I,2}^-$, giving the much simpler +inequalities: \begin{align*} \error(x) &\leq [c_R + 2^{3+d} + 2^{3+d'} + 2^6] \Ulp(x)\\ \error(y) &\leq [c_I + 2^{3+\delta} + 2^{3+\delta'} + 2^6] \Ulp(y). \end{align*} -If the rounding mode is rounding to nearest, we have: +If the rounding mode is rounding to nearest, then we can substitute +$\frac{1}{2}$ for $k_{R,n}$, $k_{I,n}$, $c_{R,2}^-$, $c_{I,2}^-$, 2 for +$c_{R,n}$, $c_{I,n}$ and we have: \begin{align*} -\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 48] \Ulp(x) -\\ -\error(x) -&\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 48] \Ulp(y), +\error(x) &\leq [c_R + 3\times 2^{1+d} +3\times 2^{1+d'} + 192] \Ulp(x)\\ +\error(x) &\leq [c_R + 3\times 2^{1+\delta} +3\times 2^{1+\delta'} + 192] +\Ulp(y). \end{align*} -note that the preceding equations still hold. At last, let us remark that $x_1x_2$ and $y_1y_2$ cannot have the same sign if $y_1x_2$ and $-x_1y_2$ do have the same sign, thus there is @@ -345,18 +356,18 @@ Let $z = x + i y$ with $x \neq 0$ and $y \neq 0$. We compute $\tan z$ as follows: \begin{align*} -u &\leftarrow \N(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u)) +u &\leftarrow \A(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u)) &\error(\Im(u)) &\leq 1 \Ulp(\Im(u)) \\ -v &\leftarrow \N(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v)) +v &\leftarrow \A(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v)) &\error(\Im(v)) &\leq 1 \Ulp(\Im(v)) \\ -t &\leftarrow \N(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t)) +t &\leftarrow \A(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t)) &\error(\Im(t)) &\leq k_I \Ulp(\Im(t)) \end{align*} -where $w_2 \leftarrow \N(w_1)$ means that the real and imaginary parts of -$w_2$ are respectively the real and imaginary part of $w_1$ rounded to nearest -to the working precision. +where $w_2 \leftarrow \A(w_1)$ means that the real and imaginary parts of +$w_2$ are respectively the real and imaginary part of $w_1$ rounded away from +zero to the working precision. We know that $\Re(\frac{a+i b}{c+i d})=\frac{a c +b d}{c^2 + d^2}$ and $\Im(\frac{a+i b}{c+i d})=\frac{a d -b c}{c^2 + d^2}$, so in the special case @@ -366,13 +377,13 @@ computation of the real part while it does never happen in the one of the imaginary part. Then, using the generic error of the division (see \ref{generic:div}), we have \begin{align*} -\error(\Re(t)) &\leq [1+3\times 2^{1+e_1}+3\times 2^{1+e_2} +48] \Ulp(\Re(t)), +\error(\Re(t)) &\leq [1+2^{3+e_1}+2^{3+e_2}+2^6] \Ulp(\Re(t)), \\ -\error(\Im(t)) &\leq [1 + 3\times2 + 3\times 2 + 48] \Ulp(\Im(t)), +\error(\Im(t)) &\leq [1+2^3+2^3+2^6] \Ulp(\Im(t)), \end{align*} where $e_1=\Exp(a c) -\Exp(a c+b d)$ and $e_2=\Exp(b d) -\Exp(a c+b d)$. The -second inequality shows that $2^6$ is suitable choice for $k_I$. As $|\sinh -y|<\cosh y$ for every nonzero $y$, we have $b d<a c$, thus $e_2\leq e_1$. We +second inequality shows that $2^7$ is suitable choice for $k_I$. As $|\sinh +y|<\cosh y$ for every nonzero $y$, we have $bd<ac$, thus $e_2\leq e_1$. We know that $\Exp(\frac{a c+b d}{c^2+d^2})\leq \Exp(a c+b d) -\Exp(c^2+d^2)$, $\Exp(c^2+d^2)\geq2 \min(\Exp(c), \Exp(d))$, and $\Exp(ac) \leq \Exp(a) + \Exp(c)$, this gives an upper bound for $e_1$: @@ -384,7 +395,9 @@ and a suitable value for $k_R$: \begin{equation*} k_R=\left\{ \begin{array}{l l} - 2^7 & \mbox{if $e < 3$;} + 2^7 & \mbox{if $e < 2$;} + \\ + 2^8 & \mbox{if $e = 2$} \\ 2^{5 + e} & \mbox{else.} \end{array} @@ -27,7 +27,7 @@ MA 02111-1307, USA. */ void mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) { - mpc_t x, y, z; + mpc_t x, y; mp_prec_t prec; mp_exp_t err; int ok = 0; @@ -146,12 +146,12 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) /* tan(op) = sin(op) / cos(op). - We use the following algorithm - with rounding towards +infinity for all operations, and working precision w: + We use the following algorithm with rounding away from 0 for all + operations, and working precision w: - (1) x = N(sin(op)) - (2) y = N(cos(op)) - (3) z = N(x/y) + (1) x = A(sin(op)) + (2) y = A(cos(op)) + (3) z = A(x/y) the error on Im(z) is at most 81 ulp, the error on Re(z) is at most @@ -169,53 +169,85 @@ mpc_tan (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) mpc_init2 (x, 2); mpc_init2 (y, 2); - mpc_init2 (z, 2); err = 7; do { mp_exp_t k; - prec += mpc_ceil_log2 (prec) + err; + mp_exp_t exr, eyr, eyi, ezr; + int inex; - MPC_LOG_MSG ("loop prec=%ld", prec); + ok = 0; + /* FIXME: prevent addition overflow */ + prec += mpc_ceil_log2 (prec) + err; mpc_set_prec (x, prec); mpc_set_prec (y, prec); - mpc_set_prec (z, prec); - - mpc_sin (x, op, MPC_RNDUU); - mpc_cos (y, op, MPC_RNDUU); - mpc_div (z, x, y, MPC_RNDUU); - k = mpfr_get_exp (MPC_RE (x)) + mpfr_get_exp (MPC_RE (y)) - - mpfr_get_exp (MPC_RE (z)) - +2 * MAX (-mpfr_get_exp (MPC_RE (y)), -mpfr_get_exp (MPC_IM (y))); - err = k < 2 ? 7 : k == 2 ? 8 : 5 + k; - - /* TODO: prove that overflow occurs only when the exact result has no - floating point representation. For the time being, do as if it were - true. */ - ok = mpfr_inf_p (MPC_RE (z)) - ||mpfr_can_round (MPC_RE(z), prec - err, GMP_RNDU, MPC_RND_RE(rnd), - MPFR_PREC(MPC_RE(rop))); + MPC_LOG_MSG ("loop prec=%ld", prec); - if (ok) /* compute imaginary part */ + /* rounding away from zero: except in the cases x=0 or y=0 (processed + above), sin x and cos y are never exact, so rounding away from 0 is + rounding towards 0 and adding one ulp to the absolute value */ + mpc_sin (x, op, MPC_RNDZZ); + mpfr_signbit (MPC_RE (x)) ? + mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x)); + mpfr_signbit (MPC_IM (x)) ? + mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x)); + exr = mpfr_get_exp (MPC_RE (x)); + mpc_cos (y, op, MPC_RNDZZ); + mpfr_signbit (MPC_RE (y)) ? + mpfr_nextbelow (MPC_RE (y)) : mpfr_nextabove (MPC_RE (y)); + mpfr_signbit (MPC_IM (y)) ? + mpfr_nextbelow (MPC_IM (y)) : mpfr_nextabove (MPC_IM (y)); + eyr = mpfr_get_exp (MPC_RE (y)); + eyi = mpfr_get_exp (MPC_IM (y)); + + /* some parts of the quotient may be exact */ + inex = mpc_div (x, x, y, MPC_RNDZZ); + /* OP is no pure real nor pure imaginary, so the real and imaginary + parts of its tangent cannot be null. */ + if (mpfr_zero_p (MPC_RE (x)) || mpfr_zero_p (MPC_IM (x))) + { + err = prec; /* double precision */ + continue; + } + if (MPC_INEX_RE (inex)) + mpfr_signbit (MPC_RE (x)) ? + mpfr_nextbelow (MPC_RE (x)) : mpfr_nextabove (MPC_RE (x)); + if (MPC_INEX_IM (inex)) + mpfr_signbit (MPC_IM (x)) ? + mpfr_nextbelow (MPC_IM (x)) : mpfr_nextabove (MPC_IM (x)); + ezr = mpfr_get_exp (MPC_RE (x)); + + /* FIXME: compute + k = Exp(Re(x))+Exp(Re(y))-2min{Exp(Re(y)), Exp(Im(y))}-Exp(Re(x/y)) + avoiding overflow */ + k = exr - ezr + MAX(-eyr, eyr - 2 * eyi); + err = k < 2 ? 7 : (k == 2 ? 8 : (5 + k)); + + /* Can the real part be rounded ? */ + ok = mpfr_inf_p (MPC_RE (x)) + || mpfr_can_round (MPC_RE(x), prec - err, GMP_RNDN, + MPC_RND_RE(rnd), MPFR_PREC(MPC_RE(rop))); + + if (ok) { - ok = mpfr_inf_p (MPC_RE (z)) - || mpfr_can_round (MPC_IM(z), prec - 7, GMP_RNDU, MPC_RND_IM(rnd), + /* Can the imaginary part be rounded ? */ + ok = mpfr_inf_p (MPC_IM (x)) + || mpfr_can_round (MPC_IM(x), prec - 6, GMP_RNDN, MPC_RND_IM(rnd), MPFR_PREC(MPC_IM(rop))); } MPC_LOG_MSG ("err: %ld", err); - MPC_LOG_VAR (z); + MPC_LOG_VAR (x); } while (ok == 0); - mpc_set (rop, z, rnd); + mpc_set (rop, x, rnd); MPC_LOG_VAR (rop); mpc_clear (x); mpc_clear (y); - mpc_clear (z); } |