summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorenge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2011-09-06 08:15:28 +0000
committerenge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2011-09-06 08:15:28 +0000
commit5c27a7fd1535195fc08912fd2fa12c7797f90893 (patch)
treeaf8e284c1af2dd6f19b446d0cf4e1e16632f997d
parent98334988a770d26fcb13a397c02b71ed5ba74043 (diff)
downloadmpc-5c27a7fd1535195fc08912fd2fa12c7797f90893.tar.gz
norm.c: new underflow handling
norm.dat: added different rounding modes to previous example git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1086 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r--NEWS4
-rw-r--r--src/norm.c112
-rw-r--r--tests/norm.dat7
3 files changed, 69 insertions, 54 deletions
diff --git a/NEWS b/NEWS
index f894389..a4fffe1 100644
--- a/NEWS
+++ b/NEWS
@@ -2,10 +2,10 @@ Changes in version 1.0:
- First release as a GNU package
- License change: LGPLv3+ for code, GFDLv1.3+ for documentation
- Bug fixes:
+ - div and norm now return a value indicating the effective rounding
+ direction, as the other functions.
- mul and norm now return correct results even if there are over- or
underflows during the computation
- - div now returns a value indicating the effective rounding direction,
- as the other functions.
- asin, proj, sqr: Wrong result when input variable has infinite part and
equals output variable is corrected.
diff --git a/src/norm.c b/src/norm.c
index 26a8f31..9594082 100644
--- a/src/norm.c
+++ b/src/norm.c
@@ -79,17 +79,17 @@ mpc_norm (mpfr_ptr a, mpc_srcptr b, mpfr_rnd_t rnd)
of underflow or overflow.
If loops < max_loops and inexact is zero, we can exit the
while-loop since it only remains to add u and v into a. */
- if (inexact != 0)
- {
+ if (inexact) {
mpfr_set_prec (res, prec);
mpfr_add (res, u, v, GMP_RNDD); /* err <= 3 ulp in prec */
- }
+ }
} while (loops < max_loops && inexact != 0
&& !mpfr_can_round (res, prec - 2, GMP_RNDD, GMP_RNDU,
mpfr_get_prec (a) + (rnd == GMP_RNDN)));
- if (inexact == 0) /* squarings were exact, nor underflow nor overflow */
+ if (!inexact)
+ /* squarings were exact, neither underflow nor overflow */
inexact = mpfr_add (a, u, v, rnd);
/* if there was an overflow in Re(b)^2 or Im(b)^2 or their sum,
since the norm is larger, there is an overflow for the norm */
@@ -99,55 +99,63 @@ mpc_norm (mpfr_ptr a, mpc_srcptr b, mpfr_rnd_t rnd)
inexact = mpfr_mul_2ui (a, a, mpfr_get_emax (), rnd);
}
else if (mpfr_underflow_p ()) {
- /* necessarily one of the squarings did underflow (otherwise their
- sum could not underflow), thus one of u, v is zero. */
- mpfr_exp_t emin = mpfr_get_emin ();
-
- if (mpfr_zero_p (u) && !mpfr_zero_p (v))
- mpfr_swap (u, v);
-
- /* now either both u and v are zero, or only v */
-
- /* If v is zero, thus Im(b)^2 < 2^(emin-1), then if ulp(u) >=
- 2^(emin+1), which implies u exact, rounding u+Im(b)^2 is equivalent
- to rounding u+2^(emin-1). */
- if (!mpfr_zero_p (u) && mpfr_get_exp (u) - 2 * prec_u > emin)
- {
- mpfr_set_prec (v, MPFR_PREC_MIN);
- mpfr_set_ui_2exp (v, 1, emin - 1, GMP_RNDZ);
+ /* necessarily one of the squarings did underflow (otherwise their
+ sum could not underflow), thus one of u, v is zero. */
+ mpfr_exp_t emin = mpfr_get_emin ();
+
+ /* Now either both u and v are zero, or u is zero and v exact,
+ or v is zero and u exact.
+ In the latter case, Im(b)^2 < 2^(emin-1).
+ If ulp(u) >= 2^(emin+1) and norm(b) is not exactly
+ representable at the target precision, then rounding u+Im(b)^2
+ is equivalent to rounding u+2^(emin-1).
+ For instance, if exp(u)>0 and the target precision is smaller
+ than about |emin|, the norm is not representable. To make the
+ scaling in the "else" case work without underflow, we test
+ whether exp(u) is larger than a small negative number instead.
+ The second case is handled analogously. */
+ if (!mpfr_zero_p (u) && mpfr_get_exp (u) - 2 * prec_u > emin
+ && mpfr_get_exp (u) > -10) {
+ mpfr_set_prec (v, MPFR_PREC_MIN);
+ mpfr_set_ui_2exp (v, 1, emin - 1, GMP_RNDZ);
+ inexact = mpfr_add (a, u, v, rnd);
+ }
+ else if (!mpfr_zero_p (v) && mpfr_get_exp (v) - 2 * prec_v > emin
+ && mpfr_get_exp (v) > -10) {
+ mpfr_set_prec (u, MPFR_PREC_MIN);
+ mpfr_set_ui_2exp (u, 1, emin - 1, GMP_RNDZ);
+ inexact = mpfr_add (a, u, v, rnd);
+ }
+ else {
+ unsigned long int scale, exp_re, exp_im;
+ int inex_underflow;
+
+ /* scale the input to an average exponent close to 0 */
+ exp_re = (unsigned long int) (-mpfr_get_exp (MPC_RE (b)));
+ exp_im = (unsigned long int) (-mpfr_get_exp (MPC_IM (b)));
+ scale = exp_re / 2 + exp_im / 2 + (exp_re % 2 + exp_im % 2) / 2;
+ /* (exp_re + exp_im) / 2, computed in a way avoiding
+ integer overflow */
+ if (mpfr_zero_p (u)) {
+ /* recompute the scaled value exactly */
+ mpfr_mul_2ui (u, MPC_RE (b), scale, GMP_RNDN);
+ mpfr_sqr (u, u, GMP_RNDN);
+ }
+ else /* just scale */
+ mpfr_mul_2ui (u, u, 2*scale, GMP_RNDN);
+ if (mpfr_zero_p (v)) {
+ mpfr_mul_2ui (v, MPC_IM (b), scale, GMP_RNDN);
+ mpfr_sqr (v, v, GMP_RNDN);
+ }
+ else
+ mpfr_mul_2ui (v, v, 2*scale, GMP_RNDN);
+
inexact = mpfr_add (a, u, v, rnd);
- }
- else /* ulp(u) <= 2^emin */
- {
- mpfr_exp_t exp_re, exp_im, exp_min, scale, ulp_re, ulp_im, ulp_min;
- mpfr_prec_t prec_res;
-
- /* scale Re(b) and Im(b) so that Re(b)^2 and Im(b)^2 are computed
- exactly, and also their sum */
- exp_re = mpfr_get_exp (MPC_RE(b));
- exp_im = mpfr_get_exp (MPC_IM(b));
- exp_min = (exp_re <= exp_im) ? exp_re : exp_im;
- scale = (emin + 1) / 2 - exp_min;
- mpfr_mul_2ui (u, MPC_RE(b), scale, GMP_RNDZ);
- mpfr_mul_2ui (v, MPC_IM(b), scale, GMP_RNDZ);
- mpfr_sqr (u, u, GMP_RNDZ);
- mpfr_sqr (v, v, GMP_RNDZ);
- /* if there was an overflow with the scaled values, then the
- inputs had a huge precision, we don't deal with that corner
- case */
- MPC_ASSERT (mpfr_overflow_p () == 0);
- ulp_re = exp_re - MPC_PREC_RE (b);
- ulp_im = exp_im - MPC_PREC_IM (b);
- ulp_min = (ulp_re <= ulp_im) ? ulp_re : ulp_im;
- prec_res = ((exp_re >= exp_im) ? exp_re : exp_im) - ulp_min;
- prec_res = 2 * prec_res + 1; /* +1 is because there might be
- an exponent shift */
- mpfr_set_prec (res, prec_res);
- inexact = mpfr_add (res, u, v, GMP_RNDZ);
- MPC_ASSERT (inexact == 0);
- inexact = mpfr_set (a, res, rnd);
- inexact = mpfr_div_2ui (a, res, 2 * scale, rnd);
- }
+ mpfr_clear_underflow ();
+ inex_underflow = mpfr_div_2ui (a, a, 2*scale, rnd);
+ if (mpfr_underflow_p ())
+ inexact = inex_underflow;
+ }
}
else /* no problems, ternary value due to mpfr_can_round trick */
inexact = mpfr_set (a, res, rnd);
diff --git a/tests/norm.dat b/tests/norm.dat
index 794ea10..1a7e341 100644
--- a/tests/norm.dat
+++ b/tests/norm.dat
@@ -157,3 +157,10 @@
+ 10 0b1@-1073741824 10 0b1@-536870913 10 0b1@-536870913 U
0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870906 2 0b1.1@-536870913 N
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870913 2 0b1.1@-536870906 N
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870906 2 0b1.1@-536870913 Z
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870913 2 0b1.1@-536870906 Z
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870906 2 0b1.1@-536870913 D
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870913 2 0b1.1@-536870906 D
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870906 2 0b1.1@-536870913 U
+0 18 0b1.00100000000001001@-1073741811 2 0b1.1@-536870913 2 0b1.1@-536870906 U