diff options
author | enge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2012-03-02 20:49:59 +0000 |
---|---|---|
committer | enge <enge@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2012-03-02 20:49:59 +0000 |
commit | 2e55720e7dc4cb801e1047c137eef2e9c2cbe672 (patch) | |
tree | bb8c97bb701fb90300b7a20abc11991fd944f5f5 | |
parent | 2ae2f0f5ca393f10d5145fbbedcda6945cb8e71f (diff) | |
download | mpc-2e55720e7dc4cb801e1047c137eef2e9c2cbe672.tar.gz |
mpc-impl.h, mul.c: made mpfr_fmma static again
sqr.c: copied mpfr_fmma as mpfr_fsss and adapted to case a^2-v^2
sqr.dat: activated last test
passes with naive squaring, but not with Karatsuba
git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1137 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r-- | src/mpc-impl.h | 1 | ||||
-rw-r--r-- | src/mul.c | 2 | ||||
-rw-r--r-- | src/sqr.c | 140 | ||||
-rw-r--r-- | tests/sqr.dat | 2 |
4 files changed, 140 insertions, 5 deletions
diff --git a/src/mpc-impl.h b/src/mpc-impl.h index 874902b..c8416b2 100644 --- a/src/mpc-impl.h +++ b/src/mpc-impl.h @@ -150,7 +150,6 @@ extern "C" { #endif -__MPC_DECLSPEC int mpfr_fmma __MPC_PROTO ((mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_srcptr d, int sign, mpfr_rnd_t rnd)); __MPC_DECLSPEC int mpc_mul_naive __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t)); __MPC_DECLSPEC int mpc_mul_karatsuba __MPC_PROTO ((mpc_ptr, mpc_srcptr, mpc_srcptr, mpc_rnd_t)); __MPC_DECLSPEC int mpc_pow_usi __MPC_PROTO ((mpc_ptr, mpc_srcptr, unsigned long, int, mpc_rnd_t)); @@ -171,7 +171,7 @@ mul_imag (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd) } -int +static int mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_srcptr d, int sign, mpfr_rnd_t rnd) { @@ -22,6 +22,143 @@ along with this program. If not, see http://www.gnu.org/licenses/ . #include "mpc-impl.h" +static int +mpfr_fsss (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr c, mpfr_rnd_t rnd) +{ + /* Computes z = a^2 - c^2. + Assumes that a and c are finite and non-zero; so a squaring yielding + an infinity is an overflow, and a squaring yielding 0 is an underflow. + Assumes further that z is distinct from a and c. */ + + int inex; + mpfr_t u, v; + + /* u=a^2, v=c^2 exactly */ + mpfr_init2 (u, 2*mpfr_get_prec (a)); + mpfr_init2 (v, 2*mpfr_get_prec (c)); + mpfr_sqr (u, a, GMP_RNDN); + mpfr_sqr (v, c, GMP_RNDN); + + /* tentatively compute z as u-v; here we need z to be distinct + from a and c to not lose the latter */ + inex = mpfr_sub (z, u, v, rnd); + + if (mpfr_inf_p (z)) { + /* replace by "correctly rounded overflow" */ + mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), GMP_RNDN); + inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd); + } + else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) { + /* exactly u underflowed, determine inexact flag */ + inex = (mpfr_signbit (u) ? 1 : -1); + } + else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) { + /* exactly v underflowed, determine inexact flag */ + inex = (mpfr_signbit (v) ? -1 : 1); + } + else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) { + /* In the first case, u and v are +inf. + In the second case, u and v are zeroes; their difference may be 0 + or the least representable number, with a sign to be determined. + Redo the computations with mpz_t exponents */ + mpfr_exp_t ea, ec; + mpz_t eu, ev; + /* cheat to work around the const qualifiers */ + + /* Normalise the input by shifting and keep track of the shifts in + the exponents of u and v */ + ea = mpfr_get_exp (a); + ec = mpfr_get_exp (c); + + mpfr_set_exp ((mpfr_ptr) a, (mpfr_prec_t) 0); + mpfr_set_exp ((mpfr_ptr) c, (mpfr_prec_t) 0); + + mpz_init (eu); + mpz_init (ev); + mpz_set_si (eu, (long int) ea); + mpz_mul_2exp (eu, eu, 1); + mpz_set_si (ev, (long int) ec); + mpz_mul_2exp (ev, ev, 1); + + /* recompute u and v and move exponents to eu and ev */ + mpfr_sqr (u, a, GMP_RNDN); + /* exponent of u is non-positive */ + mpz_sub_ui (eu, eu, (unsigned long int) (-mpfr_get_exp (u))); + mpfr_set_exp (u, (mpfr_prec_t) 0); + mpfr_sqr (v, c, GMP_RNDN); + mpz_sub_ui (ev, ev, (unsigned long int) (-mpfr_get_exp (v))); + mpfr_set_exp (v, (mpfr_prec_t) 0); + if (mpfr_nan_p (z)) { + mpfr_exp_t emax = mpfr_get_emax (); + int overflow; + /* We have a = ma * 2^ea with 1/2 <= |ma| < 1 and ea <= emax. + So eu <= 2*emax, and eu > emax since we have + an overflow. The same holds for ev. Shift u and v by as much as + possible so that one of them has exponent emax and the + remaining exponents in eu and ev are the same. Then carry out + the addition. Shifting u and v prevents an underflow. */ + if (mpz_cmp (eu, ev) >= 0) { + mpfr_set_exp (u, emax); + mpz_sub_ui (eu, eu, (long int) emax); + mpz_sub (ev, ev, eu); + mpfr_set_exp (v, (mpfr_exp_t) mpz_get_ui (ev)); + /* remaining common exponent is now in eu */ + } + else { + mpfr_set_exp (v, emax); + mpz_sub_ui (ev, ev, (long int) emax); + mpz_sub (eu, eu, ev); + mpfr_set_exp (u, (mpfr_exp_t) mpz_get_ui (eu)); + mpz_set (eu, ev); + /* remaining common exponent is now also in eu */ + } + inex = mpfr_sub (z, u, v, rnd); + /* Result is finite since u and v have the same sign. */ + overflow = mpfr_mul_2ui (z, z, mpz_get_ui (eu), rnd); + if (overflow) + inex = overflow; + } + else { + int underflow; + /* Subtraction of two zeroes. We have a = ma * 2^ea + with 1/2 <= |ma| < 1 and ea >= emin and similarly for b. + So 2*emin < 2*emin+1 <= eu < emin < 0, and analogously for v. */ + mpfr_exp_t emin = mpfr_get_emin (); + if (mpz_cmp (eu, ev) <= 0) { + mpfr_set_exp (u, emin); + mpz_add_ui (eu, eu, (unsigned long int) (-emin)); + mpz_sub (ev, ev, eu); + mpfr_set_exp (v, (mpfr_exp_t) mpz_get_si (ev)); + } + else { + mpfr_set_exp (v, emin); + mpz_add_ui (ev, ev, (unsigned long int) (-emin)); + mpz_sub (eu, eu, ev); + mpfr_set_exp (u, (mpfr_exp_t) mpz_get_si (eu)); + mpz_set (eu, ev); + } + inex = mpfr_sub (z, u, v, rnd); + mpz_neg (eu, eu); + underflow = mpfr_div_2ui (z, z, mpz_get_ui (eu), rnd); + if (underflow) + inex = underflow; + } + + mpz_clear (eu); + mpz_clear (ev); + + mpfr_set_exp ((mpfr_ptr) a, ea); + mpfr_set_exp ((mpfr_ptr) c, ec); + /* works also when a == c */ + } + + mpfr_clear (u); + mpfr_clear (v); + + return inex; +} + + int mpc_sqr (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) { @@ -107,8 +244,7 @@ mpc_sqr (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) additional multiplication. Using the approach copied from mul, over- and underflows are also handled correctly. */ - inex_re = mpfr_fmma (rop->re, x, x, op->im, op->im, -1, - MPC_RND_RE (rnd)); + inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd)); } else { diff --git a/tests/sqr.dat b/tests/sqr.dat index 0917623..095a9eb 100644 --- a/tests/sqr.dat +++ b/tests/sqr.dat @@ -170,4 +170,4 @@ # wrong ternary value for real part with naive algorithm, # sqr.c:297: MPC assertion failed: mpfr_get_exp (u) != emin for Karatsuba -#+ - 10 0b1e-1073741824 10 0 100 0b1@-536870912 100 0b1@-536870913 N N ++ - 10 0b1e-1073741824 10 0 100 0b1@-536870912 100 0b1@-536870913 N N |