diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-02-20 18:03:00 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-02-20 18:03:00 +0000 |
commit | ed88de670947f3b74603f6d6584f459d04e94171 (patch) | |
tree | 100691dbb241eae415b0ad90c86aaf3519c39e98 /src/fma.c | |
parent | b3c0de61895fbebd3809682f744a6ef7eb877cfa (diff) | |
download | mpfr-ed88de670947f3b74603f6d6584f459d04e94171.tar.gz |
[src/fma.c] speedup mpfr_fma for 1 limb
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11327 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/fma.c')
-rw-r--r-- | src/fma.c | 66 |
1 file changed, 43 insertions, 23 deletions
@@ -20,6 +20,7 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ +#define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" /* The fused-multiply-add (fma) of x, y and z is defined by: @@ -126,32 +127,51 @@ mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z, we assume mpn_mul_n is faster up to 4*MPFR_MUL_THRESHOLD). Since |EXP(x)|, |EXP(y)| < 2^(k-2) on a k-bit computer, |EXP(x)+EXP(y)| < 2^(k-1), thus cannot overflow nor underflow. */ - n = MPFR_LIMB_SIZE(x); - if (n <= 4 * MPFR_MUL_THRESHOLD && MPFR_PREC(x) == MPFR_PREC(y) && - e <= __gmpfr_emax && e > __gmpfr_emin) + if (MPFR_PREC(x) == MPFR_PREC(y) && e <= __gmpfr_emax && e > __gmpfr_emin) { - mp_size_t un = n + n; - mpfr_limb_ptr up; - MPFR_TMP_DECL(marker); - - MPFR_TMP_MARK(marker); - MPFR_TMP_INIT (up, u, un * GMP_NUMB_BITS, un); - up = MPFR_MANT(u); - /* multiply x*y exactly into u */ - mpn_mul_n (up, MPFR_MANT(x), MPFR_MANT(y), n); - if (MPFR_LIMB_MSB (up[un - 1]) == 0) + if (MPFR_PREC(x) <= GMP_NUMB_BITS && MPFR_PREC(z) <= GMP_NUMB_BITS) { - mpn_lshift (up, up, un, 1); - MPFR_EXP(u) = e - 1; + mp_limb_t umant[2]; + + umul_ppmm (umant[1], umant[0], MPFR_MANT(x)[0], MPFR_MANT(y)[0]); + MPFR_MANT(u) = umant; + MPFR_PREC(u) = 2 * GMP_NUMB_BITS; + if ((umant[1] & MPFR_LIMB_HIGHBIT) == 0) + { + umant[1] = (umant[1] << 1) | (umant[0] >> (GMP_NUMB_BITS - 1)); + umant[0] = umant[0] << 1; + MPFR_EXP(u) = e - 1; + } + else + MPFR_EXP(u) = e; + MPFR_SIGN(u) = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) ); + return mpfr_add (s, u, z, rnd_mode); + } + else if ((n = MPFR_LIMB_SIZE(x)) <= 4 * MPFR_MUL_THRESHOLD) + { + mpfr_limb_ptr up; + mp_size_t un = n + n; + MPFR_TMP_DECL(marker); + + MPFR_TMP_MARK(marker); + MPFR_TMP_INIT (up, u, un * GMP_NUMB_BITS, un); + up = MPFR_MANT(u); + /* multiply x*y exactly into u */ + mpn_mul_n (up, MPFR_MANT(x), MPFR_MANT(y), n); + if (MPFR_LIMB_MSB (up[un - 1]) == 0) + { + mpn_lshift (up, up, un, 1); + MPFR_EXP(u) = e - 1; + } + else + MPFR_EXP(u) = e; + MPFR_SIGN(u) = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) ); + /* The above code does not generate any exception. + The exceptions will come only from mpfr_add. */ + inexact = mpfr_add (s, u, z, rnd_mode); + MPFR_TMP_FREE(marker); + return inexact; } - else - MPFR_EXP(u) = e; - MPFR_SIGN(u) = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) ); - /* The above code does not generate any exception. - The exceptions will come only from mpfr_add. */ - inexact = mpfr_add (s, u, z, rnd_mode); - MPFR_TMP_FREE(marker); - return inexact; } /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y |