diff options
author | Niels Möller <nisse@lysator.liu.se> | 2013-10-20 16:34:09 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2013-10-20 16:34:09 +0200 |
commit | f94c85b30dfbcd68dabe2dee55eb0f2ed1dfac55 (patch) | |
tree | 1c8855c14bc7ebf7d8f521dbb063ee4560bf523a /tune/speed.h | |
parent | 5b499c218f4e6ffc59b67e43e0306bdde92438bb (diff) | |
download | gmp-f94c85b30dfbcd68dabe2dee55eb0f2ed1dfac55.tar.gz |
Implemented tuning of mpn_div_qr_1.
Diffstat (limited to 'tune/speed.h')
-rw-r--r-- | tune/speed.h | 59 |
1 files changed, 45 insertions, 14 deletions
diff --git a/tune/speed.h b/tune/speed.h index b6b6ad4e6..aa827077a 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -193,8 +193,10 @@ double speed_mpn_divrem_1f_inv (struct speed_params *); double speed_mpn_divrem_2 (struct speed_params *); double speed_mpn_divrem_2_div (struct speed_params *); double speed_mpn_divrem_2_inv (struct speed_params *); -double speed_mpn_div_qr_1n (struct speed_params *); -double speed_mpn_div_qr_1u (struct speed_params *); +double speed_mpn_div_qr_1n_pi1 (struct speed_params *); +double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *); +double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *); +double speed_mpn_div_qr_1 (struct speed_params *); double speed_mpn_div_qr_2n (struct speed_params *); double speed_mpn_div_qr_2u (struct speed_params *); double speed_mpn_fib2_ui (struct speed_params *); @@ -466,6 +468,9 @@ extern int speed_option_verbose; extern int speed_option_cycles_broken; void speed_option_set (const char *); +mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); +mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); + mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr); @@ -3116,10 +3121,10 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_DIV_QR_1(function, norm) \ +#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \ { \ mp_ptr wp, xp; \ - mp_limb_t y; \ + mp_limb_t d; \ mp_limb_t r; \ unsigned i; \ double t; \ @@ -3130,16 +3135,42 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); TMP_MARK; \ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ \ + d = s->r; \ + if (d == 0) \ + d = 1; \ + speed_operand_src (s, s->xp, s->size); \ + speed_operand_dst (s, wp, 
s->size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + r = function (wp, wp+s->size-1, s->xp, s->size, d); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + TMP_FREE; \ + return t; \ + } + +#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \ + { \ + mp_ptr wp, xp; \ + mp_limb_t d, dinv; \ + mp_limb_t r; \ + unsigned i; \ + double t; \ + TMP_DECL; \ + \ + SPEED_RESTRICT_COND (s->size >= 1); \ + \ + TMP_MARK; \ + SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ + \ + d = s->r; \ /* divisor must be normalized */ \ - y = s->yp_block[0]; \ - if (norm) \ - y |= GMP_NUMB_HIGHBIT; \ - else \ - { \ - y &= ~GMP_NUMB_HIGHBIT; \ - if (y == 0) \ - y = 1; \ - } \ + SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \ + invert_limb (dinv, d); \ speed_operand_src (s, s->xp, s->size); \ speed_operand_dst (s, wp, s->size); \ speed_cache_fill (s); \ @@ -3147,7 +3178,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); speed_starttime (); \ i = s->reps; \ do \ - r = function (wp, wp+s->size-1,s->xp, s->size, y); \ + r = function (wp, s->xp, s->size, 0, d, dinv); \ while (--i != 0); \ t = speed_endtime (); \ \ |