summaryrefslogtreecommitdiff
path: root/tune/speed.h
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2013-10-20 16:34:09 +0200
committer Niels Möller <nisse@lysator.liu.se> 2013-10-20 16:34:09 +0200
commit f94c85b30dfbcd68dabe2dee55eb0f2ed1dfac55 (patch)
tree 1c8855c14bc7ebf7d8f521dbb063ee4560bf523a /tune/speed.h
parent 5b499c218f4e6ffc59b67e43e0306bdde92438bb (diff)
download gmp-f94c85b30dfbcd68dabe2dee55eb0f2ed1dfac55.tar.gz
Implemented tuning of mpn_div_qr_1.
Diffstat (limited to 'tune/speed.h')
-rw-r--r-- tune/speed.h 59
1 files changed, 45 insertions, 14 deletions
diff --git a/tune/speed.h b/tune/speed.h
index b6b6ad4e6..aa827077a 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -193,8 +193,10 @@ double speed_mpn_divrem_1f_inv (struct speed_params *);
double speed_mpn_divrem_2 (struct speed_params *);
double speed_mpn_divrem_2_div (struct speed_params *);
double speed_mpn_divrem_2_inv (struct speed_params *);
-double speed_mpn_div_qr_1n (struct speed_params *);
-double speed_mpn_div_qr_1u (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *);
+double speed_mpn_div_qr_1 (struct speed_params *);
double speed_mpn_div_qr_2n (struct speed_params *);
double speed_mpn_div_qr_2u (struct speed_params *);
double speed_mpn_fib2_ui (struct speed_params *);
@@ -466,6 +468,9 @@ extern int speed_option_verbose;
extern int speed_option_cycles_broken;
void speed_option_set (const char *);
+mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
@@ -3116,10 +3121,10 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_DIV_QR_1(function, norm) \
+#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \
{ \
mp_ptr wp, xp; \
- mp_limb_t y; \
+ mp_limb_t d; \
mp_limb_t r; \
unsigned i; \
double t; \
@@ -3130,16 +3135,42 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
+ d = s->r; \
+ if (d == 0) \
+ d = 1; \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ r = function (wp, wp+s->size-1, s->xp, s->size, d); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \
+ { \
+ mp_ptr wp, xp; \
+ mp_limb_t d, dinv; \
+ mp_limb_t r; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ d = s->r; \
/* divisor must be normalized */ \
- y = s->yp_block[0]; \
- if (norm) \
- y |= GMP_NUMB_HIGHBIT; \
- else \
- { \
- y &= ~GMP_NUMB_HIGHBIT; \
- if (y == 0) \
- y = 1; \
- } \
+ SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \
+ invert_limb (dinv, d); \
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
@@ -3147,7 +3178,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
speed_starttime (); \
i = s->reps; \
do \
- r = function (wp, wp+s->size-1,s->xp, s->size, y); \
+ r = function (wp, s->xp, s->size, 0, d, dinv); \
while (--i != 0); \
t = speed_endtime (); \
\