diff options
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | gmp-impl.h | 9 | ||||
-rw-r--r-- | mpn/generic/compute_powtab.c | 360 | ||||
-rw-r--r-- | mpn/generic/get_str.c | 139 | ||||
-rw-r--r-- | mpn/generic/set_str.c | 109 | ||||
-rw-r--r-- | tune/set_strb.c | 1 | ||||
-rw-r--r-- | tune/set_strs.c | 1 | ||||
-rw-r--r-- | tune/tuneup.c | 5 |
9 files changed, 420 insertions, 226 deletions
@@ -1,5 +1,25 @@ 2017-01-24 Torbjörn Granlund <tg@gmplib.org> + * mpn/generic/compute_powtab.c: New file, providing mpn_compute_powtab + for both get_str and set_str. + + * gmp-impl.h (mpn_str_powtab_alloc): New macro. + (mpn_dc_set_str_powtab_alloc, mpn_dc_get_str_powtab_alloc): Remove. + (mpn_compute_powtab): Declare. + + * mpn/generic/set_str.c: Use mpn_compute_powtab. + (mpn_set_str_compute_powtab): Remove. + + * mpn/generic/get_str.c: Use mpn_compute_powtab. + (mpn_get_str_compute_powtab): Remove. + (mpn_bc_get_str): New name for mpn_sb_get_str. + + * configure.ac (gmp_mpn_functions): Add compute_powtab. + + * tune/tuneup.c (speed_mpn_pre_set_str): Call mpn_compute_powtab. + * tune/set_strb.c: Remove mpn_set_str_compute_powtab name mangling. + * tune/set_strs.c: Likewise. + * configure.ac: Invoke AC_PROG_CC_C99 instead of AC_PROG_CC_STDC. 2017-01-03 Torbjörn Granlund <tg@gmplib.org> diff --git a/configure.ac b/configure.ac index f86f451f8..fea46bfcd 100644 --- a/configure.ac +++ b/configure.ac @@ -2927,7 +2927,7 @@ gmp_mpn_functions="$extra_functions \ mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \ mulmid_basecase toom42_mulmid mulmid_n mulmid \ random random2 pow_1 \ - rootrem sqrtrem sizeinbase get_str set_str \ + rootrem sqrtrem sizeinbase get_str set_str compute_powtab \ scan0 scan1 popcount hamdist cmp zero_p \ perfsqr perfpow \ gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \ diff --git a/gmp-impl.h b/gmp-impl.h index bb69ed0e4..3de4275d4 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -3,7 +3,7 @@ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES. -Copyright 1991, 1993-1997, 1999-2015 Free Software Foundation, Inc. +Copyright 1991-2017 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -4298,17 +4298,16 @@ struct powers int base; }; typedef struct powers powers_t; -#define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS) +#define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */ #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS) -#define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS) +#define mpn_compute_powtab __MPN(compute_powtab) +__GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); #define mpn_dc_set_str __MPN(dc_set_str) __GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr); #define mpn_bc_set_str __MPN(bc_set_str) __GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int); -#define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab) -__GMP_DECLSPEC void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole diff --git a/mpn/generic/compute_powtab.c b/mpn/generic/compute_powtab.c new file mode 100644 index 000000000..ace794e82 --- /dev/null +++ b/mpn/generic/compute_powtab.c @@ -0,0 +1,360 @@ +/* mpn_compute_powtab. + + Contributed to the GNU project by Torbjorn Granlund. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 1991-2017 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +/* + CAVEATS: + * The exptab and powtab vectors are in opposite orders. Probably OK. + * Consider getting rid of exptab, doing bit ops on the un argument instead. + * Consider rounding greatest power slightly upwards to save adjustments. + * In powtab_decide, consider computing cost from just the 2-3 largest + operands, since smaller operand contribute little. This makes most sense + if exptab is suppressed. +*/ + +#include "gmp-impl.h" + +#ifndef DIV_1_VS_MUL_1_PERCENT +#define DIV_1_VS_MUL_1_PERCENT 150 +#endif + +#define SET_powers_t(dest, ptr, size, dib, b, sh) \ + do { \ + dest.p = ptr; \ + dest.n = size; \ + dest.digits_in_base = dib; \ + dest.base = b; \ + dest.shift = sh; \ + } while (0) + +#if DIV_1_VS_MUL_1_PERCENT > 120 +#define HAVE_mpn_compute_powtab_mul 1 +static void +mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, + int base, const size_t *exptab, size_t n_pows) +{ + mp_size_t n; + mp_ptr p, t; + mp_limb_t cy; + long start_idx; + int c; + + mp_limb_t big_base = mp_bases[base].big_base; + int chars_per_limb = mp_bases[base].chars_per_limb; + + mp_ptr powtab_mem_ptr = powtab_mem; + + size_t digits_in_base = chars_per_limb; + + powers_t *pt = powtab; + + p = powtab_mem_ptr; + powtab_mem_ptr += 1; + p[0] = big_base; + + SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); + pt++; + + t = powtab_mem_ptr; + powtab_mem_ptr += 2; + t[1] = mpn_mul_1 (t, p, 1, big_base); + n = 2; + + digits_in_base *= 2; + + c = t[0] == 0; + t += c; + n -= c; + mp_size_t shift = c; + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + p = t; + pt++; + + if (exptab[0] == ((size_t) chars_per_limb << n_pows)) + { + start_idx = n_pows - 2; + } + else + { + if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) + { + /* 3, sometimes adjusted to 4. */ + t = powtab_mem_ptr; + powtab_mem_ptr += 4; + t[n] = cy = mpn_mul_1 (t, p, n, big_base); + n += cy != 0;; + + digits_in_base += chars_per_limb; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + } + else + { + /* 2 copy, will always become 3 with back-multiplication. */ + t = powtab_mem_ptr; + powtab_mem_ptr += 3; + t[0] = p[0]; + t[1] = p[1]; + } + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + p = t; + pt++; + start_idx = n_pows - 3; + } + + for (long pi = start_idx; pi >= 0; pi--) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n + 2; + + ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); + + mpn_sqr (t, p, n); + + digits_in_base *= 2; + n *= 2; + n -= t[n - 1] == 0; + shift *= 2; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + + /* Adjust new value if it is too small as input to the next squaring. */ + if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) + { + t[n] = cy = mpn_mul_1 (t, t, n, big_base); + n += cy != 0; + + digits_in_base += chars_per_limb; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + } + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + + /* Adjust previous value if it is not at its target power. */ + if (pt[-1].digits_in_base < exptab[pi + 1]) + { + mp_size_t n = pt[-1].n; + mp_ptr p = pt[-1].p; + p[n] = cy = mpn_mul_1 (p, p, n, big_base); + n += cy != 0; + + ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); + pt[-1].digits_in_base = exptab[pi + 1]; + + c = p[0] == 0; + pt[-1].p = p + c; + pt[-1].n = n - c; + pt[-1].shift += c; + } + + p = t; + pt++; + } +} +#endif + +#if DIV_1_VS_MUL_1_PERCENT < 275 +#define HAVE_mpn_compute_powtab_div 1 +static void +mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, + int base, const size_t *exptab, size_t n_pows) +{ + mp_ptr p, t; + + mp_limb_t big_base = mp_bases[base].big_base; + int chars_per_limb = mp_bases[base].chars_per_limb; + + mp_ptr powtab_mem_ptr = powtab_mem; + + size_t digits_in_base = chars_per_limb; + + powers_t *pt = powtab; + + p = powtab_mem_ptr; + powtab_mem_ptr += 1; + p[0] = big_base; + + SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); + pt++; + + mp_size_t n = 1; + mp_size_t shift = 0; + for (long pi = n_pows - 1; pi >= 0; pi--) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n; + + ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); + + mpn_sqr (t, p, n); + n = 2 * n - 1; n += t[n] != 0; + digits_in_base *= 2; + + if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ + { + mpn_divexact_1 (t, t, n, big_base); + n -= t[n - 1] == 0; + digits_in_base -= chars_per_limb; + } + + shift *= 2; + /* Strip low zero limbs, but be careful to keep the result divisible by + big_base. */ + while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) + { + t++; + n--; + shift++; + } + p = t; + + SET_powers_t (pt[0], p, n, digits_in_base, base, shift); + pt++; + } + + /* Strip any remaining low zero limbs. */ + pt -= n_pows + 1; + for (long pi = n_pows; pi >= 0; pi--) + { + mp_ptr t = pt[pi].p; + mp_size_t shift = pt[pi].shift; + mp_size_t n = pt[pi].n; + int c; + c = t[0] == 0; + t += c; + n -= c; + shift += c; + pt[pi].p = t; + pt[pi].shift = shift; + pt[pi].n = n; + } +} +#endif + +static long +powtab_decide (size_t *exptab, size_t un, int base) +{ + int chars_per_limb = mp_bases[base].chars_per_limb; + long n_pows = 0; + for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) + { + exptab[n_pows] = pn * chars_per_limb; + n_pows++; + } + exptab[n_pows] = chars_per_limb; + +#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div + size_t pn = un - 1; + size_t xn = (un + 1) >> 1; + unsigned mcost = 1; + unsigned dcost = 1; + for (long i = n_pows - 2; i >= 0; i--) + { + size_t pow = (pn >> (i + 1)) + 1; + + if (pow & 1) + dcost += pow; + + if (xn != (pow << i)) + { + if (pow > 2 && (pow & 1) == 0) + mcost += 2 * pow; + else + mcost += pow; + } + else + { + if (pow & 1) + mcost += pow; + } + } + + dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; + + if (mcost <= dcost) + return n_pows; + else + return -n_pows; +#elif HAVE_mpn_compute_powtab_mul + return n_pows; +#elif HAVE_mpn_compute_powtab_div + return -n_pows; +#else +#error "no powtab function available" +#endif +} + +size_t +mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) +{ + size_t exptab[GMP_LIMB_BITS]; + + long n_pows = powtab_decide (exptab, un, base); + +#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div + if (n_pows >= 0) + { + mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); + return n_pows; + } + else + { + mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); + return -n_pows; + } +#elif HAVE_mpn_compute_powtab_mul + ASSERT (n_pows > 0); + mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); + return n_pows; +#elif HAVE_mpn_compute_powtab_div + ASSERT (n_pows < 0); + mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); + return -n_pows; +#else +#error "no powtab function available" +#endif +} diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c index 8c4577803..19cc581ae 100644 --- a/mpn/generic/get_str.c +++ b/mpn/generic/get_str.c @@ -2,13 +2,12 @@ Contributed to the GNU project by Torbjorn Granlund. - THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE - INTERFACE. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN - FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE - GNU MP RELEASE. + THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE + INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. + IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A + FUTURE GNU MP RELEASE. -Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2011, 2012 Free Software -Foundation, Inc. +Copyright 1991-2017 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -44,7 +43,7 @@ see https://www.gnu.org/licenses/. */ A) Divide U repeatedly by B, generating a quotient and remainder, until the quotient becomes zero. The remainders hold the converted digits. Digits - come out from right to left. (Used in mpn_sb_get_str.) + come out from right to left. (Used in mpn_bc_get_str.) B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction. Then develop digits by multiplying the fraction repeatedly by b. Digits @@ -89,7 +88,7 @@ see https://www.gnu.org/licenses/. */ ... else if (un < GET_STR_PRECOMPUTE_THRESHOLD) - mpn_sb_get_str (str, base, up, un); + mpn_bx_get_str (str, base, up, un); else precompute_power_tables mpn_dc_get_str @@ -97,11 +96,11 @@ see https://www.gnu.org/licenses/. */ mpn_dc_get_str: mpn_tdiv_qr if (qn < GET_STR_DC_THRESHOLD) - mpn_sb_get_str + mpn_bc_get_str else mpn_dc_get_str if (rn < GET_STR_DC_THRESHOLD) - mpn_sb_get_str + mpn_bc_get_str else mpn_dc_get_str @@ -146,7 +145,7 @@ see https://www.gnu.org/licenses/. */ after the last digit of the result string. Complexity is O(un^2); intended for small conversions. */ static unsigned char * -mpn_sb_get_str (unsigned char *str, size_t len, +mpn_bc_get_str (unsigned char *str, size_t len, mp_ptr up, mp_size_t un, int base) { mp_limb_t rl, ul; @@ -314,7 +313,7 @@ mpn_dc_get_str (unsigned char *str, size_t len, if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD)) { if (un != 0) - str = mpn_sb_get_str (str, len, up, un, powtab->base); + str = mpn_bc_get_str (str, len, up, un, powtab->base); else { while (len != 0) @@ -358,7 +357,6 @@ mpn_dc_get_str (unsigned char *str, size_t len, return str; } - /* There are no leading zeros on the digits generated at str, but that's not currently a documented feature. The current mpz_out_str and mpz_get_str rely on it. */ @@ -366,13 +364,9 @@ mpn_dc_get_str (unsigned char *str, size_t len, size_t mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) { - mp_ptr powtab_mem, powtab_mem_ptr; - mp_limb_t big_base; - size_t digits_in_base; + mp_ptr powtab_mem; powers_t powtab[GMP_LIMB_BITS]; int pi; - mp_size_t n; - mp_ptr p, t; size_t out_len; mp_ptr tmp; TMP_DECL; @@ -433,115 +427,20 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) /* General case. The base is not a power of 2. */ if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD)) - return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str; + return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str; TMP_MARK; /* Allocate one large block for the powers of big_base. */ - powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un)); - powtab_mem_ptr = powtab_mem; + powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un)); /* Compute a table of powers, were the largest power is >= sqrt(U). */ + size_t ndig; + mp_size_t xn; + DIGITS_IN_BASE_PER_LIMB (ndig, un, base); + xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */ - big_base = mp_bases[base].big_base; - digits_in_base = mp_bases[base].chars_per_limb; - - { - mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp; - mp_limb_t cy; - mp_size_t shift; - size_t ndig; - - DIGITS_IN_BASE_PER_LIMB (ndig, un, base); - xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */ - - n_pows = 0; - for (pn = xn; pn != 1; pn = (pn + 1) >> 1) - { - exptab[n_pows] = pn; - n_pows++; - } - exptab[n_pows] = 1; - - powtab[0].p = &big_base; - powtab[0].n = 1; - powtab[0].digits_in_base = digits_in_base; - powtab[0].base = base; - powtab[0].shift = 0; - - powtab[1].p = powtab_mem_ptr; powtab_mem_ptr += 2; - powtab[1].p[0] = big_base; - powtab[1].n = 1; - powtab[1].digits_in_base = digits_in_base; - powtab[1].base = base; - powtab[1].shift = 0; - - n = 1; - p = &big_base; - bexp = 1; - shift = 0; - for (pi = 2; pi < n_pows; pi++) - { - t = powtab_mem_ptr; - powtab_mem_ptr += 2 * n + 2; - - ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un)); - - mpn_sqr (t, p, n); - - digits_in_base *= 2; - n *= 2; n -= t[n - 1] == 0; - bexp *= 2; - - if (bexp + 1 < exptab[n_pows - pi]) - { - digits_in_base += mp_bases[base].chars_per_limb; - cy = mpn_mul_1 (t, t, n, big_base); - t[n] = cy; - n += cy != 0; - bexp += 1; - } - shift *= 2; - /* Strip low zero limbs. */ - while (t[0] == 0) - { - t++; - n--; - shift++; - } - p = t; - powtab[pi].p = p; - powtab[pi].n = n; - powtab[pi].digits_in_base = digits_in_base; - powtab[pi].base = base; - powtab[pi].shift = shift; - } - - for (pi = 1; pi < n_pows; pi++) - { - t = powtab[pi].p; - n = powtab[pi].n; - cy = mpn_mul_1 (t, t, n, big_base); - t[n] = cy; - n += cy != 0; - if (t[0] == 0) - { - powtab[pi].p = t + 1; - n--; - powtab[pi].shift++; - } - powtab[pi].n = n; - powtab[pi].digits_in_base += mp_bases[base].chars_per_limb; - } - -#if 0 - { int i; - printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn); - for (i = 0; i < n_pows; i++) - printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift); - } -#endif - } + pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base); /* Using our precomputed powers, now in powtab[], convert our number. */ tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un)); diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c index 0c2243123..2bd584c0b 100644 --- a/mpn/generic/set_str.c +++ b/mpn/generic/set_str.c @@ -4,13 +4,12 @@ Contributed to the GNU project by Torbjorn Granlund. - THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH A MUTABLE - INTERFACE. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN - FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE - GNU MP RELEASE. + THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH MUTABLE + INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. + IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A + FUTURE GNU MP RELEASE. -Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2012, 2013 Free -Software Foundation, Inc. +Copyright 1991-2017 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -65,7 +64,6 @@ see https://www.gnu.org/licenses/. */ */ #include "gmp-impl.h" -#include "longlong.h" mp_size_t mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) @@ -119,103 +117,22 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) chars_per_limb = mp_bases[base].chars_per_limb; - un = str_len / chars_per_limb + 1; + un = str_len / chars_per_limb + 1; /* FIXME: scalar integer division */ /* Allocate one large block for the powers of big_base. */ - powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un)); + powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un)); - mpn_set_str_compute_powtab (powtab, powtab_mem, un, base); + size_t n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base); + powers_t *pt = powtab + n_pows; tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un)); - size = mpn_dc_set_str (rp, str, str_len, powtab, tp); + size = mpn_dc_set_str (rp, str, str_len, pt, tp); TMP_FREE; return size; } } -void -mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) -{ - mp_ptr powtab_mem_ptr; - long i, pi; - mp_size_t n; - mp_ptr p, t; - mp_limb_t big_base; - int chars_per_limb; - size_t digits_in_base; - mp_size_t shift; - - powtab_mem_ptr = powtab_mem; - - chars_per_limb = mp_bases[base].chars_per_limb; - big_base = mp_bases[base].big_base; - - p = powtab_mem_ptr; - powtab_mem_ptr += 1; - - digits_in_base = chars_per_limb; - - p[0] = big_base; - n = 1; - - count_leading_zeros (i, un - 1); - i = GMP_LIMB_BITS - 1 - i; - - powtab[i].p = p; - powtab[i].n = n; - powtab[i].digits_in_base = digits_in_base; - powtab[i].base = base; - powtab[i].shift = 0; - - shift = 0; - for (pi = i - 1; pi >= 0; pi--) - { - t = powtab_mem_ptr; - powtab_mem_ptr += 2 * n; - - ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un)); - - mpn_sqr (t, p, n); - n = 2 * n - 1; n += t[n] != 0; - digits_in_base *= 2; -#if 1 - if ((((un - 1) >> pi) & 2) == 0) - { - mpn_divexact_1 (t, t, n, big_base); - n -= t[n - 1] == 0; - digits_in_base -= chars_per_limb; - } -#else - if (CLEVER_CONDITION_1 ()) - { - /* perform adjustment operation of previous */ - cy = mpn_mul_1 (p, p, n, big_base); - } - if (CLEVER_CONDITION_2 ()) - { - /* perform adjustment operation of new */ - cy = mpn_mul_1 (t, t, n, big_base); - } -#endif - shift *= 2; - /* Strip low zero limbs, but be careful to keep the result divisible by - big_base. */ - while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) - { - t++; - n--; - shift++; - } - p = t; - powtab[pi].p = p; - powtab[pi].n = n; - powtab[pi].digits_in_base = digits_in_base; - powtab[pi].base = base; - powtab[pi].shift = shift; - } -} - mp_size_t mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, const powers_t *powtab, mp_ptr tp) @@ -231,7 +148,7 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD)) return mpn_bc_set_str (rp, str, str_len, powtab->base); else - return mpn_dc_set_str (rp, str, str_len, powtab + 1, tp); + return mpn_dc_set_str (rp, str, str_len, powtab - 1, tp); } len_hi = str_len - len_lo; @@ -240,7 +157,7 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD)) hn = mpn_bc_set_str (tp, str, len_hi, powtab->base); else - hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp); + hn = mpn_dc_set_str (tp, str, len_hi, powtab - 1, rp); sn = powtab->shift; @@ -263,7 +180,7 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD)) ln = mpn_bc_set_str (tp, str, len_lo, powtab->base); else - ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + sn + 1); + ln = mpn_dc_set_str (tp, str, len_lo, powtab - 1, tp + powtab->n + sn + 1); if (ln != 0) { diff --git a/tune/set_strb.c b/tune/set_strb.c index 4712bbe7b..128c41b9b 100644 --- a/tune/set_strb.c +++ b/tune/set_strb.c @@ -31,7 +31,6 @@ see https://www.gnu.org/licenses/. */ #define __gmpn_set_str mpn_set_str_basecase #define __gmpn_bc_set_str mpn_bc_set_str_basecase #define __gmpn_dc_set_str mpn_dc_set_str_basecase -#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_basecase #include "gmp-impl.h" diff --git a/tune/set_strs.c b/tune/set_strs.c index 6a0346fbc..d2a9fc2f8 100644 --- a/tune/set_strs.c +++ b/tune/set_strs.c @@ -31,7 +31,6 @@ see https://www.gnu.org/licenses/. */ #define __gmpn_set_str mpn_set_str_subquad #define __gmpn_bc_set_str mpn_bc_set_str_subquad #define __gmpn_dc_set_str mpn_dc_set_str_subquad -#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_subquad #include "gmp-impl.h" diff --git a/tune/tuneup.c b/tune/tuneup.c index b254d4788..1b561e836 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -2719,14 +2719,15 @@ speed_mpn_pre_set_str (struct speed_params *s) chars_per_limb = mp_bases[base].chars_per_limb; un = s->size / chars_per_limb + 1; powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un)); - mpn_set_str_compute_powtab (powtab, powtab_mem, un, base); + size_t n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base); + powers_t *pt = powtab + n_pows; tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un)); speed_starttime (); i = s->reps; do { - mpn_pre_set_str (wp, str, s->size, powtab, tp); + mpn_pre_set_str (wp, str, s->size, pt, tp); } while (--i != 0); t = speed_endtime (); |