diff options
author | Torbjorn Granlund <tg@gmplib.org> | 2019-10-01 23:04:11 +0200 |
---|---|---|
committer | Torbjorn Granlund <tg@gmplib.org> | 2019-10-01 23:04:11 +0200 |
commit | 270db079a01f65e33d62998e4020b869c29bdb89 (patch) | |
tree | 6b5a9f10d630aaac1deacfc240cee5b6bf6ec3dc /mpn/arm | |
parent | 0bced342b873ab90755ee3b048851cd88c65a597 (diff) | |
download | gmp-270db079a01f65e33d62998e4020b869c29bdb89.tar.gz |
New tuneup files.
Diffstat (limited to 'mpn/arm')
-rw-r--r-- | mpn/arm/v6/gmp-mparam.h | 193 | ||||
-rw-r--r-- | mpn/arm/v7a/cora15/gmp-mparam.h | 157 | ||||
-rw-r--r-- | mpn/arm/v7a/cora17/gmp-mparam.h | 159 | ||||
-rw-r--r-- | mpn/arm/v7a/cora5/gmp-mparam.h | 166 | ||||
-rw-r--r-- | mpn/arm/v7a/cora7/gmp-mparam.h | 158 | ||||
-rw-r--r-- | mpn/arm/v7a/cora8/gmp-mparam.h | 194 |
6 files changed, 488 insertions, 539 deletions
diff --git a/mpn/arm/v6/gmp-mparam.h b/mpn/arm/v6/gmp-mparam.h index f5915713a..ccf77dc17 100644 --- a/mpn/arm/v6/gmp-mparam.h +++ b/mpn/arm/v6/gmp-mparam.h @@ -1,7 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012, 2015 Free Software -Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,61 +32,62 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BYTES 4 /* 700 MHz ARM11 (raspberry pi) */ -/* FFT tuning limit = 10 M */ -/* Generated by tuneup.c, 2015-10-05, gcc 4.6 */ +/* FFT tuning limit = 0.5 M */ +/* Generated by tuneup.c, 2019-10-01, gcc 8.3 */ #define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */ #define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 3 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 8 #define MOD_1U_TO_MOD_1_1_THRESHOLD 6 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ #define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 19 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 23 #define USE_PREINV_DIVREM_1 1 /* preinv always */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 71.04% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 39 +#define BMOD_1_TO_MOD_1_THRESHOLD 40 -#define DIV_1_VS_MUL_1_PERCENT 178 +#define DIV_1_VS_MUL_1_PERCENT 253 -#define MUL_TOOM22_THRESHOLD 44 -#define MUL_TOOM33_THRESHOLD 138 -#define MUL_TOOM44_THRESHOLD 517 +#define MUL_TOOM22_THRESHOLD 38 +#define MUL_TOOM33_THRESHOLD 130 +#define MUL_TOOM44_THRESHOLD 512 #define MUL_TOOM6H_THRESHOLD 0 /* always */ -#define MUL_TOOM8H_THRESHOLD 692 +#define MUL_TOOM8H_THRESHOLD 620 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 141 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 407 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 211 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 225 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 209 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 608 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 211 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 186 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 295 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 56 -#define SQR_TOOM3_THRESHOLD 173 -#define SQR_TOOM4_THRESHOLD 711 +#define SQR_TOOM2_THRESHOLD 55 +#define SQR_TOOM3_THRESHOLD 194 +#define SQR_TOOM4_THRESHOLD 507 #define SQR_TOOM6_THRESHOLD 0 /* always */ -#define SQR_TOOM8_THRESHOLD 915 +#define SQR_TOOM8_THRESHOLD 866 -#define MULMID_TOOM42_THRESHOLD 70 +#define MULMID_TOOM42_THRESHOLD 64 -#define MULMOD_BNM1_THRESHOLD 24 -#define SQRMOD_BNM1_THRESHOLD 28 +#define MULMOD_BNM1_THRESHOLD 21 +#define SQRMOD_BNM1_THRESHOLD 27 #define MUL_FFT_MODF_THRESHOLD 560 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 560, 5}, { 27, 6}, { 15, 5}, { 31, 6}, \ - { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ + { { 560, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \ + { 15, 5}, { 31, 6}, { 29, 7}, { 15, 6}, \ + { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ + { 39, 7}, { 21, 6}, { 43, 7}, { 23, 6}, \ { 47, 7}, { 25, 6}, { 51, 7}, { 27, 6}, \ - { 55, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \ + { 55, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \ - { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ + { 27, 7}, { 55, 8}, { 31, 7}, { 65, 8}, \ { 35, 7}, { 71, 8}, { 43, 9}, { 23, 8}, \ { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \ { 83, 9}, { 47, 8}, { 99, 9}, { 55,10}, \ @@ -95,96 +95,77 @@ see https://www.gnu.org/licenses/. */ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ { 63,10}, { 159,11}, { 95,10}, { 207,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271,11}, { 159,10}, { 335,11}, { 191,10}, \ - { 399,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 511,11}, { 287,10}, { 575,11}, { 319,10}, \ - { 639,11}, { 351,12}, { 191,11}, { 383,10}, \ - { 767,11}, { 415,13}, { 127,12}, { 255,11}, \ - { 575,12}, { 319,11}, { 703,12}, { 383,11}, \ - { 799,12}, { 447,13}, { 255,12}, { 511,11}, \ - { 1023,12}, { 575,11}, { 1151,12}, { 703,13}, \ - { 383,12}, { 831,14}, { 255,13}, { 511,12}, \ - { 1151,13}, { 639,12}, { 1343,13}, { 767,12}, \ - { 1599,13}, { 895,14}, { 511,13}, { 1023,12}, \ - { 2111,13}, { 1151,12}, { 2367,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 110 -#define MUL_FFT_THRESHOLD 5760 + { 63,11}, { 127,10}, { 287,11}, { 159,10}, \ + { 351,11}, { 191,10}, { 399,11}, { 223,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 72 +#define MUL_FFT_THRESHOLD 6592 -#define SQR_FFT_MODF_THRESHOLD 505 /* k = 5 */ +#define SQR_FFT_MODF_THRESHOLD 525 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 505, 5}, { 27, 6}, { 15, 5}, { 31, 6}, \ - { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 40, 7}, { 23, 6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \ - { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ - { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ - { 71, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \ - { 99, 9}, { 55,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \ - { 255,10}, { 143, 9}, { 287,10}, { 159,11}, \ - { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,10}, { 287,11}, { 159,10}, { 351,11}, \ - { 191,10}, { 415,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 639,11}, { 351,12}, { 191,11}, \ - { 383,10}, { 767,11}, { 415,13}, { 127,12}, \ - { 255,11}, { 607,12}, { 319,11}, { 703,12}, \ - { 383,11}, { 831,12}, { 447,13}, { 255,12}, \ - { 511,11}, { 1023,12}, { 703,13}, { 383,12}, \ - { 831,14}, { 255,13}, { 511,12}, { 1087,13}, \ - { 639,12}, { 1343,13}, { 767,12}, { 1599,13}, \ - { 895,14}, { 511,13}, { 1023,12}, { 2111,13}, \ - { 1151,12}, { 2431,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 109 -#define SQR_FFT_THRESHOLD 4672 + { { 525, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ + { 15, 5}, { 31, 6}, { 29, 7}, { 15, 6}, \ + { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ + { 39, 7}, { 23, 6}, { 47, 7}, { 29, 8}, \ + { 15, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \ + { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \ + { 15, 8}, { 31, 7}, { 63, 8}, { 35, 7}, \ + { 71, 8}, { 43, 9}, { 23, 8}, { 55, 9}, \ + { 31, 8}, { 71, 9}, { 39, 8}, { 83, 9}, \ + { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ + { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \ + { 63, 9}, { 135,10}, { 79, 9}, { 167,10}, \ + { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \ + { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \ + { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \ + { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \ + { 287,11}, { 159,10}, { 351,11}, { 191,10}, \ + { 415,11}, { 223,12}, { 4096,13}, { 8192,14}, \ + { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 74 +#define SQR_FFT_THRESHOLD 4736 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 55 -#define MULLO_MUL_N_THRESHOLD 11278 -#define SQRLO_BASECASE_THRESHOLD 8 -#define SQRLO_DC_THRESHOLD 51 -#define SQRLO_SQR_THRESHOLD 8907 +#define MULLO_DC_THRESHOLD 54 +#define MULLO_MUL_N_THRESHOLD 13071 +#define SQRLO_BASECASE_THRESHOLD 12 +#define SQRLO_DC_THRESHOLD 108 +#define SQRLO_SQR_THRESHOLD 8937 -#define DC_DIV_QR_THRESHOLD 46 +#define DC_DIV_QR_THRESHOLD 41 #define DC_DIVAPPR_Q_THRESHOLD 148 #define DC_BDIV_QR_THRESHOLD 57 -#define DC_BDIV_Q_THRESHOLD 160 +#define DC_BDIV_Q_THRESHOLD 158 -#define INV_MULMOD_BNM1_THRESHOLD 86 -#define INV_NEWTON_THRESHOLD 138 -#define INV_APPR_THRESHOLD 139 +#define INV_MULMOD_BNM1_THRESHOLD 74 +#define INV_NEWTON_THRESHOLD 146 +#define INV_APPR_THRESHOLD 147 -#define BINV_NEWTON_THRESHOLD 216 +#define BINV_NEWTON_THRESHOLD 372 #define REDC_1_TO_REDC_2_THRESHOLD 6 -#define REDC_2_TO_REDC_N_THRESHOLD 124 +#define REDC_2_TO_REDC_N_THRESHOLD 134 -#define MU_DIV_QR_THRESHOLD 2642 -#define MU_DIVAPPR_Q_THRESHOLD 2492 -#define MUPI_DIV_QR_THRESHOLD 74 -#define MU_BDIV_QR_THRESHOLD 2130 -#define MU_BDIV_Q_THRESHOLD 2541 +#define MU_DIV_QR_THRESHOLD 2857 +#define MU_DIVAPPR_Q_THRESHOLD 2801 +#define MUPI_DIV_QR_THRESHOLD 79 +#define MU_BDIV_QR_THRESHOLD 2541 +#define MU_BDIV_Q_THRESHOLD 2686 -#define POWM_SEC_TABLE 5,26,143,446 +#define POWM_SEC_TABLE 7,19,133,473 -#define GET_STR_DC_THRESHOLD 20 -#define GET_STR_PRECOMPUTE_THRESHOLD 39 -#define SET_STR_DC_THRESHOLD 527 -#define SET_STR_PRECOMPUTE_THRESHOLD 1069 +#define GET_STR_DC_THRESHOLD 29 +#define GET_STR_PRECOMPUTE_THRESHOLD 46 +#define SET_STR_DC_THRESHOLD 384 +#define SET_STR_PRECOMPUTE_THRESHOLD 1323 -#define FAC_DSC_THRESHOLD 430 -#define FAC_ODD_THRESHOLD 55 +#define FAC_DSC_THRESHOLD 369 +#define FAC_ODD_THRESHOLD 450 -#define MATRIX22_STRASSEN_THRESHOLD 23 -#define HGCD_THRESHOLD 79 -#define HGCD_APPR_THRESHOLD 71 +#define MATRIX22_STRASSEN_THRESHOLD 22 +#define HGCD2_DIV1_METHOD 5 /* 1.32% faster than 3 */ +#define HGCD_THRESHOLD 82 +#define HGCD_APPR_THRESHOLD 97 #define HGCD_REDUCE_THRESHOLD 3810 -#define GCD_DC_THRESHOLD 283 +#define GCD_DC_THRESHOLD 333 #define GCDEXT_DC_THRESHOLD 253 -#define JACOBI_BASE_METHOD 1 +#define JACOBI_BASE_METHOD 1 /* 0.00% faster than 2 */ diff --git a/mpn/arm/v7a/cora15/gmp-mparam.h b/mpn/arm/v7a/cora15/gmp-mparam.h index b075523a1..65228aa9f 100644 --- a/mpn/arm/v7a/cora15/gmp-mparam.h +++ b/mpn/arm/v7a/cora15/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 2017 Free Software Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,140 +33,137 @@ see https://www.gnu.org/licenses/. */ /* 2000 MHz Cortex-A15 with Neon (in spite of file position) */ /* FFT tuning limit = 0.5 M */ -/* Generated by tuneup.c, 2017-02-23, gcc 5.4 */ +/* Generated by tuneup.c, 2019-10-01, gcc 5.4 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1N_TO_MOD_1_1_THRESHOLD 3 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 3 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 2 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11 #define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 9 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8 #define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 44.35% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ #define BMOD_1_TO_MOD_1_THRESHOLD 17 -#define DIV_1_VS_MUL_1_PERCENT 265 +#define DIV_1_VS_MUL_1_PERCENT 284 #define MUL_TOOM22_THRESHOLD 28 -#define MUL_TOOM33_THRESHOLD 114 +#define MUL_TOOM33_THRESHOLD 115 #define MUL_TOOM44_THRESHOLD 178 #define MUL_TOOM6H_THRESHOLD 238 #define MUL_TOOM8H_THRESHOLD 597 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 130 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 120 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 170 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 109 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 133 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 110 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 116 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 154 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 40 -#define SQR_TOOM3_THRESHOLD 126 -#define SQR_TOOM4_THRESHOLD 342 +#define SQR_TOOM2_THRESHOLD 41 +#define SQR_TOOM3_THRESHOLD 121 +#define SQR_TOOM4_THRESHOLD 345 #define SQR_TOOM6_THRESHOLD 446 -#define SQR_TOOM8_THRESHOLD 644 +#define SQR_TOOM8_THRESHOLD 656 #define MULMID_TOOM42_THRESHOLD 52 -#define MULMOD_BNM1_THRESHOLD 19 -#define SQRMOD_BNM1_THRESHOLD 20 +#define MULMOD_BNM1_THRESHOLD 23 +#define SQRMOD_BNM1_THRESHOLD 19 -#define MUL_FFT_MODF_THRESHOLD 560 /* k = 5 */ +#define MUL_FFT_MODF_THRESHOLD 555 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 560, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \ + { { 555, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \ { 31, 7}, { 19, 6}, { 39, 7}, { 25, 6}, \ { 51, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \ { 19, 7}, { 41, 8}, { 23, 7}, { 51, 8}, \ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ { 39, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ - { 67, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \ - { 99, 9}, { 55,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \ - { 255,10}, { 143, 9}, { 287, 8}, { 575,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383, 8}, { 767,12}, { 63,11}, { 127, 9}, \ - { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \ - { 287, 9}, { 575,10}, { 303,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351, 9}, { 735,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \ - { 831,11}, { 223,10}, { 447,12}, { 4096,13}, \ + { 63, 9}, { 47, 8}, { 99, 9}, { 55,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143, 9}, { 79,10}, { 47, 9}, { 103,11}, \ + { 31,10}, { 63, 9}, { 127,10}, { 79, 9}, \ + { 159, 8}, { 319,10}, { 111,11}, { 63,10}, \ + { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \ + { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \ + { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \ + { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \ + { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \ + { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ + { 799,10}, { 415,11}, { 223,12}, { 4096,13}, \ { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 83 +#define MUL_FFT_TABLE3_SIZE 75 #define MUL_FFT_THRESHOLD 5760 -#define SQR_FFT_MODF_THRESHOLD 525 /* k = 5 */ +#define SQR_FFT_MODF_THRESHOLD 500 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 525, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 27, 7}, { 15, 6}, { 32, 7}, { 19, 6}, \ + { { 500, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \ + { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ { 39, 7}, { 25, 6}, { 51, 7}, { 27, 8}, \ - { 15, 7}, { 33, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 51, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \ - { 111,11}, { 63,10}, { 143, 9}, { 287,10}, \ - { 159,11}, { 95,10}, { 191, 9}, { 383, 8}, \ - { 767,12}, { 63,11}, { 127, 9}, { 511,10}, \ - { 271, 9}, { 543,10}, { 287, 9}, { 575,11}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ + { 23, 7}, { 51, 9}, { 15, 8}, { 31, 7}, \ + { 63, 8}, { 39, 9}, { 23, 8}, { 55,10}, \ + { 15, 9}, { 31, 8}, { 67, 9}, { 55,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \ + { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \ + { 127,10}, { 111,11}, { 63,10}, { 127, 9}, \ + { 255,10}, { 143, 9}, { 303,10}, { 159,11}, \ + { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ + { 127,10}, { 271, 9}, { 543,10}, { 287,11}, \ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351, 9}, { 735,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415, 9}, { 831,11}, { 223,12}, { 4096,13}, \ - { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 79 + { 671,10}, { 351,11}, { 191,10}, { 383, 9}, \ + { 767,10}, { 399, 9}, { 799,10}, { 415,11}, \ + { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 69 #define SQR_FFT_THRESHOLD 5312 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 37 +#define MULLO_DC_THRESHOLD 34 #define MULLO_MUL_N_THRESHOLD 8907 #define SQRLO_BASECASE_THRESHOLD 10 #define SQRLO_DC_THRESHOLD 33 #define SQRLO_SQR_THRESHOLD 10323 -#define DC_DIV_QR_THRESHOLD 57 -#define DC_DIVAPPR_Q_THRESHOLD 268 -#define DC_BDIV_QR_THRESHOLD 48 -#define DC_BDIV_Q_THRESHOLD 296 +#define DC_DIV_QR_THRESHOLD 54 +#define DC_DIVAPPR_Q_THRESHOLD 254 +#define DC_BDIV_QR_THRESHOLD 52 +#define DC_BDIV_Q_THRESHOLD 286 #define INV_MULMOD_BNM1_THRESHOLD 54 -#define INV_NEWTON_THRESHOLD 262 -#define INV_APPR_THRESHOLD 260 +#define INV_NEWTON_THRESHOLD 250 +#define INV_APPR_THRESHOLD 252 #define BINV_NEWTON_THRESHOLD 372 -#define REDC_1_TO_REDC_2_THRESHOLD 62 +#define REDC_1_TO_REDC_2_THRESHOLD 87 #define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */ -#define MU_DIV_QR_THRESHOLD 1787 -#define MU_DIVAPPR_Q_THRESHOLD 1718 -#define MUPI_DIV_QR_THRESHOLD 99 -#define MU_BDIV_QR_THRESHOLD 1528 -#define MU_BDIV_Q_THRESHOLD 1836 +#define MU_DIV_QR_THRESHOLD 1752 +#define MU_DIVAPPR_Q_THRESHOLD 1787 +#define MUPI_DIV_QR_THRESHOLD 110 +#define MU_BDIV_QR_THRESHOLD 1718 +#define MU_BDIV_Q_THRESHOLD 1800 -#define POWM_SEC_TABLE 1,32,161,473,1509 +#define POWM_SEC_TABLE 3,32,224,480,1509 -#define GET_STR_DC_THRESHOLD 16 +#define GET_STR_DC_THRESHOLD 17 #define GET_STR_PRECOMPUTE_THRESHOLD 35 -#define SET_STR_DC_THRESHOLD 100 -#define SET_STR_PRECOMPUTE_THRESHOLD 1057 +#define SET_STR_DC_THRESHOLD 126 +#define SET_STR_PRECOMPUTE_THRESHOLD 1120 -#define FAC_DSC_THRESHOLD 312 +#define FAC_DSC_THRESHOLD 162 #define FAC_ODD_THRESHOLD 27 -#define MATRIX22_STRASSEN_THRESHOLD 31 -#define HGCD_THRESHOLD 145 -#define HGCD_APPR_THRESHOLD 173 +#define MATRIX22_STRASSEN_THRESHOLD 22 +#define HGCD2_DIV1_METHOD 1 /* 4.65% faster than 3 */ +#define HGCD_THRESHOLD 59 +#define HGCD_APPR_THRESHOLD 52 #define HGCD_REDUCE_THRESHOLD 3389 -#define GCD_DC_THRESHOLD 684 -#define GCDEXT_DC_THRESHOLD 469 -#define JACOBI_BASE_METHOD 4 +#define GCD_DC_THRESHOLD 573 +#define GCDEXT_DC_THRESHOLD 492 +#define JACOBI_BASE_METHOD 4 /* 17.62% faster than 1 */ diff --git a/mpn/arm/v7a/cora17/gmp-mparam.h b/mpn/arm/v7a/cora17/gmp-mparam.h index 152b403ff..906d84d1c 100644 --- a/mpn/arm/v7a/cora17/gmp-mparam.h +++ b/mpn/arm/v7a/cora17/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 2018 Free Software Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,39 +33,39 @@ see https://www.gnu.org/licenses/. */ /* 1800 MHz Cortex-A17 with Neon (in spite of file position) */ /* FFT tuning limit = 0.5 M */ -/* Generated by tuneup.c, 2018-10-27, gcc 6.3 */ +/* Generated by tuneup.c, 2019-10-01, gcc 6.3 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1N_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 3 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 8 #define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13 #define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 48.88% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 42 +#define BMOD_1_TO_MOD_1_THRESHOLD 44 -#define DIV_1_VS_MUL_1_PERCENT 208 +#define DIV_1_VS_MUL_1_PERCENT 238 #define MUL_TOOM22_THRESHOLD 38 -#define MUL_TOOM33_THRESHOLD 131 +#define MUL_TOOM33_THRESHOLD 129 #define MUL_TOOM44_THRESHOLD 208 -#define MUL_TOOM6H_THRESHOLD 303 -#define MUL_TOOM8H_THRESHOLD 454 +#define MUL_TOOM6H_THRESHOLD 318 +#define MUL_TOOM8H_THRESHOLD 478 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 141 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 137 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 176 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 130 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 145 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 178 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 151 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 191 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 58 +#define SQR_TOOM2_THRESHOLD 60 #define SQR_TOOM3_THRESHOLD 189 #define SQR_TOOM4_THRESHOLD 348 #define SQR_TOOM6_THRESHOLD 426 @@ -73,16 +73,15 @@ see https://www.gnu.org/licenses/. */ #define MULMID_TOOM42_THRESHOLD 62 -#define MULMOD_BNM1_THRESHOLD 22 -#define SQRMOD_BNM1_THRESHOLD 30 +#define MULMOD_BNM1_THRESHOLD 24 +#define SQRMOD_BNM1_THRESHOLD 29 #define MUL_FFT_MODF_THRESHOLD 565 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 565, 5}, { 29, 6}, { 15, 5}, { 31, 6}, \ - { 16, 5}, { 33, 6}, { 17, 5}, { 35, 6}, \ + { { 565, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \ + { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \ { 15, 8}, { 31, 7}, { 63, 8}, { 43, 9}, \ { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \ @@ -92,84 +91,88 @@ see https://www.gnu.org/licenses/. */ { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \ { 111,11}, { 63,10}, { 143, 9}, { 287, 8}, \ { 575,10}, { 159,11}, { 95,10}, { 191, 9}, \ - { 383, 8}, { 767, 9}, { 399,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511, 8}, { 1023, 9}, \ - { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351, 9}, { 703,10}, { 367, 9}, \ - { 735,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 399, 9}, { 799,10}, { 415, 9}, { 831,10}, \ - { 431, 9}, { 863,11}, { 223,10}, { 447,12}, \ - { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 92 + { 383, 8}, { 767, 9}, { 399, 8}, { 799,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511, 8}, \ + { 1023, 9}, { 543,10}, { 287, 9}, { 575,10}, \ + { 303, 9}, { 607,11}, { 159,10}, { 319, 9}, \ + { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \ + { 703,10}, { 367, 9}, { 735,11}, { 191,10}, \ + { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ + { 415, 9}, { 831,10}, { 431, 9}, { 863,11}, \ + { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \ + { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 90 #define MUL_FFT_THRESHOLD 5760 -#define SQR_FFT_MODF_THRESHOLD 525 /* k = 5 */ +#define SQR_FFT_MODF_THRESHOLD 496 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 525, 5}, { 29, 6}, { 15, 5}, { 33, 6}, \ - { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 43, 9}, { 23, 8}, { 55,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ - { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \ + { { 496, 5}, { 29, 6}, { 15, 5}, { 31, 6}, \ + { 17, 5}, { 35, 6}, { 28, 7}, { 15, 6}, \ + { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \ + { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ + { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ + { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ + { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ + { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ + { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ + { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ + { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \ + { 111,11}, { 63,10}, { 127, 9}, { 255,10}, \ { 143, 9}, { 287,10}, { 159, 9}, { 319,11}, \ { 95,10}, { 191, 9}, { 383, 8}, { 767, 9}, \ - { 399,12}, { 63,11}, { 127, 9}, { 511,10}, \ - { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \ - { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 335, 9}, { 671,10}, { 351, 9}, { 703,10}, \ - { 367,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 399, 9}, { 799,10}, { 415, 9}, { 831,10}, \ - { 431, 9}, { 863,10}, { 447,12}, { 4096,13}, \ - { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 83 + { 399,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \ + { 575,10}, { 303,11}, { 159,10}, { 319, 9}, \ + { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \ + { 703,10}, { 367, 9}, { 735,11}, { 191,10}, \ + { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ + { 415, 9}, { 831,10}, { 431, 9}, { 863,10}, \ + { 447,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 89 #define SQR_FFT_THRESHOLD 4736 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 25 -#define MULLO_MUL_N_THRESHOLD 13463 +#define MULLO_DC_THRESHOLD 30 +#define MULLO_MUL_N_THRESHOLD 11278 #define SQRLO_BASECASE_THRESHOLD 0 /* always */ -#define SQRLO_DC_THRESHOLD 26 -#define SQRLO_SQR_THRESHOLD 8907 +#define SQRLO_DC_THRESHOLD 25 +#define SQRLO_SQR_THRESHOLD 9449 -#define DC_DIV_QR_THRESHOLD 35 -#define DC_DIVAPPR_Q_THRESHOLD 111 -#define DC_BDIV_QR_THRESHOLD 32 -#define DC_BDIV_Q_THRESHOLD 108 +#define DC_DIV_QR_THRESHOLD 37 +#define DC_DIVAPPR_Q_THRESHOLD 108 +#define DC_BDIV_QR_THRESHOLD 38 +#define DC_BDIV_Q_THRESHOLD 100 -#define INV_MULMOD_BNM1_THRESHOLD 74 -#define INV_NEWTON_THRESHOLD 173 -#define INV_APPR_THRESHOLD 123 +#define INV_MULMOD_BNM1_THRESHOLD 90 +#define INV_NEWTON_THRESHOLD 157 +#define INV_APPR_THRESHOLD 118 #define BINV_NEWTON_THRESHOLD 303 #define REDC_1_TO_REDC_2_THRESHOLD 2 -#define REDC_2_TO_REDC_N_THRESHOLD 159 +#define REDC_2_TO_REDC_N_THRESHOLD 155 -#define MU_DIV_QR_THRESHOLD 1895 -#define MU_DIVAPPR_Q_THRESHOLD 1895 -#define MUPI_DIV_QR_THRESHOLD 73 +#define MU_DIV_QR_THRESHOLD 2089 +#define MU_DIVAPPR_Q_THRESHOLD 2089 +#define MUPI_DIV_QR_THRESHOLD 70 #define MU_BDIV_QR_THRESHOLD 1685 #define MU_BDIV_Q_THRESHOLD 2089 -#define POWM_SEC_TABLE 5,19,122,480,1486 +#define POWM_SEC_TABLE 4,24,96,480,1420 #define GET_STR_DC_THRESHOLD 14 #define GET_STR_PRECOMPUTE_THRESHOLD 28 -#define SET_STR_DC_THRESHOLD 108 -#define SET_STR_PRECOMPUTE_THRESHOLD 630 +#define SET_STR_DC_THRESHOLD 136 +#define SET_STR_PRECOMPUTE_THRESHOLD 541 -#define FAC_DSC_THRESHOLD 282 -#define FAC_ODD_THRESHOLD 55 +#define FAC_DSC_THRESHOLD 127 +#define FAC_ODD_THRESHOLD 29 -#define MATRIX22_STRASSEN_THRESHOLD 28 -#define HGCD_THRESHOLD 58 -#define HGCD_APPR_THRESHOLD 52 +#define MATRIX22_STRASSEN_THRESHOLD 27 +#define HGCD2_DIV1_METHOD 1 /* 5.92% faster than 3 */ +#define HGCD_THRESHOLD 55 +#define HGCD_APPR_THRESHOLD 50 #define HGCD_REDUCE_THRESHOLD 3524 -#define GCD_DC_THRESHOLD 358 -#define GCDEXT_DC_THRESHOLD 249 -#define JACOBI_BASE_METHOD 4 +#define GCD_DC_THRESHOLD 315 +#define GCDEXT_DC_THRESHOLD 225 +#define JACOBI_BASE_METHOD 4 /* 9.19% faster than 1 */ diff --git a/mpn/arm/v7a/cora5/gmp-mparam.h b/mpn/arm/v7a/cora5/gmp-mparam.h index dbadd94fc..9a0e7e3f8 100644 --- a/mpn/arm/v7a/cora5/gmp-mparam.h +++ b/mpn/arm/v7a/cora5/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991-2016 Free Software Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,48 +33,48 @@ see https://www.gnu.org/licenses/. */ /* 1500 MHz Cortex-A5 (odroid c1) */ /* FFT tuning limit = 0.5 M */ -/* Generated by tuneup.c, 2016-03-20, gcc 4.9 */ +/* Generated by tuneup.c, 2019-10-01, gcc 4.9 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1N_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 8 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ +#define MOD_1U_TO_MOD_1_1_THRESHOLD 7 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8 #define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 24 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 23 #define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 132.75% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 51 +#define BMOD_1_TO_MOD_1_THRESHOLD 50 -#define DIV_1_VS_MUL_1_PERCENT 178 +#define DIV_1_VS_MUL_1_PERCENT 213 -#define MUL_TOOM22_THRESHOLD 45 -#define MUL_TOOM33_THRESHOLD 149 -#define MUL_TOOM44_THRESHOLD 242 +#define MUL_TOOM22_THRESHOLD 48 +#define MUL_TOOM33_THRESHOLD 145 +#define MUL_TOOM44_THRESHOLD 244 #define MUL_TOOM6H_THRESHOLD 414 -#define MUL_TOOM8H_THRESHOLD 527 +#define MUL_TOOM8H_THRESHOLD 532 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 141 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 199 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 153 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 179 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 152 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 181 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 225 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 180 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 241 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 66 -#define SQR_TOOM3_THRESHOLD 161 -#define SQR_TOOM4_THRESHOLD 345 -#define SQR_TOOM6_THRESHOLD 517 +#define SQR_TOOM2_THRESHOLD 65 +#define SQR_TOOM3_THRESHOLD 149 +#define SQR_TOOM4_THRESHOLD 360 +#define SQR_TOOM6_THRESHOLD 486 #define SQR_TOOM8_THRESHOLD 632 #define MULMID_TOOM42_THRESHOLD 70 -#define MULMOD_BNM1_THRESHOLD 28 -#define SQRMOD_BNM1_THRESHOLD 28 +#define MULMOD_BNM1_THRESHOLD 26 +#define SQRMOD_BNM1_THRESHOLD 27 #define MUL_FFT_MODF_THRESHOLD 654 /* k = 5 */ #define MUL_FFT_TABLE3 \ @@ -91,79 +91,77 @@ see https://www.gnu.org/licenses/. */ { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \ { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 335, 9}, { 671,10}, { 351,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415,11}, { 223,12}, { 4096,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 74 + { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \ + { 159,10}, { 319, 9}, { 639,10}, { 335,11}, \ + { 191,10}, { 383, 9}, { 767,10}, { 399,11}, \ + { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 69 #define MUL_FFT_THRESHOLD 6784 #define SQR_FFT_MODF_THRESHOLD 595 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 595, 5}, { 29, 6}, { 15, 5}, { 33, 6}, \ - { 17, 5}, { 35, 6}, { 36, 7}, { 19, 6}, \ - { 40, 7}, { 21, 6}, { 43, 7}, { 23, 6}, \ - { 47, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \ - { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \ - { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 83, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 167,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511, 8}, { 1023,10}, { 271, 9}, { 543,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799,10}, { 415,11}, \ - { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 73 + { { 595, 5}, { 33, 6}, { 17, 5}, { 35, 6}, \ + { 37, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \ + { 43, 7}, { 23, 6}, { 47, 7}, { 37, 8}, \ + { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \ + { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ + { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ + { 67, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \ + { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \ + { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \ + { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \ + { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ + { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ + { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ + { 543,11}, { 159,10}, { 319, 9}, { 639,10}, \ + { 335, 9}, { 671,10}, { 351,11}, { 191,10}, \ + { 383, 9}, { 767,10}, { 415,12}, { 4096,13}, \ + { 8192,14}, { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 67 #define SQR_FFT_THRESHOLD 5760 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 48 +#define MULLO_DC_THRESHOLD 37 #define MULLO_MUL_N_THRESHOLD 13463 -#define SQRLO_BASECASE_THRESHOLD 8 -#define SQRLO_DC_THRESHOLD 37 +#define SQRLO_BASECASE_THRESHOLD 9 +#define SQRLO_DC_THRESHOLD 27 #define SQRLO_SQR_THRESHOLD 11278 -#define DC_DIV_QR_THRESHOLD 34 -#define DC_DIVAPPR_Q_THRESHOLD 122 -#define DC_BDIV_QR_THRESHOLD 49 -#define DC_BDIV_Q_THRESHOLD 151 +#define DC_DIV_QR_THRESHOLD 36 +#define DC_DIVAPPR_Q_THRESHOLD 114 +#define DC_BDIV_QR_THRESHOLD 44 +#define DC_BDIV_Q_THRESHOLD 140 #define INV_MULMOD_BNM1_THRESHOLD 94 -#define INV_NEWTON_THRESHOLD 129 -#define INV_APPR_THRESHOLD 121 +#define INV_NEWTON_THRESHOLD 149 +#define INV_APPR_THRESHOLD 132 -#define BINV_NEWTON_THRESHOLD 268 +#define BINV_NEWTON_THRESHOLD 309 #define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */ -#define REDC_2_TO_REDC_N_THRESHOLD 156 - -#define MU_DIV_QR_THRESHOLD 2089 -#define MU_DIVAPPR_Q_THRESHOLD 2130 -#define MUPI_DIV_QR_THRESHOLD 69 -#define MU_BDIV_QR_THRESHOLD 1787 -#define MU_BDIV_Q_THRESHOLD 2099 - -#define POWM_SEC_TABLE 3,19,62,624,2280 - -#define GET_STR_DC_THRESHOLD 25 -#define GET_STR_PRECOMPUTE_THRESHOLD 56 -#define SET_STR_DC_THRESHOLD 268 -#define SET_STR_PRECOMPUTE_THRESHOLD 855 - -#define FAC_DSC_THRESHOLD 238 -#define FAC_ODD_THRESHOLD 29 - -#define MATRIX22_STRASSEN_THRESHOLD 27 -#define HGCD_THRESHOLD 61 -#define HGCD_APPR_THRESHOLD 51 -#define HGCD_REDUCE_THRESHOLD 4284 -#define GCD_DC_THRESHOLD 217 -#define GCDEXT_DC_THRESHOLD 233 -#define JACOBI_BASE_METHOD 1 +#define REDC_2_TO_REDC_N_THRESHOLD 147 + +#define MU_DIV_QR_THRESHOLD 2130 +#define MU_DIVAPPR_Q_THRESHOLD 2089 +#define MUPI_DIV_QR_THRESHOLD 81 +#define MU_BDIV_QR_THRESHOLD 1895 +#define MU_BDIV_Q_THRESHOLD 2130 + +#define POWM_SEC_TABLE 7,34,106,624,2111 + +#define GET_STR_DC_THRESHOLD 28 +#define GET_STR_PRECOMPUTE_THRESHOLD 50 +#define SET_STR_DC_THRESHOLD 292 +#define SET_STR_PRECOMPUTE_THRESHOLD 773 + +#define FAC_DSC_THRESHOLD 236 +#define FAC_ODD_THRESHOLD 39 + +#define MATRIX22_STRASSEN_THRESHOLD 28 +#define HGCD2_DIV1_METHOD 5 /* 3.61% faster than 3 */ +#define HGCD_THRESHOLD 69 +#define HGCD_APPR_THRESHOLD 75 +#define HGCD_REDUCE_THRESHOLD 4455 +#define GCD_DC_THRESHOLD 241 +#define GCDEXT_DC_THRESHOLD 225 +#define JACOBI_BASE_METHOD 1 /* 15.00% faster than 4 */ diff --git a/mpn/arm/v7a/cora7/gmp-mparam.h b/mpn/arm/v7a/cora7/gmp-mparam.h index 0ee7100ab..7397ddc11 100644 --- a/mpn/arm/v7a/cora7/gmp-mparam.h +++ b/mpn/arm/v7a/cora7/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 2017 Free Software Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,135 +33,137 @@ see https://www.gnu.org/licenses/. */ /* 900 MHz Cortex-A7 (raspberry pi2) */ /* FFT tuning limit = 0.5 M */ -/* Generated by tuneup.c, 2017-02-23, gcc 4.9 */ +/* Generated by tuneup.c, 2019-10-01, gcc 8.3 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 6 #define MOD_1U_TO_MOD_1_1_THRESHOLD 8 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ #define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18 #define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 64.87% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 48 +#define BMOD_1_TO_MOD_1_THRESHOLD 49 #define DIV_1_VS_MUL_1_PERCENT 216 -#define MUL_TOOM22_THRESHOLD 44 -#define MUL_TOOM33_THRESHOLD 129 -#define MUL_TOOM44_THRESHOLD 218 -#define MUL_TOOM6H_THRESHOLD 327 -#define MUL_TOOM8H_THRESHOLD 620 +#define MUL_TOOM22_THRESHOLD 39 +#define MUL_TOOM33_THRESHOLD 132 +#define MUL_TOOM44_THRESHOLD 195 +#define MUL_TOOM6H_THRESHOLD 324 +#define MUL_TOOM8H_THRESHOLD 478 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 129 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 145 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 147 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 191 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 183 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 145 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 193 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_TOOM2_THRESHOLD 52 -#define SQR_TOOM3_THRESHOLD 162 -#define SQR_TOOM4_THRESHOLD 274 +#define SQR_TOOM3_THRESHOLD 169 +#define SQR_TOOM4_THRESHOLD 268 #define SQR_TOOM6_THRESHOLD 399 #define SQR_TOOM8_THRESHOLD 547 -#define MULMID_TOOM42_THRESHOLD 56 +#define MULMID_TOOM42_THRESHOLD 50 #define MULMOD_BNM1_THRESHOLD 21 -#define SQRMOD_BNM1_THRESHOLD 25 +#define SQRMOD_BNM1_THRESHOLD 26 -#define MUL_FFT_MODF_THRESHOLD 624 /* k = 5 */ +#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 624, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ + { { 636, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ - { 15, 7}, { 37, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 7}, { 55, 8}, \ - { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \ - { 83, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,11}, \ - { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 69 -#define MUL_FFT_THRESHOLD 5760 - -#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */ + { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \ + { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \ + { 15, 8}, { 31, 7}, { 63, 8}, { 43, 9}, \ + { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \ + { 39, 8}, { 83, 9}, { 47, 8}, { 95, 9}, \ + { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ + { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ + { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \ + { 111,11}, { 63,10}, { 159,11}, { 95,10}, \ + { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543,11}, { 159,10}, \ + { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ + { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \ + { 399, 9}, { 799,10}, { 415,11}, { 223,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 72 +#define MUL_FFT_THRESHOLD 6784 + +#define SQR_FFT_MODF_THRESHOLD 530 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 565, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ + { { 530, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ { 36, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ - { 15, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \ { 23, 7}, { 49, 8}, { 27, 7}, { 55, 8}, \ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ + { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511, 8}, \ - { 1023, 9}, { 543,10}, { 287,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 399, 9}, { 799,10}, { 415,12}, { 4096,13}, \ - { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 71 -#define SQR_FFT_THRESHOLD 4800 + { 63,10}, { 143, 9}, { 287,10}, { 159,11}, \ + { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ + { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \ + { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \ + { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \ + { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ + { 799,10}, { 415, 9}, { 831,11}, { 223,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 76 +#define SQR_FFT_THRESHOLD 4736 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 27 -#define MULLO_MUL_N_THRESHOLD 11278 +#define MULLO_DC_THRESHOLD 28 +#define MULLO_MUL_N_THRESHOLD 13463 #define SQRLO_BASECASE_THRESHOLD 5 #define SQRLO_DC_THRESHOLD 31 #define SQRLO_SQR_THRESHOLD 8907 -#define DC_DIV_QR_THRESHOLD 32 +#define DC_DIV_QR_THRESHOLD 28 #define DC_DIVAPPR_Q_THRESHOLD 92 -#define DC_BDIV_QR_THRESHOLD 39 -#define DC_BDIV_Q_THRESHOLD 114 +#define DC_BDIV_QR_THRESHOLD 34 +#define DC_BDIV_Q_THRESHOLD 112 -#define INV_MULMOD_BNM1_THRESHOLD 86 -#define INV_NEWTON_THRESHOLD 134 -#define INV_APPR_THRESHOLD 101 +#define INV_MULMOD_BNM1_THRESHOLD 78 +#define INV_NEWTON_THRESHOLD 133 +#define INV_APPR_THRESHOLD 98 -#define BINV_NEWTON_THRESHOLD 216 -#define REDC_1_TO_REDC_2_THRESHOLD 4 +#define BINV_NEWTON_THRESHOLD 240 +#define REDC_1_TO_REDC_2_THRESHOLD 2 #define REDC_2_TO_REDC_N_THRESHOLD 123 #define MU_DIV_QR_THRESHOLD 1718 -#define MU_DIVAPPR_Q_THRESHOLD 1589 -#define MUPI_DIV_QR_THRESHOLD 55 +#define MU_DIVAPPR_Q_THRESHOLD 1685 +#define MUPI_DIV_QR_THRESHOLD 66 #define MU_BDIV_QR_THRESHOLD 1528 -#define MU_BDIV_Q_THRESHOLD 1685 +#define MU_BDIV_Q_THRESHOLD 1718 -#define POWM_SEC_TABLE 1,16,102,652,2016 +#define POWM_SEC_TABLE 1,16,96,563,2016 -#define GET_STR_DC_THRESHOLD 35 -#define GET_STR_PRECOMPUTE_THRESHOLD 58 -#define SET_STR_DC_THRESHOLD 238 -#define SET_STR_PRECOMPUTE_THRESHOLD 710 +#define GET_STR_DC_THRESHOLD 31 +#define GET_STR_PRECOMPUTE_THRESHOLD 55 +#define SET_STR_DC_THRESHOLD 183 +#define SET_STR_PRECOMPUTE_THRESHOLD 706 -#define FAC_DSC_THRESHOLD 360 -#define FAC_ODD_THRESHOLD 55 +#define FAC_DSC_THRESHOLD 166 +#define FAC_ODD_THRESHOLD 199 -#define MATRIX22_STRASSEN_THRESHOLD 25 -#define HGCD_THRESHOLD 56 -#define HGCD_APPR_THRESHOLD 55 -#define HGCD_REDUCE_THRESHOLD 3524 -#define GCD_DC_THRESHOLD 174 +#define MATRIX22_STRASSEN_THRESHOLD 27 +#define HGCD2_DIV1_METHOD 1 /* 4.97% faster than 3 */ +#define HGCD_THRESHOLD 59 +#define HGCD_APPR_THRESHOLD 53 +#define HGCD_REDUCE_THRESHOLD 3389 +#define GCD_DC_THRESHOLD 171 #define GCDEXT_DC_THRESHOLD 186 -#define JACOBI_BASE_METHOD 1 +#define JACOBI_BASE_METHOD 1 /* 30.75% faster than 4 */ diff --git a/mpn/arm/v7a/cora8/gmp-mparam.h b/mpn/arm/v7a/cora8/gmp-mparam.h index dc9e071b8..47fe29b07 100644 --- a/mpn/arm/v7a/cora8/gmp-mparam.h +++ b/mpn/arm/v7a/cora8/gmp-mparam.h @@ -1,7 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012, 2015 Free Software -Foundation, Inc. +Copyright 2019 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -33,49 +32,49 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BYTES 4 /* 1000 MHz Cortex-A8 (beaglebone black) */ -/* FFT tuning limit = 10 M */ -/* Generated by tuneup.c, 2015-10-05, gcc 4.6 */ +/* FFT tuning limit = 0.5 M */ +/* Generated by tuneup.c, 2019-10-01, gcc 6.3 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 3 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 5 #define MOD_1U_TO_MOD_1_1_THRESHOLD 5 #define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX #define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12 #define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1N_PI1_METHOD 1 /* 50.46% faster than 2 */ #define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 28 +#define BMOD_1_TO_MOD_1_THRESHOLD 30 #define DIV_1_VS_MUL_1_PERCENT 192 #define MUL_TOOM22_THRESHOLD 39 #define MUL_TOOM33_THRESHOLD 129 -#define MUL_TOOM44_THRESHOLD 220 -#define MUL_TOOM6H_THRESHOLD 366 +#define MUL_TOOM44_THRESHOLD 238 +#define MUL_TOOM6H_THRESHOLD 324 #define MUL_TOOM8H_THRESHOLD 620 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 129 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 179 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 139 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 149 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 199 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 161 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 151 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 193 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 44 +#define SQR_TOOM2_THRESHOLD 50 #define SQR_TOOM3_THRESHOLD 145 -#define SQR_TOOM4_THRESHOLD 384 -#define SQR_TOOM6_THRESHOLD 414 +#define SQR_TOOM4_THRESHOLD 375 +#define SQR_TOOM6_THRESHOLD 0 /* always */ #define SQR_TOOM8_THRESHOLD 547 -#define MULMID_TOOM42_THRESHOLD 40 +#define MULMID_TOOM42_THRESHOLD 38 -#define MULMOD_BNM1_THRESHOLD 22 -#define SQRMOD_BNM1_THRESHOLD 25 +#define MULMOD_BNM1_THRESHOLD 21 +#define SQRMOD_BNM1_THRESHOLD 26 #define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */ #define MUL_FFT_TABLE3 \ @@ -83,14 +82,14 @@ see https://www.gnu.org/licenses/. */ { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ { 35, 7}, { 19, 6}, { 39, 7}, { 27, 8}, \ { 15, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \ - { 23, 7}, { 49, 8}, { 27, 7}, { 55, 8}, \ + { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \ { 83, 9}, { 47, 8}, { 99, 9}, { 55,10}, \ { 31, 9}, { 63, 8}, { 127, 9}, { 87,10}, \ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \ { 143,10}, { 79, 9}, { 167,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \ + { 199,10}, { 111,11}, { 63,10}, { 127, 9}, \ { 255,10}, { 143, 9}, { 287, 8}, { 575,10}, \ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ { 383, 8}, { 767, 9}, { 399,10}, { 207,12}, \ @@ -99,111 +98,80 @@ see https://www.gnu.org/licenses/. */ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ { 671,10}, { 351, 9}, { 703,11}, { 191,10}, \ { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415, 9}, { 831,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \ - { 287,10}, { 607, 9}, { 1215,11}, { 319,10}, \ - { 671,11}, { 351,10}, { 703,12}, { 191,11}, \ - { 383,10}, { 799,11}, { 415,10}, { 863,11}, \ - { 447,13}, { 127,12}, { 255,11}, { 511,10}, \ - { 1023,11}, { 543,10}, { 1087,11}, { 607,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \ - { 1343,12}, { 703,11}, { 1407,13}, { 383,12}, \ - { 767,11}, { 1599,12}, { 831,11}, { 1663,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,13}, { 767,12}, { 1663,13}, \ - { 895,12}, { 1791,14}, { 511,13}, { 1023,12}, \ - { 2111,13}, { 1151,12}, { 2431,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 150 + { 415,11}, { 223,10}, { 447,12}, { 4096,13}, \ + { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 87 #define MUL_FFT_THRESHOLD 7552 -#define SQR_FFT_MODF_THRESHOLD 428 /* k = 5 */ +#define SQR_FFT_MODF_THRESHOLD 436 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 428, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \ - { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ - { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 83, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159, 8}, { 319, 9}, { 167,10}, \ - { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \ - { 143, 9}, { 287, 8}, { 575, 9}, { 303,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383, 8}, { 767, 9}, { 399,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351, 9}, { 703,10}, { 367,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ - { 799,10}, { 415, 9}, { 831,11}, { 223,10}, \ - { 447,12}, { 127,11}, { 255,10}, { 511, 9}, \ - { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 735,12}, \ - { 191,11}, { 383,10}, { 799,11}, { 415,10}, \ - { 863,11}, { 447,10}, { 895,13}, { 127,12}, \ - { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \ - { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \ - { 1343,11}, { 735,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1407,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1663,12}, { 959,14}, { 255,13}, \ - { 511,12}, { 1215,13}, { 639,12}, { 1471,13}, \ - { 767,12}, { 1663,13}, { 895,12}, { 1919,14}, \ - { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \ - { 2431,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 156 -#define SQR_FFT_THRESHOLD 3712 - -#define MULLO_BASECASE_THRESHOLD 19 + { { 436, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ + { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ + { 35, 7}, { 19, 6}, { 39, 7}, { 27, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ + { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ + { 43, 9}, { 23, 8}, { 55,10}, { 15, 9}, \ + { 31, 8}, { 67, 9}, { 39, 8}, { 83, 9}, \ + { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ + { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \ + { 63, 9}, { 135,10}, { 79, 9}, { 159, 8}, \ + { 319, 9}, { 167,10}, { 95, 9}, { 191,10}, \ + { 111,11}, { 63,10}, { 127, 9}, { 255, 8}, \ + { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \ + { 159, 9}, { 319, 8}, { 639,11}, { 95,10}, \ + { 191, 9}, { 383, 8}, { 767, 9}, { 399,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ + { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \ + { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \ + { 335, 9}, { 671,10}, { 351, 9}, { 703,10}, \ + { 367,11}, { 191,10}, { 383, 9}, { 767,10}, \ + { 399, 9}, { 799,10}, { 415, 9}, { 831,11}, \ + { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \ + { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 90 +#define SQR_FFT_THRESHOLD 4544 + +#define MULLO_BASECASE_THRESHOLD 12 #define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */ -#define MULLO_MUL_N_THRESHOLD 22 -#define SQRLO_BASECASE_THRESHOLD 13 -#define SQRLO_DC_THRESHOLD 14 -#define SQRLO_SQR_THRESHOLD 7246 +#define MULLO_MUL_N_THRESHOLD 12 +#define SQRLO_BASECASE_THRESHOLD 9 +#define SQRLO_DC_THRESHOLD 20 +#define SQRLO_SQR_THRESHOLD 8907 #define DC_DIV_QR_THRESHOLD 23 -#define DC_DIVAPPR_Q_THRESHOLD 64 -#define DC_BDIV_QR_THRESHOLD 25 -#define DC_BDIV_Q_THRESHOLD 56 +#define DC_DIVAPPR_Q_THRESHOLD 86 +#define DC_BDIV_QR_THRESHOLD 21 +#define DC_BDIV_Q_THRESHOLD 68 -#define INV_MULMOD_BNM1_THRESHOLD 77 -#define INV_NEWTON_THRESHOLD 31 -#define INV_APPR_THRESHOLD 37 +#define INV_MULMOD_BNM1_THRESHOLD 78 +#define INV_NEWTON_THRESHOLD 37 +#define INV_APPR_THRESHOLD 42 -#define BINV_NEWTON_THRESHOLD 166 -#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */ +#define BINV_NEWTON_THRESHOLD 167 +#define REDC_1_TO_REDC_2_THRESHOLD 3 #define REDC_2_TO_REDC_N_THRESHOLD 198 -#define MU_DIV_QR_THRESHOLD 1787 -#define MU_DIVAPPR_Q_THRESHOLD 1558 -#define MUPI_DIV_QR_THRESHOLD 45 -#define MU_BDIV_QR_THRESHOLD 1718 -#define MU_BDIV_Q_THRESHOLD 1685 +#define MU_DIV_QR_THRESHOLD 1718 +#define MU_DIVAPPR_Q_THRESHOLD 1685 +#define MUPI_DIV_QR_THRESHOLD 47 +#define MU_BDIV_QR_THRESHOLD 2089 +#define MU_BDIV_Q_THRESHOLD 1787 -#define POWM_SEC_TABLE 1,19,101,371,1486 +#define POWM_SEC_TABLE 3,16,96,428,1420 -#define GET_STR_DC_THRESHOLD 21 -#define GET_STR_PRECOMPUTE_THRESHOLD 38 -#define SET_STR_DC_THRESHOLD 146 -#define SET_STR_PRECOMPUTE_THRESHOLD 531 +#define GET_STR_DC_THRESHOLD 18 +#define GET_STR_PRECOMPUTE_THRESHOLD 39 +#define SET_STR_DC_THRESHOLD 136 +#define SET_STR_PRECOMPUTE_THRESHOLD 511 -#define FAC_DSC_THRESHOLD 155 -#define FAC_ODD_THRESHOLD 24 +#define FAC_DSC_THRESHOLD 141 +#define FAC_ODD_THRESHOLD 28 -#define MATRIX22_STRASSEN_THRESHOLD 24 -#define HGCD_THRESHOLD 42 +#define MATRIX22_STRASSEN_THRESHOLD 27 +#define HGCD2_DIV1_METHOD 5 /* 4.40% faster than 4 */ +#define HGCD_THRESHOLD 47 #define HGCD_APPR_THRESHOLD 50 -#define HGCD_REDUCE_THRESHOLD 3664 -#define GCD_DC_THRESHOLD 156 -#define GCDEXT_DC_THRESHOLD 116 -#define JACOBI_BASE_METHOD 4 +#define HGCD_REDUCE_THRESHOLD 3524 +#define GCD_DC_THRESHOLD 142 +#define GCDEXT_DC_THRESHOLD 118 +#define JACOBI_BASE_METHOD 4 /* 6.16% faster than 1 */ |