diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2014-03-15 21:26:50 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2014-03-15 21:26:50 +0100 |
commit | 2d782b9d8794e468ab398dde30b98079fac622e1 (patch) | |
tree | 9db4fd0948cebcf560c4f2cc2e5b946660cf1c68 | |
parent | b45a5d123c5ac038ab38f6ac64ac66250fabc6fe (diff) | |
download | gmp-2d782b9d8794e468ab398dde30b98079fac622e1.tar.gz |
Retune.
-rw-r--r-- | mpn/s390_64/z10/gmp-mparam.h | 159 | ||||
-rw-r--r-- | mpn/sparc64/ultrasparc34/gmp-mparam.h | bin | 10454 -> 10593 bytes | |||
-rw-r--r-- | mpn/x86/atom/gmp-mparam.h | 216 | ||||
-rw-r--r-- | mpn/x86/bd1/gmp-mparam.h | 208 | ||||
-rw-r--r-- | mpn/x86/coreihwl/gmp-mparam.h | 208 | ||||
-rw-r--r-- | mpn/x86_64/atom/gmp-mparam.h | 199 | ||||
-rw-r--r-- | mpn/x86_64/coreihwl/gmp-mparam.h | 200 | ||||
-rw-r--r-- | mpn/x86_64/coreisbr/gmp-mparam.h | 177 | ||||
-rw-r--r-- | mpn/x86_64/k10/gmp-mparam.h | 116 |
9 files changed, 981 insertions, 502 deletions
diff --git a/mpn/s390_64/z10/gmp-mparam.h b/mpn/s390_64/z10/gmp-mparam.h index 328a78b19..c034f9b3b 100644 --- a/mpn/s390_64/z10/gmp-mparam.h +++ b/mpn/s390_64/z10/gmp-mparam.h @@ -32,7 +32,7 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BYTES 8 /* 4400 MHz IBM z10 */ -/* FFT tuning limit = 5000000 */ +/* FFT tuning limit = 15000000 */ /* Generated by tuneup.c, 2014-03-12, gcc 4.7 */ #define DIVREM_1_NORM_THRESHOLD 0 /* always */ @@ -77,57 +77,61 @@ see https://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 9 #define SQRMOD_BNM1_THRESHOLD 9 -#define MUL_FFT_MODF_THRESHOLD 252 /* k = 5 */ +#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 252, 5}, { 9, 6}, { 5, 5}, { 11, 6}, \ - { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \ + { { 220, 5}, { 7, 4}, { 15, 5}, { 9, 6}, \ + { 5, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \ { 11, 7}, { 6, 6}, { 13, 7}, { 7, 6}, \ - { 15, 7}, { 9, 8}, { 5, 7}, { 13, 8}, \ - { 7, 7}, { 15, 8}, { 9, 7}, { 19, 8}, \ - { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \ - { 15, 7}, { 31, 8}, { 19, 9}, { 11, 8}, \ - { 23,10}, { 7, 9}, { 15, 8}, { 33, 9}, \ + { 15, 7}, { 13, 8}, { 7, 7}, { 16, 8}, \ + { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \ + { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \ + { 19, 9}, { 11, 8}, { 23,10}, { 7, 9}, \ + { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \ { 27,10}, { 15, 9}, { 39,10}, { 23,11}, \ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ - { 79, 8}, { 159,10}, { 47,11}, { 31,10}, \ - { 63, 9}, { 127, 8}, { 255,10}, { 71, 9}, \ - { 143, 8}, { 287, 7}, { 575,10}, { 79, 9}, \ - { 159,11}, { 47,12}, { 31,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ - { 287, 8}, { 575,11}, { 79,10}, { 159, 9}, \ - { 319,10}, { 175, 9}, { 351, 8}, { 703, 7}, \ + { 83,10}, { 47,11}, { 31,10}, { 63, 9}, \ + { 127, 8}, { 255, 7}, { 511,10}, { 71, 9}, \ + { 143, 8}, { 287, 7}, { 575,10}, { 79,11}, \ + { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \ + { 255, 8}, { 511,10}, { 143, 9}, { 287, 8}, \ + { 575,11}, { 79,10}, { 159, 9}, { 319, 8}, \ + { 639,10}, { 175, 9}, { 351, 8}, { 703, 7}, \ { 1407, 6}, { 2815,10}, { 191, 9}, { 383, 8}, \ - { 767, 9}, { 415,10}, { 223, 9}, { 447, 8}, \ - { 895,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,11}, { 143,10}, { 287, 9}, { 575, 8}, \ - { 1151,11}, { 159,10}, { 319, 9}, { 639,11}, \ - { 175,10}, { 351, 9}, { 703, 8}, { 1407, 7}, \ - { 2815,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 415,11}, { 223,10}, { 447, 9}, { 895, 8}, \ - { 1791,11}, { 239,13}, { 63,12}, { 127,11}, \ - { 255,10}, { 511,11}, { 287,10}, { 575, 9}, \ - { 1151,12}, { 159,11}, { 319,10}, { 703, 9}, \ + { 767, 9}, { 415,11}, { 111,10}, { 223, 9}, \ + { 447, 8}, { 895,12}, { 63,11}, { 127,10}, \ + { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \ + { 575, 8}, { 1151,10}, { 319, 9}, { 639,11}, \ + { 175, 9}, { 703, 8}, { 1407, 7}, { 2815,11}, \ + { 191,10}, { 415, 9}, { 831,11}, { 223,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 511,11}, \ + { 287,10}, { 575, 9}, { 1151,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \ { 1407, 8}, { 2815,12}, { 191,11}, { 383,10}, \ - { 767,11}, { 415,10}, { 831,12}, { 223,11}, \ - { 447,10}, { 895, 9}, { 1791, 8}, { 3583,11}, \ - { 479,10}, { 959, 9}, { 1919,13}, { 127,12}, \ - { 255,11}, { 511,12}, { 287,11}, { 575,10}, \ - { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \ - { 703,10}, { 1407, 9}, { 2815,13}, { 191,12}, \ - { 383,11}, { 767,12}, { 415,11}, { 831,10}, \ - { 1663,12}, { 447,11}, { 895,10}, { 1791, 9}, \ - { 3583,12}, { 479,11}, { 959,10}, { 1919,14}, \ - { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \ - { 575,11}, { 1151,13}, { 319,12}, { 639,11}, \ - { 1279,12}, { 703,11}, { 1407,10}, { 2815,13}, \ - { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \ - { 1663,13}, { 447,12}, { 895,11}, { 1791,10}, \ - { 3583,12}, { 959,11}, { 1919,14}, { 255,13}, \ - { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \ - { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ - {2097152,22}, {4194304,23}, {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 191 + { 767,11}, { 415,10}, { 831,12}, { 223,10}, \ + { 895, 9}, { 1791,11}, { 479,13}, { 127,12}, \ + { 255,11}, { 511,10}, { 1023,12}, { 287,11}, \ + { 575,10}, { 1151,12}, { 319,11}, { 639,12}, \ + { 351,11}, { 703,10}, { 1407, 9}, { 2815,13}, \ + { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ + { 831,10}, { 1663,12}, { 447,11}, { 895,10}, \ + { 1791, 9}, { 3583,12}, { 479,14}, { 127,13}, \ + { 255,12}, { 511,11}, { 1023,12}, { 575,11}, \ + { 1151,13}, { 319,12}, { 703,11}, { 1407,10}, \ + { 2815,13}, { 383,12}, { 767,11}, { 1535,12}, \ + { 831,11}, { 1663,13}, { 447,12}, { 895,11}, \ + { 1791,10}, { 3583,14}, { 255,13}, { 511,12}, \ + { 1023,13}, { 575,12}, { 1151,13}, { 639,12}, \ + { 1279,13}, { 703,12}, { 1407,11}, { 2815,14}, \ + { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \ + { 1663,13}, { 895,12}, { 1791,11}, { 3583,15}, \ + { 255,14}, { 511,13}, { 1151,14}, { 639,13}, \ + { 1279,12}, { 2559,13}, { 1407,12}, { 2815,13}, \ + { 1471,14}, { 767,13}, { 1663,14}, { 895,13}, \ + { 1791,12}, { 3583,13}, { 8192,14}, { 16384,15}, \ + { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 205 #define MUL_FFT_THRESHOLD 1728 #define SQR_FFT_MODF_THRESHOLD 212 /* k = 5 */ @@ -147,36 +151,43 @@ see https://www.gnu.org/licenses/. */ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ { 511,10}, { 143, 9}, { 287, 8}, { 575,11}, \ { 79,10}, { 159, 9}, { 319, 8}, { 639,10}, \ - { 175, 9}, { 351, 8}, { 703, 7}, { 1407,11}, \ - { 95,10}, { 191, 9}, { 383, 8}, { 767,10}, \ - { 207,11}, { 111,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \ - { 575, 8}, { 1151,11}, { 159,10}, { 319, 9}, \ - { 639,11}, { 175,10}, { 351, 9}, { 703, 8}, \ - { 1407,11}, { 191,10}, { 383, 9}, { 767,11}, \ - { 223,13}, { 63,12}, { 127,11}, { 255,10}, \ - { 511,11}, { 287,10}, { 575, 9}, { 1151,12}, \ - { 159,11}, { 319,10}, { 639,11}, { 351,10}, \ - { 703, 9}, { 1407, 8}, { 2815,12}, { 191,11}, \ - { 383,10}, { 767,11}, { 415,12}, { 223,11}, \ - { 447,10}, { 895, 9}, { 1791,11}, { 479,13}, \ - { 127,12}, { 255,11}, { 511,12}, { 287,11}, \ - { 575,10}, { 1151,12}, { 319,11}, { 639,12}, \ - { 351,11}, { 703,10}, { 1407, 9}, { 2815,13}, \ + { 175, 9}, { 351, 8}, { 703,10}, { 191, 9}, \ + { 383, 8}, { 767,10}, { 207,11}, { 111,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \ + { 143,10}, { 287, 9}, { 575, 8}, { 1151,11}, \ + { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \ + { 351, 9}, { 703,12}, { 95,11}, { 191,10}, \ + { 383, 9}, { 767,11}, { 207,10}, { 415,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 511,11}, \ + { 287,10}, { 575, 9}, { 1151,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \ + { 1407,12}, { 191,11}, { 383,10}, { 767,11}, \ + { 415,12}, { 223,11}, { 447,10}, { 895, 9}, \ + { 1791,13}, { 127,12}, { 255,11}, { 511,12}, \ + { 287,11}, { 575,10}, { 1151,12}, { 319,11}, \ + { 639,12}, { 351,11}, { 703,10}, { 1407,13}, \ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ { 831,10}, { 1663,12}, { 447,11}, { 895,10}, \ - { 1791,12}, { 479,11}, { 959,14}, { 127,13}, \ - { 255,12}, { 511,11}, { 1023,12}, { 575,11}, \ - { 1151,13}, { 319,12}, { 639,11}, { 1279,12}, \ - { 703,11}, { 1407,10}, { 2815,13}, { 383,12}, \ - { 767,11}, { 1535,12}, { 831,11}, { 1663,13}, \ - { 447,12}, { 895,11}, { 1791,10}, { 3583,12}, \ - { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \ - { 1023,13}, { 575,12}, { 1151,13}, { 8192,14}, \ - { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \ - { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ - {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 174 + { 1791, 9}, { 3583,12}, { 479,11}, { 959,10}, \ + { 1919,14}, { 127,13}, { 255,12}, { 511,11}, \ + { 1023,12}, { 575,11}, { 1151,13}, { 319,12}, \ + { 639,11}, { 1279,12}, { 703,11}, { 1407,10}, \ + { 2815,13}, { 383,12}, { 767,11}, { 1535,12}, \ + { 831,11}, { 1663,13}, { 447,12}, { 895,11}, \ + { 1791,12}, { 959,11}, { 1919,14}, { 255,13}, \ + { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \ + { 639,12}, { 1279,13}, { 703,12}, { 1407,11}, \ + { 2815,14}, { 383,13}, { 767,12}, { 1535,13}, \ + { 831,12}, { 1663,13}, { 895,12}, { 1791,11}, \ + { 3583,13}, { 959,12}, { 1919,15}, { 255,14}, \ + { 511,13}, { 1023,12}, { 2047,13}, { 1151,14}, \ + { 639,13}, { 1279,12}, { 2559,13}, { 1407,12}, \ + { 2815,14}, { 767,13}, { 1663,14}, { 895,13}, \ + { 1791,12}, { 3583,13}, { 8192,14}, { 16384,15}, \ + { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 201 #define SQR_FFT_THRESHOLD 1728 #define MULLO_BASECASE_THRESHOLD 0 /* always */ diff --git a/mpn/sparc64/ultrasparc34/gmp-mparam.h b/mpn/sparc64/ultrasparc34/gmp-mparam.h Binary files differindex 24ec3b2ce..bfb2a1c4b 100644 --- a/mpn/sparc64/ultrasparc34/gmp-mparam.h +++ b/mpn/sparc64/ultrasparc34/gmp-mparam.h diff --git a/mpn/x86/atom/gmp-mparam.h b/mpn/x86/atom/gmp-mparam.h index 902cf733c..45df12806 100644 --- a/mpn/x86/atom/gmp-mparam.h +++ b/mpn/x86/atom/gmp-mparam.h @@ -1,6 +1,6 @@ /* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -31,133 +31,171 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BITS 32 #define GMP_LIMB_BYTES 4 -/* Generated by tuneup.c */ +/* 1667 MHz Pineview (Atom D510) */ +/* FFT tuning limit = 25000000 */ +/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */ -#define MOD_1_NORM_THRESHOLD 4 -#define MOD_1_UNNORM_THRESHOLD 8 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 +#define MOD_1_NORM_THRESHOLD 3 +#define MOD_1_UNNORM_THRESHOLD 5 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 #define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13 #define USE_PREINV_DIVREM_1 1 /* native */ +#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1_NORM_THRESHOLD 4 +#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 33 +#define BMOD_1_TO_MOD_1_THRESHOLD 31 -#define MUL_TOOM22_THRESHOLD 22 -#define MUL_TOOM33_THRESHOLD 81 +#define MUL_TOOM22_THRESHOLD 20 +#define MUL_TOOM33_THRESHOLD 74 #define MUL_TOOM44_THRESHOLD 178 #define MUL_TOOM6H_THRESHOLD 270 -#define MUL_TOOM8H_THRESHOLD 406 +#define MUL_TOOM8H_THRESHOLD 399 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 85 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 126 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 121 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 129 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 113 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 127 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 32 -#define SQR_TOOM3_THRESHOLD 109 -#define SQR_TOOM4_THRESHOLD 262 -#define SQR_TOOM6_THRESHOLD 396 -#define SQR_TOOM8_THRESHOLD 547 +#define SQR_TOOM2_THRESHOLD 30 +#define SQR_TOOM3_THRESHOLD 105 +#define SQR_TOOM4_THRESHOLD 178 +#define SQR_TOOM6_THRESHOLD 303 +#define SQR_TOOM8_THRESHOLD 527 #define MULMID_TOOM42_THRESHOLD 54 -#define MULMOD_BNM1_THRESHOLD 16 +#define MULMOD_BNM1_THRESHOLD 13 #define SQRMOD_BNM1_THRESHOLD 18 -#define MUL_FFT_MODF_THRESHOLD 404 /* k = 5 */ +#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 376, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 21, 7}, { 11, 6}, { 25, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \ - { 11, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ - { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \ - { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \ - { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \ - { 287, 8}, { 575, 9}, { 303,10}, { 159, 9}, \ - { 319,11}, { 95,10}, { 191, 9}, { 383,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \ - { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 415,11}, \ - { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \ + { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ + { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ + { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ + { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ + { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ + { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ + { 47, 8}, { 95,10}, { 31, 9}, { 79,10}, \ + { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ + { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \ + { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \ + { 143, 9}, { 287, 8}, { 575,10}, { 159,11}, \ + { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ + { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ + { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \ + { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ + { 671,10}, { 351, 9}, { 703,11}, { 191,10}, \ + { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \ + { 223,10}, { 447,12}, { 127,11}, { 255,10}, \ + { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ + { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ + { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ + { 831,11}, { 447,13}, { 127,12}, { 255,11}, \ + { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ + { 319,11}, { 735,12}, { 383,11}, { 831,12}, \ + { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ + { 1087,12}, { 575,11}, { 1151,12}, { 703,11}, \ + { 1471,13}, { 383,12}, { 831,11}, { 1663,12}, \ + { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ + { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ + { 1663,13}, { 895,12}, { 1919,14}, { 511,13}, \ + { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ + { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \ + { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \ + { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \ { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 86 +#define MUL_FFT_TABLE3_SIZE 150 #define MUL_FFT_THRESHOLD 4544 #define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 280, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 17, 7}, { 9, 6}, { 21, 7}, { 11, 6}, \ - { 24, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \ - { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \ - { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ - { 47,10}, { 15, 9}, { 31, 8}, { 63, 9}, \ - { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 127, 8}, { 255, 7}, { 511,10}, \ - { 79, 9}, { 159, 8}, { 319, 9}, { 175,10}, \ - { 95, 9}, { 191, 8}, { 383, 9}, { 207,11}, \ - { 63,10}, { 127, 9}, { 255, 8}, { 511, 9}, \ - { 271,10}, { 143, 9}, { 287,10}, { 159, 9}, \ - { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \ - { 383,10}, { 207,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \ - { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 351, 9}, { 703,11}, { 191,10}, { 415,11}, \ - { 223,10}, { 479,12}, { 4096,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 82 -#define SQR_FFT_THRESHOLD 3712 + { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ + { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ + { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ + { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ + { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \ + { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ + { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \ + { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \ + { 95, 9}, { 191,11}, { 63,10}, { 127, 9}, \ + { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \ + { 287, 8}, { 575, 9}, { 303, 8}, { 607,10}, \ + { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ + { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \ + { 575,10}, { 303, 9}, { 607,10}, { 319, 9}, \ + { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \ + { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \ + { 415,11}, { 223,10}, { 447,12}, { 127,11}, \ + { 255,10}, { 543,11}, { 287,10}, { 607,11}, \ + { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ + { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ + { 831,11}, { 479,13}, { 127,12}, { 255,11}, \ + { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ + { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \ + { 831,12}, { 447,11}, { 959,13}, { 255,12}, \ + { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ + { 639,11}, { 1343,12}, { 703,11}, { 1407,13}, \ + { 383,12}, { 831,11}, { 1663,12}, { 959,14}, \ + { 255,13}, { 511,12}, { 1215,13}, { 639,12}, \ + { 1471,13}, { 767,12}, { 1663,13}, { 895,12}, \ + { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \ + { 1151,12}, { 2431,13}, { 1407,14}, { 767,13}, \ + { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \ + { 1023,13}, { 2431,14}, { 1279,13}, { 2943,12}, \ + { 5887,14}, { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 151 +#define SQR_FFT_THRESHOLD 2880 #define MULLO_BASECASE_THRESHOLD 6 -#define MULLO_DC_THRESHOLD 53 +#define MULLO_DC_THRESHOLD 48 #define MULLO_MUL_N_THRESHOLD 8907 -#define DC_DIV_QR_THRESHOLD 63 -#define DC_DIVAPPR_Q_THRESHOLD 266 -#define DC_BDIV_QR_THRESHOLD 63 -#define DC_BDIV_Q_THRESHOLD 175 +#define DC_DIV_QR_THRESHOLD 59 +#define DC_DIVAPPR_Q_THRESHOLD 250 +#define DC_BDIV_QR_THRESHOLD 59 +#define DC_BDIV_Q_THRESHOLD 169 -#define INV_MULMOD_BNM1_THRESHOLD 42 -#define INV_NEWTON_THRESHOLD 250 -#define INV_APPR_THRESHOLD 250 +#define INV_MULMOD_BNM1_THRESHOLD 38 +#define INV_NEWTON_THRESHOLD 246 +#define INV_APPR_THRESHOLD 246 -#define BINV_NEWTON_THRESHOLD 274 -#define REDC_1_TO_REDC_N_THRESHOLD 68 +#define BINV_NEWTON_THRESHOLD 276 +#define REDC_1_TO_REDC_N_THRESHOLD 67 #define MU_DIV_QR_THRESHOLD 1334 #define MU_DIVAPPR_Q_THRESHOLD 1442 #define MUPI_DIV_QR_THRESHOLD 114 -#define MU_BDIV_QR_THRESHOLD 1078 +#define MU_BDIV_QR_THRESHOLD 1142 #define MU_BDIV_Q_THRESHOLD 1334 -#define POWM_SEC_TABLE 4,35,258,1084 +#define POWM_SEC_TABLE 1,22,98,416,1378 -#define MATRIX22_STRASSEN_THRESHOLD 15 -#define HGCD_THRESHOLD 135 -#define HGCD_APPR_THRESHOLD 164 -#define HGCD_REDUCE_THRESHOLD 2384 -#define GCD_DC_THRESHOLD 487 +#define MATRIX22_STRASSEN_THRESHOLD 13 +#define HGCD_THRESHOLD 133 +#define HGCD_APPR_THRESHOLD 169 +#define HGCD_REDUCE_THRESHOLD 2479 +#define GCD_DC_THRESHOLD 460 #define GCDEXT_DC_THRESHOLD 342 #define JACOBI_BASE_METHOD 3 #define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 27 -#define SET_STR_DC_THRESHOLD 324 -#define SET_STR_PRECOMPUTE_THRESHOLD 1290 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 321 +#define SET_STR_PRECOMPUTE_THRESHOLD 1099 -#define FAC_DSC_THRESHOLD 250 +#define FAC_DSC_THRESHOLD 198 #define FAC_ODD_THRESHOLD 34 diff --git a/mpn/x86/bd1/gmp-mparam.h b/mpn/x86/bd1/gmp-mparam.h new file mode 100644 index 000000000..7d80a1cb4 --- /dev/null +++ b/mpn/x86/bd1/gmp-mparam.h @@ -0,0 +1,208 @@ +/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* 3600 MHz Bulldozer Zambezi */ +/* FFT tuning limit = 25000000 */ +/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ + +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 3 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16 +#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11 +#define USE_PREINV_DIVREM_1 1 /* native */ +#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1_NORM_THRESHOLD 3 +#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define BMOD_1_TO_MOD_1_THRESHOLD 27 + +#define MUL_TOOM22_THRESHOLD 32 +#define MUL_TOOM33_THRESHOLD 65 +#define MUL_TOOM44_THRESHOLD 154 +#define MUL_TOOM6H_THRESHOLD 230 +#define MUL_TOOM8H_THRESHOLD 354 + +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_TOOM2_THRESHOLD 48 +#define SQR_TOOM3_THRESHOLD 87 +#define SQR_TOOM4_THRESHOLD 204 +#define SQR_TOOM6_THRESHOLD 315 +#define SQR_TOOM8_THRESHOLD 430 + +#define MULMID_TOOM42_THRESHOLD 48 + +#define MULMOD_BNM1_THRESHOLD 21 +#define SQRMOD_BNM1_THRESHOLD 23 + +#define MUL_FFT_MODF_THRESHOLD 840 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 840, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \ + { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ + { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ + { 47, 7}, { 29, 8}, { 15, 7}, { 31, 6}, \ + { 63, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ + { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \ + { 31, 7}, { 63, 8}, { 39, 7}, { 79, 9}, \ + { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \ + { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ + { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \ + { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \ + { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ + { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \ + { 127, 9}, { 255,10}, { 159,11}, { 95,10}, \ + { 191,12}, { 63,11}, { 127,10}, { 271,11}, \ + { 159,10}, { 319, 9}, { 639,11}, { 191,10}, \ + { 383, 9}, { 767,11}, { 223,12}, { 127,11}, \ + { 255,10}, { 511,11}, { 287,10}, { 607,11}, \ + { 319,10}, { 639,12}, { 191,11}, { 383,10}, \ + { 799,13}, { 127,12}, { 255,11}, { 543,10}, \ + { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \ + { 1343,11}, { 735,10}, { 1471, 9}, { 2943,12}, \ + { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ + { 1727,12}, { 447,13}, { 255,12}, { 511,11}, \ + { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ + { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \ + { 2943,13}, { 383,12}, { 767,11}, { 1599,12}, \ + { 831,11}, { 1727,10}, { 3455,14}, { 255,13}, \ + { 511,12}, { 1087,11}, { 2239,12}, { 1215,11}, \ + { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ + { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ + { 1919,11}, { 3839,12}, { 1983,11}, { 3967,10}, \ + { 7935,14}, { 511,13}, { 1023,12}, { 2239,13}, \ + { 1151,12}, { 2495,11}, { 4991,13}, { 1279,12}, \ + { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \ + { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \ + { 1791,12}, { 3583,13}, { 1919,12}, { 3967,11}, \ + { 7935,15}, { 511,14}, { 1023,13}, { 2175,12}, \ + { 4479,13}, { 2431,12}, { 4991,14}, { 1279,13}, \ + { 2943,12}, { 6015,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 160 +#define MUL_FFT_THRESHOLD 7808 + +#define SQR_FFT_MODF_THRESHOLD 690 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 690, 5}, { 28, 6}, { 15, 5}, { 32, 6}, \ + { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ + { 35, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \ + { 43, 7}, { 23, 6}, { 47, 7}, { 35, 8}, \ + { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \ + { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ + { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \ + { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ + { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \ + { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ + { 127,10}, { 79, 9}, { 167,10}, { 95, 9}, \ + { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ + { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ + { 271,11}, { 159,10}, { 319, 9}, { 639,11}, \ + { 191,10}, { 383,11}, { 223,12}, { 127,11}, \ + { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \ + { 287,10}, { 607, 9}, { 1215,11}, { 319,10}, \ + { 639,12}, { 191,11}, { 383,10}, { 799,11}, \ + { 415,13}, { 127,12}, { 255,11}, { 511,10}, \ + { 1023,11}, { 543,10}, { 1087,11}, { 607,10}, \ + { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ + { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \ + { 1599,11}, { 863,12}, { 447,11}, { 927,13}, \ + { 255,12}, { 511,11}, { 1055,10}, { 2111,11}, \ + { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ + { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ + { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ + { 1727,10}, { 3455,12}, { 895,14}, { 255,13}, \ + { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \ + { 2239,10}, { 4479,12}, { 1215,11}, { 2431,13}, \ + { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ + { 1727,11}, { 3455,13}, { 895,12}, { 1983,11}, \ + { 3967,14}, { 511,13}, { 1023,12}, { 2239,11}, \ + { 4479,13}, { 1151,12}, { 2495,11}, { 4991,10}, \ + { 9983,13}, { 1279,12}, { 2623,13}, { 1407,12}, \ + { 2943,14}, { 767,13}, { 1663,12}, { 3455,13}, \ + { 1791,12}, { 3583,13}, { 1919,12}, { 3967,15}, \ + { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \ + { 2431,12}, { 4991,11}, { 9983,14}, { 1279,13}, \ + { 2687,12}, { 5375,13}, { 2943,12}, { 5887,14}, \ + { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 166 +#define SQR_FFT_THRESHOLD 6784 + +#define MULLO_BASECASE_THRESHOLD 5 +#define MULLO_DC_THRESHOLD 31 +#define MULLO_MUL_N_THRESHOLD 14709 + +#define DC_DIV_QR_THRESHOLD 53 +#define DC_DIVAPPR_Q_THRESHOLD 230 +#define DC_BDIV_QR_THRESHOLD 50 +#define DC_BDIV_Q_THRESHOLD 136 + +#define INV_MULMOD_BNM1_THRESHOLD 78 +#define INV_NEWTON_THRESHOLD 202 +#define INV_APPR_THRESHOLD 202 + +#define BINV_NEWTON_THRESHOLD 236 +#define REDC_1_TO_REDC_N_THRESHOLD 55 + +#define MU_DIV_QR_THRESHOLD 1442 +#define MU_DIVAPPR_Q_THRESHOLD 1652 +#define MUPI_DIV_QR_THRESHOLD 81 +#define MU_BDIV_QR_THRESHOLD 1787 +#define MU_BDIV_Q_THRESHOLD 1685 + +#define POWM_SEC_TABLE 1,22,194,376,692,2657 + +#define MATRIX22_STRASSEN_THRESHOLD 21 +#define HGCD_THRESHOLD 85 +#define HGCD_APPR_THRESHOLD 50 +#define HGCD_REDUCE_THRESHOLD 4455 +#define GCD_DC_THRESHOLD 456 +#define GCDEXT_DC_THRESHOLD 345 +#define JACOBI_BASE_METHOD 4 + +#define GET_STR_DC_THRESHOLD 17 +#define GET_STR_PRECOMPUTE_THRESHOLD 27 +#define SET_STR_DC_THRESHOLD 100 +#define SET_STR_PRECOMPUTE_THRESHOLD 960 + +#define FAC_DSC_THRESHOLD 208 +#define FAC_ODD_THRESHOLD 26 diff --git a/mpn/x86/coreihwl/gmp-mparam.h b/mpn/x86/coreihwl/gmp-mparam.h new file mode 100644 index 000000000..fdd5fbb54 --- /dev/null +++ b/mpn/x86/coreihwl/gmp-mparam.h @@ -0,0 +1,208 @@ +/* x86/coreihwl gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* 2900 MHz Core i5 Haswell */ +/* FFT tuning limit = 40000000 */ +/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ + +#define MOD_1_NORM_THRESHOLD 16 +#define MOD_1_UNNORM_THRESHOLD 13 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 9 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 +#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 5 +#define USE_PREINV_DIVREM_1 1 /* native */ +#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1_NORM_THRESHOLD 15 +#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define BMOD_1_TO_MOD_1_THRESHOLD 19 + +#define MUL_TOOM22_THRESHOLD 27 +#define MUL_TOOM33_THRESHOLD 90 +#define MUL_TOOM44_THRESHOLD 218 +#define MUL_TOOM6H_THRESHOLD 318 +#define MUL_TOOM8H_THRESHOLD 490 + +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 153 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_TOOM2_THRESHOLD 44 +#define SQR_TOOM3_THRESHOLD 137 +#define SQR_TOOM4_THRESHOLD 242 +#define SQR_TOOM6_THRESHOLD 351 +#define SQR_TOOM8_THRESHOLD 597 + +#define MULMID_TOOM42_THRESHOLD 98 + +#define MULMOD_BNM1_THRESHOLD 17 +#define SQRMOD_BNM1_THRESHOLD 21 + +#define MUL_FFT_MODF_THRESHOLD 630 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 630, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ + { 15, 5}, { 31, 6}, { 28, 7}, { 15, 6}, \ + { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ + { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \ + { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ + { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ + { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ + { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ + { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ + { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ + { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ + { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \ + { 111,11}, { 63,10}, { 159,11}, { 95,10}, \ + { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543,11}, { 159,10}, \ + { 319, 9}, { 639,10}, { 335, 9}, { 671,11}, \ + { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ + { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ + { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ + { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ + { 799,11}, { 415,13}, { 127,12}, { 255,11}, \ + { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ + { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ + { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ + { 863,10}, { 1727,12}, { 447,11}, { 959,13}, \ + { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ + { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \ + { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \ + { 1599,12}, { 831,11}, { 1727,10}, { 3455,12}, \ + { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \ + { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ + { 1471,11}, { 2943,10}, { 5887,13}, { 767,12}, \ + { 1727,11}, { 3455,13}, { 895,12}, { 1983,14}, \ + { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \ + { 2495,13}, { 1279,12}, { 2559,13}, { 1407,12}, \ + { 2943,11}, { 5887,14}, { 767,13}, { 1535,12}, \ + { 3071,13}, { 1663,12}, { 3455,13}, { 1919,15}, \ + { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \ + { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \ + { 1535,13}, { 3455,14}, { 1791,13}, { 3967,12}, \ + { 7935,15}, { 1023,14}, { 2047,13}, { 4479,14}, \ + { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 168 +#define MUL_FFT_THRESHOLD 7424 + +#define SQR_FFT_MODF_THRESHOLD 530 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 530, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ + { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ + { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ + { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ + { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ + { 27, 7}, { 55, 9}, { 15, 8}, { 31, 7}, \ + { 63, 8}, { 39, 9}, { 23, 8}, { 55,10}, \ + { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ + { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ + { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ + { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ + { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ + { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ + { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \ + { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \ + { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ + { 799,12}, { 127,11}, { 255,10}, { 511, 9}, \ + { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \ + { 319,10}, { 671,11}, { 351,12}, { 191,11}, \ + { 383,10}, { 799,11}, { 415,10}, { 831,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ + { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ + { 671,10}, { 1343,11}, { 735,10}, { 1471,12}, \ + { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ + { 1727,12}, { 447,11}, { 991,13}, { 255,12}, \ + { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ + { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ + { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ + { 1727,12}, { 959,11}, { 1983,14}, { 255,13}, \ + { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \ + { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ + { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \ + { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \ + { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \ + { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \ + { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \ + { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \ + { 2175,12}, { 4479,13}, { 2431,12}, { 4863,14}, \ + { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \ + { 3455,14}, { 1791,13}, { 3967,15}, { 1023,14}, \ + { 2047,13}, { 4479,14}, { 2303,13}, { 8192,14}, \ + { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 170 +#define SQR_FFT_THRESHOLD 5760 + +#define MULLO_BASECASE_THRESHOLD 0 /* always */ +#define MULLO_DC_THRESHOLD 57 +#define MULLO_MUL_N_THRESHOLD 14281 + +#define DC_DIV_QR_THRESHOLD 23 +#define DC_DIVAPPR_Q_THRESHOLD 63 +#define DC_BDIV_QR_THRESHOLD 87 +#define DC_BDIV_Q_THRESHOLD 204 + +#define INV_MULMOD_BNM1_THRESHOLD 54 +#define INV_NEWTON_THRESHOLD 75 +#define INV_APPR_THRESHOLD 67 + +#define BINV_NEWTON_THRESHOLD 296 +#define REDC_1_TO_REDC_N_THRESHOLD 79 + +#define MU_DIV_QR_THRESHOLD 872 +#define MU_DIVAPPR_Q_THRESHOLD 654 +#define MUPI_DIV_QR_THRESHOLD 0 /* always */ +#define MU_BDIV_QR_THRESHOLD 1858 +#define MU_BDIV_Q_THRESHOLD 2089 + +#define POWM_SEC_TABLE 1,17,127,508,1603 + +#define MATRIX22_STRASSEN_THRESHOLD 19 +#define HGCD_THRESHOLD 61 +#define HGCD_APPR_THRESHOLD 60 +#define HGCD_REDUCE_THRESHOLD 3810 +/* Parameters beyond this point are not properly measured */ +#define GCD_DC_THRESHOLD 278 +#define GCDEXT_DC_THRESHOLD 298 +#define JACOBI_BASE_METHOD 4 + +#define GET_STR_DC_THRESHOLD 11 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 438 +#define SET_STR_PRECOMPUTE_THRESHOLD 1206 diff --git a/mpn/x86_64/atom/gmp-mparam.h b/mpn/x86_64/atom/gmp-mparam.h index 87f763547..6816dfc36 100644 --- a/mpn/x86_64/atom/gmp-mparam.h +++ b/mpn/x86_64/atom/gmp-mparam.h @@ -35,7 +35,7 @@ see https://www.gnu.org/licenses/. */ #define SHLD_SLOW 1 #define SHRD_SLOW 1 -/* 1600 MHz Atom 330 */ +/* 1667 MHz Pineview (Atom D510) */ /* FFT tuning limit = 25000000 */ /* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ @@ -45,7 +45,7 @@ see https://www.gnu.org/licenses/. */ #define MOD_1U_TO_MOD_1_1_THRESHOLD 3 #define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX #define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12 #define USE_PREINV_DIVREM_1 1 /* native */ #define DIV_QR_1_NORM_THRESHOLD 1 #define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -60,17 +60,17 @@ see https://www.gnu.org/licenses/. */ #define MUL_TOOM8H_THRESHOLD 212 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 79 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 84 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 80 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 58 #define SQR_BASECASE_THRESHOLD 6 -#define SQR_TOOM2_THRESHOLD 22 -#define SQR_TOOM3_THRESHOLD 74 -#define SQR_TOOM4_THRESHOLD 124 -#define SQR_TOOM6_THRESHOLD 172 -#define SQR_TOOM8_THRESHOLD 246 +#define SQR_TOOM2_THRESHOLD 23 +#define SQR_TOOM3_THRESHOLD 49 +#define SQR_TOOM4_THRESHOLD 130 +#define SQR_TOOM6_THRESHOLD 173 +#define SQR_TOOM8_THRESHOLD 238 #define MULMID_TOOM42_THRESHOLD 16 @@ -84,134 +84,137 @@ see https://www.gnu.org/licenses/. */ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \ { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \ { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \ - { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \ - { 23,10}, { 7, 9}, { 15, 8}, { 31, 9}, \ - { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \ - { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \ - { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ + { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \ + { 19, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \ + { 33, 9}, { 19, 8}, { 39, 9}, { 23,10}, \ + { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \ + { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ + { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \ { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \ { 287,10}, { 79,11}, { 47,10}, { 95,12}, \ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ - { 511,10}, { 143, 9}, { 287,11}, { 79,10}, \ - { 159, 9}, { 319,10}, { 175, 9}, { 351,11}, \ - { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \ - { 415,10}, { 223,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \ - { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \ - { 175,10}, { 351,12}, { 95,11}, { 191,10}, \ - { 383,11}, { 207,10}, { 415,11}, { 223,10}, \ - { 447,13}, { 63,12}, { 127,11}, { 255,10}, \ - { 511,11}, { 287,10}, { 575,12}, { 159,11}, \ - { 319,10}, { 639,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 415,12}, { 223,11}, { 447,10}, \ - { 895,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 511,12}, { 287,11}, { 575,10}, { 1151,12}, \ - { 319,11}, { 639,12}, { 351,11}, { 703,13}, \ - { 191,12}, { 447,11}, { 895,12}, { 479,14}, \ - { 127,13}, { 255,12}, { 575,11}, { 1151,13}, \ + { 511, 9}, { 287, 8}, { 575,11}, { 79,10}, \ + { 159, 9}, { 319,10}, { 175, 9}, { 351, 8}, \ + { 703,11}, { 95,10}, { 191, 9}, { 383, 8}, \ + { 767,10}, { 207, 9}, { 415,10}, { 223,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \ + { 143,10}, { 287, 9}, { 575, 8}, { 1151,10}, \ + { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \ + { 703, 8}, { 1407, 7}, { 2815,10}, { 383,11}, \ + { 207,10}, { 415,11}, { 223,10}, { 447,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 511,11}, \ + { 287,10}, { 575, 9}, { 1151,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \ + { 1407,12}, { 191,11}, { 415,12}, { 223,11}, \ + { 447,10}, { 895,11}, { 479,13}, { 127,12}, \ + { 255,11}, { 511,12}, { 287,11}, { 575,10}, \ + { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \ + { 703,10}, { 1407,13}, { 191,12}, { 383,11}, \ + { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ + { 895,12}, { 479,14}, { 127,13}, { 255,12}, \ + { 511,11}, { 1023,12}, { 575,11}, { 1151,13}, \ { 319,12}, { 703,11}, { 1407,13}, { 383,12}, \ - { 767,13}, { 447,12}, { 895,14}, { 255,13}, \ - { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \ - { 703,12}, { 1407,14}, { 383,13}, { 831,12}, \ - { 1663,13}, { 895,15}, { 255,14}, { 511,13}, \ - { 1087,12}, { 2175,13}, { 1151,14}, { 639,13}, \ - { 1279,12}, { 2559,13}, { 1407,12}, { 2815,14}, \ - { 767,13}, { 1663,14}, { 895,13}, { 1791,12}, \ - { 3583,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2559,14}, { 1407,13}, { 2815,15}, \ + { 831,13}, { 447,12}, { 895,11}, { 1791,14}, \ + { 255,13}, { 511,12}, { 1023,13}, { 575,12}, \ + { 1151,13}, { 703,12}, { 1407,14}, { 383,13}, \ + { 831,12}, { 1663,13}, { 895,12}, { 1791,15}, \ + { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \ + { 1151,14}, { 639,13}, { 1407,12}, { 2815,14}, \ + { 767,13}, { 1663,14}, { 895,13}, { 1919,12}, \ + { 3839,15}, { 511,14}, { 1023,13}, { 2175,14}, \ + { 1151,13}, { 2431,14}, { 1407,13}, { 2815,15}, \ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 177 +#define MUL_FFT_TABLE3_SIZE 185 #define MUL_FFT_THRESHOLD 2240 -#define SQR_FFT_MODF_THRESHOLD 188 /* k = 5 */ +#define SQR_FFT_MODF_THRESHOLD 208 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 188, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \ + { { 208, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \ { 13, 7}, { 7, 6}, { 15, 7}, { 13, 8}, \ { 7, 7}, { 17, 8}, { 9, 7}, { 19, 8}, \ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \ - { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \ + { 19, 9}, { 11, 8}, { 23,10}, { 7, 9}, \ { 15, 8}, { 31, 9}, { 23,10}, { 15, 9}, \ - { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \ - { 31, 9}, { 63, 8}, { 127, 7}, { 255,10}, \ - { 39, 8}, { 159,10}, { 47, 9}, { 95, 8}, \ - { 191,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255,10}, { 71, 9}, { 143, 8}, { 287, 7}, \ - { 575, 9}, { 159, 8}, { 319,11}, { 47,10}, \ - { 95, 9}, { 191,12}, { 31,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ - { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \ - { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \ - { 383,10}, { 207,11}, { 111,10}, { 223, 9}, \ - { 447,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,11}, { 143,10}, { 287, 9}, { 575,11}, \ - { 159,10}, { 319,11}, { 175,10}, { 351,12}, \ - { 95,11}, { 191,10}, { 383,11}, { 207,10}, \ + { 39,10}, { 23,11}, { 15,10}, { 31, 9}, \ + { 63, 8}, { 127,10}, { 39, 9}, { 79, 8}, \ + { 159,10}, { 47, 8}, { 191,10}, { 55,11}, \ + { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \ + { 71, 9}, { 143, 8}, { 287, 7}, { 575,10}, \ + { 79, 9}, { 159,11}, { 47, 9}, { 191,12}, \ + { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ + { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \ + { 159, 9}, { 319, 8}, { 639,10}, { 175, 9}, \ + { 351, 8}, { 703,10}, { 191, 9}, { 383,10}, \ + { 207,11}, { 111,10}, { 223, 9}, { 447,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \ + { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \ + { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \ + { 703,11}, { 191,10}, { 383,11}, { 207,10}, \ { 415,11}, { 223,10}, { 447,13}, { 63,12}, \ { 127,11}, { 255,10}, { 511,11}, { 287,10}, \ { 575,12}, { 159,11}, { 319,10}, { 639,11}, \ - { 351,10}, { 703,12}, { 191,11}, { 415,12}, \ - { 223,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 511,12}, { 287,11}, { 575,12}, { 319,11}, \ - { 639,12}, { 351,11}, { 703,13}, { 191,12}, \ - { 447,14}, { 127,13}, { 255,12}, { 511,11}, \ - { 1023,12}, { 575,13}, { 319,12}, { 703,11}, \ - { 1407,13}, { 383,12}, { 767,13}, { 447,12}, \ - { 895,14}, { 255,13}, { 511,12}, { 1023,13}, \ - { 575,12}, { 1151,13}, { 703,12}, { 1407,14}, \ - { 383,13}, { 831,12}, { 1663,13}, { 895,15}, \ - { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \ - { 1151,14}, { 639,13}, { 1279,12}, { 2559,13}, \ - { 1407,12}, { 2815,14}, { 767,13}, { 1663,14}, \ - { 895,13}, { 1791,12}, { 3583,15}, { 511,14}, \ - { 1023,13}, { 2175,14}, { 1151,13}, { 2431,12}, \ - { 4863,14}, { 1279,13}, { 2559,14}, { 1407,13}, \ - { 2815,15}, { 32768,16}, { 65536,17}, { 131072,18}, \ - { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ - {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 170 + { 351,10}, { 703,12}, { 191,11}, { 383,10}, \ + { 767,11}, { 415,12}, { 223,11}, { 447,10}, \ + { 895,13}, { 127,12}, { 255,11}, { 511,12}, \ + { 287,11}, { 575,12}, { 319,11}, { 639,12}, \ + { 351,11}, { 703,13}, { 191,12}, { 383,11}, \ + { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ + { 895,14}, { 127,13}, { 255,12}, { 511,11}, \ + { 1023,12}, { 575,11}, { 1151,13}, { 319,12}, \ + { 703,11}, { 1407,13}, { 383,12}, { 831,13}, \ + { 447,12}, { 895,14}, { 255,13}, { 511,12}, \ + { 1023,13}, { 575,12}, { 1151,13}, { 703,12}, \ + { 1407,14}, { 383,13}, { 831,12}, { 1663,13}, \ + { 895,15}, { 255,14}, { 511,13}, { 1087,12}, \ + { 2175,13}, { 1151,14}, { 639,13}, { 1407,12}, \ + { 2815,14}, { 767,13}, { 1663,14}, { 895,13}, \ + { 1791,12}, { 3583,15}, { 511,14}, { 1023,13}, \ + { 2047,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ + { 1407,13}, { 2815,15}, { 32768,16}, { 65536,17}, \ + { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ + {2097152,22}, {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 175 #define SQR_FFT_THRESHOLD 1600 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 33 +#define MULLO_DC_THRESHOLD 34 #define MULLO_MUL_N_THRESHOLD 4392 -#define DC_DIV_QR_THRESHOLD 35 -#define DC_DIVAPPR_Q_THRESHOLD 119 -#define DC_BDIV_QR_THRESHOLD 31 +#define DC_DIV_QR_THRESHOLD 32 +#define DC_DIVAPPR_Q_THRESHOLD 122 +#define DC_BDIV_QR_THRESHOLD 35 #define DC_BDIV_Q_THRESHOLD 76 #define INV_MULMOD_BNM1_THRESHOLD 22 #define INV_NEWTON_THRESHOLD 163 -#define INV_APPR_THRESHOLD 133 +#define INV_APPR_THRESHOLD 134 #define BINV_NEWTON_THRESHOLD 179 -#define REDC_1_TO_REDC_2_THRESHOLD 20 -#define REDC_2_TO_REDC_N_THRESHOLD 42 +#define REDC_1_TO_REDC_2_THRESHOLD 17 +#define REDC_2_TO_REDC_N_THRESHOLD 43 #define MU_DIV_QR_THRESHOLD 855 #define MU_DIVAPPR_Q_THRESHOLD 872 -#define MUPI_DIV_QR_THRESHOLD 81 -#define MU_BDIV_QR_THRESHOLD 762 +#define MUPI_DIV_QR_THRESHOLD 83 +#define MU_BDIV_QR_THRESHOLD 748 #define MU_BDIV_Q_THRESHOLD 807 -#define POWM_SEC_TABLE 3,22,114,386,1486 +#define POWM_SEC_TABLE 1,16,114,452,1603 #define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 99 -#define HGCD_APPR_THRESHOLD 101 +#define HGCD_THRESHOLD 102 +#define HGCD_APPR_THRESHOLD 95 #define HGCD_REDUCE_THRESHOLD 1329 #define GCD_DC_THRESHOLD 268 -#define GCDEXT_DC_THRESHOLD 217 +#define GCDEXT_DC_THRESHOLD 221 #define JACOBI_BASE_METHOD 4 -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 27 -#define SET_STR_DC_THRESHOLD 286 -#define SET_STR_PRECOMPUTE_THRESHOLD 1424 +#define GET_STR_DC_THRESHOLD 14 +#define GET_STR_PRECOMPUTE_THRESHOLD 26 +#define SET_STR_DC_THRESHOLD 418 +#define SET_STR_PRECOMPUTE_THRESHOLD 1420 #define FAC_DSC_THRESHOLD 1065 #define FAC_ODD_THRESHOLD 0 /* always */ diff --git a/mpn/x86_64/coreihwl/gmp-mparam.h b/mpn/x86_64/coreihwl/gmp-mparam.h index 0779f805e..eef44b3a8 100644 --- a/mpn/x86_64/coreihwl/gmp-mparam.h +++ b/mpn/x86_64/coreihwl/gmp-mparam.h @@ -32,7 +32,7 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BYTES 8 /* 2900 MHz Core i5 Haswell */ -/* FFT tuning limit = 40000000 */ +/* FFT tuning limit = 75000000 */ /* Generated by tuneup.c, 2014-03-12, gcc 4.5 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ @@ -73,12 +73,12 @@ see https://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 13 #define SQRMOD_BNM1_THRESHOLD 17 -#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */ +#define MUL_FFT_MODF_THRESHOLD 376 /* k = 5 */ #define MUL_FFT_TABLE3 \ - { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ + { { 376, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 23, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \ - { 11, 7}, { 25, 8}, { 13, 7}, { 27, 8}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \ + { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \ { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \ { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \ @@ -91,105 +91,109 @@ see https://www.gnu.org/licenses/. */ { 167,11}, { 95,10}, { 191, 9}, { 383,11}, \ { 111,12}, { 63,11}, { 127,10}, { 255, 9}, \ { 511,10}, { 271, 9}, { 543,11}, { 143,10}, \ - { 287, 9}, { 575,10}, { 303,11}, { 159,10}, \ - { 319, 9}, { 639,12}, { 95,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 415,13}, { 63,12}, \ - { 127,11}, { 255,10}, { 511,11}, { 271,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 575, 9}, \ - { 1151,11}, { 303,10}, { 607,12}, { 159,11}, \ - { 319,10}, { 639,11}, { 335,10}, { 671,11}, \ - { 351,10}, { 703,11}, { 367,10}, { 735,11}, \ - { 383,10}, { 767,11}, { 415,10}, { 831,11}, \ - { 447,10}, { 895,11}, { 479,13}, { 127,12}, \ - { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \ - { 1087,11}, { 575,10}, { 1151,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,12}, { 351,11}, \ - { 703,10}, { 1407,11}, { 735,12}, { 383,11}, \ - { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ - { 895,12}, { 479,14}, { 127,12}, { 511,11}, \ - { 1023,12}, { 543,11}, { 1087,12}, { 575,11}, \ - { 1151,12}, { 607,11}, { 1215,13}, { 319,12}, \ - { 671,11}, { 1343,12}, { 703,11}, { 1407,12}, \ - { 735,13}, { 383,12}, { 767,11}, { 1535,12}, \ - { 831,11}, { 1663,13}, { 447,12}, { 959,11}, \ - { 1919,13}, { 511,12}, { 1087,13}, { 575,12}, \ - { 1215,11}, { 2431,13}, { 639,12}, { 1343,13}, \ - { 703,12}, { 1407,11}, { 2815,14}, { 383,13}, \ - { 767,12}, { 1535,13}, { 831,12}, { 1727,13}, \ - { 959,12}, { 1919,14}, { 511,13}, { 1023,12}, \ - { 2047,13}, { 1087,12}, { 2175,13}, { 1215,12}, \ - { 2431,14}, { 639,13}, { 1279,12}, { 2559,13}, \ - { 1343,12}, { 2687,13}, { 1407,12}, { 2815,13}, \ - { 1471,12}, { 2943,14}, { 767,13}, { 1599,12}, \ - { 3199,13}, { 1727,14}, { 895,13}, { 1791,12}, \ - { 3583,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2687,14}, { 1407,13}, { 2943,15}, \ - { 767,14}, { 1535,13}, { 3199,14}, { 1663,13}, \ - { 3455,12}, { 6911,14}, { 1791,13}, { 3583,14}, \ - { 1919,16}, { 511,15}, { 1023,14}, { 2303,13}, \ - { 4607,14}, { 2431,13}, { 4863,15}, { 32768,16}, \ - { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ - {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 224 -#define MUL_FFT_THRESHOLD 4224 - -#define SQR_FFT_MODF_THRESHOLD 344 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 344, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ - { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 21, 7}, { 11, 6}, { 25, 7}, { 13, 6}, \ - { 27, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \ - { 13, 7}, { 28, 8}, { 15, 7}, { 31, 8}, \ - { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ - { 33, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ - { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ - { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \ - { 67,10}, { 39, 9}, { 79,10}, { 55,11}, \ - { 31,10}, { 79,11}, { 47,10}, { 95,12}, \ - { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ - { 511,10}, { 135,11}, { 79,10}, { 159, 9}, \ - { 319,11}, { 95,10}, { 191, 9}, { 383,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,11}, { 143, 9}, { 575,10}, \ - { 303, 9}, { 607,10}, { 319, 9}, { 639,12}, \ - { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \ - { 207,10}, { 415,13}, { 63,12}, { 127,11}, \ - { 255,10}, { 511,11}, { 271,10}, { 543, 9}, \ - { 1087,11}, { 287,10}, { 575,11}, { 303,10}, \ - { 607,11}, { 319,10}, { 639,11}, { 335,10}, \ - { 671,11}, { 351,10}, { 703,11}, { 367,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 447,10}, { 895,11}, { 479,13}, \ - { 127,11}, { 511,10}, { 1023,11}, { 543,10}, \ - { 1087,12}, { 287,11}, { 575,10}, { 1151,11}, \ - { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \ - { 1279,11}, { 671,12}, { 351,11}, { 735,13}, \ + { 287, 9}, { 575,10}, { 303, 9}, { 607,11}, \ + { 159,10}, { 319, 9}, { 639,12}, { 95,11}, \ + { 191,10}, { 383,11}, { 207,10}, { 415,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 511,11}, \ + { 271,10}, { 543, 9}, { 1087,11}, { 287,10}, \ + { 607,12}, { 159,11}, { 319,10}, { 639,11}, \ + { 335,10}, { 671,11}, { 351,10}, { 703,11}, \ + { 367,12}, { 191,11}, { 383,10}, { 767,11}, \ + { 415,10}, { 831,12}, { 223,11}, { 447,10}, \ + { 895,11}, { 479,13}, { 127,12}, { 255,11}, \ + { 511,10}, { 1023,11}, { 543,10}, { 1087,12}, \ + { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \ + { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \ + { 351,11}, { 703,10}, { 1407,11}, { 735,13}, \ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ - { 831,12}, { 447,11}, { 895,12}, { 479,14}, \ - { 127,12}, { 511,11}, { 1023,12}, { 543,11}, \ - { 1087,12}, { 607,11}, { 1215,13}, { 319,12}, \ - { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \ - { 735,13}, { 383,12}, { 831,11}, { 1663,13}, \ - { 447,12}, { 959,13}, { 511,12}, { 1087,13}, \ - { 575,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1343,13}, { 703,12}, { 1407,14}, { 383,13}, \ - { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \ - { 959,14}, { 511,13}, { 1087,12}, { 2175,13}, \ - { 1215,12}, { 2431,14}, { 639,13}, { 1343,12}, \ - { 2687,13}, { 1407,12}, { 2815,13}, { 1471,14}, \ - { 767,13}, { 1599,12}, { 3199,13}, { 1663,14}, \ - { 895,13}, { 1791,12}, { 3583,15}, { 511,14}, \ + { 831,10}, { 1663,12}, { 447,11}, { 895,12}, \ + { 479,14}, { 127,12}, { 511,11}, { 1023,12}, \ + { 543,11}, { 1087,12}, { 575,11}, { 1151,12}, \ + { 607,11}, { 1215,13}, { 319,12}, { 671,11}, \ + { 1343,12}, { 703,11}, { 1407,12}, { 735,13}, \ + { 383,12}, { 767,11}, { 1535,12}, { 831,13}, \ + { 447,12}, { 959,11}, { 1919,13}, { 511,12}, \ + { 1087,13}, { 575,12}, { 1215,13}, { 639,12}, \ + { 1343,13}, { 703,12}, { 1407,11}, { 2815,14}, \ + { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \ + { 1727,13}, { 959,12}, { 1919,14}, { 511,13}, \ + { 1023,12}, { 2047,13}, { 1087,12}, { 2175,13}, \ + { 1215,12}, { 2431,14}, { 639,13}, { 1279,12}, \ + { 2559,13}, { 1343,12}, { 2687,13}, { 1407,12}, \ + { 2815,13}, { 1471,12}, { 2943,14}, { 767,13}, \ + { 1535,12}, { 3071,13}, { 1727,14}, { 895,13}, \ + { 1791,12}, { 3583,13}, { 1919,15}, { 511,14}, \ { 1023,13}, { 2175,14}, { 1151,13}, { 2431,12}, \ { 4863,14}, { 1279,13}, { 2687,14}, { 1407,13}, \ { 2943,15}, { 767,14}, { 1535,13}, { 3199,14}, \ { 1663,13}, { 3455,12}, { 6911,14}, { 1791,13}, \ { 3583,14}, { 1919,16}, { 511,15}, { 1023,14}, \ - { 2431,13}, { 4863,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ - {2097152,22}, {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 207 -#define SQR_FFT_THRESHOLD 3264 + { 2175,13}, { 4351,14}, { 2431,13}, { 4863,15}, \ + { 1279,14}, { 2943,13}, { 5887,15}, { 1535,14}, \ + { 3455,13}, { 6911,15}, { 1791,14}, { 3839,13}, \ + { 7679,16}, { 1023,15}, { 2047,14}, { 4351,15}, \ + { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 237 +#define MUL_FFT_THRESHOLD 4224 + +#define SQR_FFT_MODF_THRESHOLD 344 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 344, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ + { 10, 5}, { 21, 6}, { 21, 7}, { 11, 6}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \ + { 11, 7}, { 25, 8}, { 13, 7}, { 28, 8}, \ + { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \ + { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \ + { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \ + { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \ + { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ + { 79,10}, { 55,11}, { 31,10}, { 79,11}, \ + { 47,10}, { 95,12}, { 31,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 135,11}, \ + { 79,10}, { 159, 9}, { 319,11}, { 95,10}, \ + { 191, 9}, { 383,11}, { 111,12}, { 63,11}, \ + { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ + { 543,11}, { 143,10}, { 287, 9}, { 575,10}, \ + { 303, 9}, { 607,11}, { 159,10}, { 319, 9}, \ + { 639,12}, { 95,11}, { 191,10}, { 383, 9}, \ + { 767,11}, { 207,10}, { 415,13}, { 63,12}, \ + { 127,11}, { 255,10}, { 511,11}, { 271,10}, \ + { 543, 9}, { 1087,10}, { 575,11}, { 303,10}, \ + { 607,11}, { 319,10}, { 671,11}, { 351,10}, \ + { 735,11}, { 383,10}, { 767,11}, { 415,10}, \ + { 831,11}, { 447,10}, { 895,11}, { 479,13}, \ + { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \ + { 607,10}, { 1215,11}, { 671,12}, { 351,11}, \ + { 735,12}, { 383,11}, { 767,12}, { 415,11}, \ + { 831,10}, { 1663,12}, { 447,11}, { 895,12}, \ + { 479,14}, { 127,12}, { 511,11}, { 1023,12}, \ + { 543,11}, { 1087,12}, { 607,11}, { 1215,13}, \ + { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \ + { 1343,12}, { 735,13}, { 383,12}, { 767,11}, \ + { 1535,12}, { 831,13}, { 447,12}, { 959,13}, \ + { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \ + { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \ + { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \ + { 1663,13}, { 959,14}, { 511,13}, { 1087,12}, \ + { 2175,13}, { 1215,12}, { 2431,14}, { 639,13}, \ + { 1343,12}, { 2687,13}, { 1407,12}, { 2815,13}, \ + { 1471,14}, { 767,13}, { 1599,12}, { 3199,13}, \ + { 1663,14}, { 895,13}, { 1791,12}, { 3583,15}, \ + { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ + { 2431,12}, { 4863,14}, { 1279,13}, { 2687,14}, \ + { 1407,13}, { 2815,15}, { 767,14}, { 1535,13}, \ + { 3199,14}, { 1663,13}, { 3455,12}, { 6911,14}, \ + { 1791,13}, { 3583,16}, { 511,15}, { 1023,14}, \ + { 2431,13}, { 4863,15}, { 1279,14}, { 2943,13}, \ + { 5887,15}, { 1535,14}, { 3455,13}, { 6911,15}, \ + { 1791,14}, { 3839,16}, { 1023,15}, { 2047,14}, \ + { 4223,15}, { 32768,16}, { 65536,17}, { 131072,18}, \ + { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ + {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 206 +#define SQR_FFT_THRESHOLD 3712 #define MULLO_BASECASE_THRESHOLD 0 /* always */ #define MULLO_DC_THRESHOLD 78 diff --git a/mpn/x86_64/coreisbr/gmp-mparam.h b/mpn/x86_64/coreisbr/gmp-mparam.h index e4b591ce3..3a91b4c30 100644 --- a/mpn/x86_64/coreisbr/gmp-mparam.h +++ b/mpn/x86_64/coreisbr/gmp-mparam.h @@ -32,7 +32,7 @@ see https://www.gnu.org/licenses/. */ #define GMP_LIMB_BYTES 8 /* 3300 MHz Core i5 Sandy Bridge */ -/* FFT tuning limit = 40000000 */ +/* FFT tuning limit = 100000000 */ /* Generated by tuneup.c, 2014-03-12, gcc 4.5 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ @@ -77,63 +77,63 @@ see https://www.gnu.org/licenses/. */ #define MUL_FFT_TABLE3 \ { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 23, 7}, { 12, 6}, { 25, 7}, { 21, 8}, \ - { 11, 7}, { 25, 8}, { 13, 7}, { 27, 8}, \ - { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \ - { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \ - { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \ - { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \ - { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ - { 83,10}, { 47, 9}, { 95,10}, { 55,11}, \ - { 31,10}, { 79,11}, { 47,10}, { 95,12}, \ - { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \ - { 135,11}, { 79,10}, { 159, 9}, { 319,10}, \ - { 167,11}, { 95,10}, { 191, 9}, { 383,11}, \ - { 111,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,11}, { 143,10}, { 287, 9}, { 575,10}, \ - { 303,11}, { 159,10}, { 319,12}, { 95,11}, \ - { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \ - { 415,13}, { 63,12}, { 127,11}, { 255,10}, \ - { 511,11}, { 271,10}, { 543,11}, { 287,10}, \ - { 575,11}, { 303,10}, { 607,12}, { 159,11}, \ - { 319,10}, { 639,11}, { 351,10}, { 703,11}, \ - { 367,12}, { 191,11}, { 383,10}, { 767,11}, \ - { 415,10}, { 831,12}, { 223,11}, { 447,10}, \ - { 895,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 511,10}, { 1023,11}, { 543,10}, { 1087,12}, \ + { 23, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \ + { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ + { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ + { 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \ + { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \ + { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \ + { 95,10}, { 55,11}, { 31,10}, { 79,11}, \ + { 47,10}, { 95,12}, { 31,11}, { 63,10}, \ + { 127, 9}, { 255,10}, { 135,11}, { 79,10}, \ + { 159, 9}, { 319,10}, { 167,11}, { 95,10}, \ + { 191, 9}, { 383, 8}, { 767, 7}, { 1599, 8}, \ + { 831, 9}, { 447,10}, { 239,12}, { 63,11}, \ + { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \ + { 575,12}, { 95,11}, { 191,10}, { 383,11}, \ + { 207,10}, { 447,13}, { 63,12}, { 127,11}, \ + { 255,10}, { 511,11}, { 271,10}, { 543, 8}, \ + { 2175,11}, { 303,12}, { 159,11}, { 319,10}, \ + { 671,11}, { 367,12}, { 191,11}, { 383,10}, \ + { 767,11}, { 415,10}, { 831,12}, { 223,11}, \ + { 447,10}, { 895,11}, { 479,13}, { 127,12}, \ + { 255,11}, { 511,10}, { 1023,11}, { 543,12}, \ { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \ { 319,11}, { 671,12}, { 351,11}, { 703,10}, \ - { 1407,11}, { 735,13}, { 191,12}, { 383,11}, \ - { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ - { 895,12}, { 479,14}, { 127,13}, { 255,12}, \ - { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \ - { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \ - { 703,11}, { 1407,12}, { 735,13}, { 383,12}, \ - { 767,11}, { 1535,12}, { 831,13}, { 447,12}, \ - { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \ - { 1087,13}, { 575,12}, { 1215,11}, { 2431,13}, \ - { 639,12}, { 1279,13}, { 703,12}, { 1407,14}, \ - { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \ - { 1663,13}, { 959,12}, { 1919,14}, { 511,13}, \ - { 1087,12}, { 2175,13}, { 1215,12}, { 2431,14}, \ - { 639,13}, { 1343,12}, { 2687,13}, { 1407,12}, \ - { 2815,13}, { 1471,14}, { 767,13}, { 1663,14}, \ - { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2687,14}, { 1407,13}, { 2943,15}, \ - { 767,14}, { 1535,13}, { 3071,14}, { 1663,13}, \ - { 3455,14}, { 1791,13}, { 3583,14}, { 1919,16}, \ + { 1407,13}, { 191,12}, { 383,11}, { 767,12}, \ + { 415,11}, { 831,12}, { 479,14}, { 127,13}, \ + { 255,12}, { 511,11}, { 1023,12}, { 575,11}, \ + { 1151,12}, { 607,13}, { 319,12}, { 671,11}, \ + { 1343,12}, { 703,13}, { 383,12}, { 767,11}, \ + { 1535,12}, { 831,13}, { 447,12}, { 959,11}, \ + { 1919,14}, { 255,13}, { 511,12}, { 1087,13}, \ + { 575,12}, { 1215,13}, { 639,12}, { 1279,13}, \ + { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \ + { 1535,13}, { 831,12}, { 1663,13}, { 959,14}, \ + { 511,13}, { 1087,12}, { 2175,13}, { 1215,12}, \ + { 2431,14}, { 639,13}, { 1343,12}, { 2687,13}, \ + { 1407,12}, { 2815,13}, { 1471,12}, { 2943,14}, \ + { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \ + { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ + { 2431,12}, { 4863,14}, { 1279,13}, { 2687,14}, \ + { 1407,13}, { 2943,15}, { 767,14}, { 1535,13}, \ + { 3071,14}, { 1663,13}, { 3455,14}, { 1919,16}, \ { 511,15}, { 1023,14}, { 2431,13}, { 4863,15}, \ - { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ - { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ - {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 205 + { 1279,14}, { 2943,13}, { 5887,15}, { 1535,14}, \ + { 3455,15}, { 1791,14}, { 3839,13}, { 7679,16}, \ + { 1023,15}, { 2047,14}, { 4223,15}, { 2303,14}, \ + { 4863,15}, { 2815,14}, { 5887,16}, { 65536,17}, \ + { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ + {2097152,22}, {4194304,23}, {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 203 #define MUL_FFT_THRESHOLD 4736 #define SQR_FFT_MODF_THRESHOLD 336 /* k = 5 */ #define SQR_FFT_TABLE3 \ - { { 336, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 25, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \ + { { 336, 5}, { 11, 4}, { 23, 5}, { 19, 6}, \ + { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 25, 8}, \ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ { 33, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ @@ -142,45 +142,44 @@ see https://www.gnu.org/licenses/. */ { 63,10}, { 39, 9}, { 79,10}, { 55,11}, \ { 31,10}, { 79,11}, { 47,10}, { 95,12}, \ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ - { 511,10}, { 135, 9}, { 271,11}, { 79, 9}, \ - { 319, 8}, { 639,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,11}, { 143,10}, \ - { 287, 9}, { 575,10}, { 303, 9}, { 607,10}, \ - { 319, 9}, { 639,12}, { 95,11}, { 191,10}, \ - { 383, 9}, { 767,11}, { 207,10}, { 415,13}, \ - { 63,12}, { 127,11}, { 255,10}, { 511,11}, \ - { 271,10}, { 543,11}, { 287,10}, { 575,11}, \ - { 303,10}, { 607,11}, { 319,10}, { 639,11}, \ - { 335,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,12}, { 223,11}, { 447,10}, { 895,11}, \ - { 479,13}, { 127,12}, { 255,11}, { 511,10}, \ - { 1023,11}, { 543,12}, { 287,11}, { 575,10}, \ - { 1151,11}, { 607,12}, { 319,11}, { 671,12}, \ - { 351,11}, { 703,13}, { 191,12}, { 383,11}, \ - { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ - { 895,12}, { 479,11}, { 959,14}, { 127,13}, \ - { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \ - { 1087,12}, { 575,11}, { 1151,12}, { 607,13}, \ - { 319,12}, { 671,11}, { 1343,12}, { 735,13}, \ - { 383,12}, { 767,11}, { 1535,12}, { 831,13}, \ - { 447,12}, { 959,13}, { 511,12}, { 1087,13}, \ + { 511,10}, { 135,11}, { 79, 9}, { 319, 8}, \ + { 639,11}, { 95,10}, { 191, 9}, { 383,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ + { 271, 9}, { 543,11}, { 143,10}, { 287, 8}, \ + { 1151,10}, { 303, 6}, { 4863, 8}, { 1279, 9}, \ + { 671,11}, { 175,10}, { 367,12}, { 95,11}, \ + { 191,10}, { 383,11}, { 207, 9}, { 831,10}, \ + { 447,13}, { 63,12}, { 127,11}, { 255,10}, \ + { 511,11}, { 271, 9}, { 1087,10}, { 575,11}, \ + { 303,10}, { 607,11}, { 319,10}, { 671,11}, \ + { 367,12}, { 191,11}, { 383,10}, { 767,11}, \ + { 415,12}, { 223,11}, { 447,10}, { 959,12}, \ + { 255,11}, { 511,10}, { 1023,11}, { 575,10}, \ + { 1151,11}, { 607,10}, { 1215,12}, { 319,11}, \ + { 671, 9}, { 2687,12}, { 351,11}, { 703,13}, \ + { 191,12}, { 415,11}, { 831,12}, { 479,14}, \ + { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \ + { 607,13}, { 319,12}, { 671,11}, { 1343,12}, \ + { 703,13}, { 383,12}, { 831,13}, { 447,12}, \ + { 959,14}, { 255,13}, { 511,12}, { 1087,13}, \ { 575,12}, { 1215,13}, { 639,12}, { 1343,13}, \ - { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \ - { 1535,13}, { 831,12}, { 1663,13}, { 959,14}, \ - { 511,13}, { 1087,12}, { 2175,13}, { 1215,14}, \ - { 639,13}, { 1343,12}, { 2687,13}, { 1407,12}, \ - { 2815,13}, { 1471,14}, { 767,13}, { 1663,14}, \ - { 895,13}, { 1791,15}, { 511,14}, { 1023,13}, \ - { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2687,14}, { 1407,13}, { 2815,15}, \ - { 767,14}, { 1535,13}, { 3071,14}, { 1663,13}, \ - { 3455,14}, { 1791,16}, { 511,15}, { 1023,14}, \ - { 2431,13}, { 4863,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ - {2097152,22}, {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 191 + { 703,14}, { 383,13}, { 767,12}, { 1535,13}, \ + { 831,12}, { 1663,13}, { 959,14}, { 511,13}, \ + { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \ + { 1343,12}, { 2687,13}, { 1407,12}, { 2815,14}, \ + { 767,13}, { 1663,14}, { 895,13}, { 1791,15}, \ + { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ + { 2431,12}, { 4863,14}, { 1279,13}, { 2687,14}, \ + { 1407,13}, { 2815,15}, { 767,14}, { 1535,13}, \ + { 3071,14}, { 1663,13}, { 3455,14}, { 1791,16}, \ + { 511,15}, { 1023,14}, { 2431,13}, { 4863,15}, \ + { 1279,14}, { 2943,13}, { 5887,15}, { 1535,14}, \ + { 3455,15}, { 1791,14}, { 3839,16}, { 1023,15}, \ + { 2047,14}, { 4223,15}, { 2303,14}, { 4863,15}, \ + { 2815,14}, { 5887,16}, { 65536,17}, { 131072,18}, \ + { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ + {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 190 #define SQR_FFT_THRESHOLD 3264 #define MULLO_BASECASE_THRESHOLD 0 /* always */ diff --git a/mpn/x86_64/k10/gmp-mparam.h b/mpn/x86_64/k10/gmp-mparam.h index 7064f8af1..5881306a4 100644 --- a/mpn/x86_64/k10/gmp-mparam.h +++ b/mpn/x86_64/k10/gmp-mparam.h @@ -39,7 +39,7 @@ see https://www.gnu.org/licenses/. */ #endif /* 3200 MHz K10 Thuban */ -/* FFT tuning limit = 40000000 */ +/* FFT tuning limit = 100000000 */ /* Generated by tuneup.c, 2014-03-12, gcc 4.2 */ #define MOD_1_NORM_THRESHOLD 0 /* always */ @@ -83,9 +83,9 @@ see https://www.gnu.org/licenses/. */ #define MUL_FFT_MODF_THRESHOLD 570 /* k = 5 */ #define MUL_FFT_TABLE3 \ { { 570, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 27, 7}, { 14, 6}, { 29, 7}, { 15, 6}, \ - { 31, 7}, { 29, 8}, { 15, 7}, { 31, 8}, \ - { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ + { 29, 7}, { 15, 6}, { 31, 7}, { 29, 8}, \ + { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \ + { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \ { 23, 7}, { 47, 8}, { 25, 7}, { 51, 8}, \ { 29, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \ @@ -94,33 +94,38 @@ see https://www.gnu.org/licenses/. */ { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \ { 95,10}, { 55,11}, { 31,10}, { 87,11}, \ { 47,10}, { 111,12}, { 31,11}, { 63,10}, \ - { 135,11}, { 79,10}, { 167,11}, { 95, 9}, \ - { 383,10}, { 199,12}, { 63,11}, { 159,12}, \ - { 95,11}, { 207,13}, { 63,12}, { 127,11}, \ - { 255,10}, { 543, 9}, { 1087,12}, { 159,11}, \ + { 135,11}, { 79,10}, { 167, 8}, { 671,11}, \ + { 111,12}, { 63,11}, { 159,12}, { 95,11}, \ + { 207,10}, { 415,13}, { 63,12}, { 127,11}, \ + { 255,10}, { 511,11}, { 271,12}, { 159,11}, \ { 319,10}, { 639,11}, { 335,10}, { 671,11}, \ - { 367,12}, { 191,11}, { 383,10}, { 767,11}, \ - { 415,12}, { 223,11}, { 447,13}, { 127,12}, \ - { 255,11}, { 543,12}, { 287,11}, { 607,12}, \ + { 367,12}, { 191,11}, { 415,12}, { 223,13}, \ + { 127,12}, { 255,11}, { 543,12}, { 287,11}, \ + { 575,10}, { 1151,11}, { 607,10}, { 1215,12}, \ { 319,11}, { 671,12}, { 351,11}, { 703,13}, \ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ { 831,12}, { 447,14}, { 127,13}, { 255,12}, \ { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \ - { 735,13}, { 383,12}, { 831,13}, { 447,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1087,13}, \ - { 575,12}, { 1215,13}, { 639,12}, { 1343,13}, \ - { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \ - { 1599,13}, { 831,12}, { 1663,13}, { 895,12}, \ - { 1791,13}, { 959,14}, { 511,13}, { 1087,12}, \ + { 671,11}, { 1343,12}, { 735,13}, { 383,12}, \ + { 799,11}, { 1599,12}, { 831,13}, { 447,12}, \ + { 959,13}, { 511,12}, { 1087,13}, { 575,12}, \ + { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \ + { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \ + { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \ + { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \ { 2175,13}, { 1215,14}, { 639,13}, { 1471,14}, \ - { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \ + { 767,13}, { 1727,14}, { 895,13}, { 1855,15}, \ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ { 2431,14}, { 1279,13}, { 2559,14}, { 1407,15}, \ { 767,14}, { 1535,13}, { 3071,14}, { 1791,16}, \ - { 511,15}, { 1023,14}, { 2303,15}, { 32768,16}, \ - { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ - {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 152 + { 511,15}, { 1023,14}, { 2431,15}, { 1279,14}, \ + { 2815,15}, { 1535,14}, { 3199,15}, { 1791,14}, \ + { 3583,16}, { 1023,15}, { 2047,14}, { 4223,15}, \ + { 2303,14}, { 4863,15}, { 2559,14}, { 5247,15}, \ + { 2815,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 169 #define MUL_FFT_THRESHOLD 7808 #define SQR_FFT_MODF_THRESHOLD 448 /* k = 5 */ @@ -132,45 +137,48 @@ see https://www.gnu.org/licenses/. */ { 21, 7}, { 43, 8}, { 25, 7}, { 51, 8}, \ { 29, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \ - { 55,10}, { 15, 9}, { 31, 8}, { 63, 9}, \ + { 55,10}, { 15, 9}, { 31, 8}, { 65, 9}, \ { 43,10}, { 23, 9}, { 55,11}, { 15,10}, \ { 31, 9}, { 67,10}, { 39, 9}, { 83,10}, \ { 47, 9}, { 95,10}, { 55,11}, { 31,10}, \ { 79,11}, { 47,10}, { 103,12}, { 31,11}, \ { 63,10}, { 135,11}, { 79,10}, { 159,11}, \ { 95,10}, { 191,11}, { 111,12}, { 63,11}, \ - { 143,10}, { 287, 9}, { 575,12}, { 95,11}, \ - { 191,10}, { 383, 9}, { 799,11}, { 207,13}, \ - { 63,12}, { 127,11}, { 255,10}, { 511, 9}, \ - { 1023,11}, { 271,10}, { 543,12}, { 159,11}, \ - { 319,10}, { 639, 9}, { 1279,11}, { 335,10}, \ - { 671,11}, { 351,10}, { 703,11}, { 367,12}, \ - { 191,10}, { 767,11}, { 399,10}, { 799,11}, \ - { 415,10}, { 831,12}, { 223,11}, { 447,13}, \ - { 127,12}, { 255,11}, { 543,12}, { 287,11}, \ - { 575,10}, { 1151,11}, { 607,12}, { 319,11}, \ - { 639,10}, { 1279,11}, { 671,12}, { 351,11}, \ - { 703,10}, { 1407,13}, { 191,12}, { 383,11}, \ - { 767,10}, { 1535,12}, { 415,11}, { 831,12}, \ - { 447,14}, { 127,13}, { 255,12}, { 511,11}, \ - { 1023,12}, { 543,11}, { 1087,12}, { 607,13}, \ + { 127,10}, { 255,11}, { 143, 9}, { 575,10}, \ + { 303, 9}, { 607,12}, { 95,11}, { 191, 9}, \ + { 767,10}, { 399,11}, { 207,13}, { 63,12}, \ + { 127,11}, { 255,10}, { 543, 9}, { 1087,10}, \ + { 575,12}, { 159,11}, { 319,10}, { 639,11}, \ + { 335,10}, { 671,11}, { 351,10}, { 703, 9}, \ + { 1407,12}, { 191,11}, { 415,10}, { 831,12}, \ + { 223,11}, { 447,13}, { 127,12}, { 255,11}, \ + { 543,10}, { 1087,12}, { 287,11}, { 607,12}, \ + { 319,11}, { 671,12}, { 351,11}, { 703,13}, \ + { 191,12}, { 383,11}, { 767,10}, { 1535,12}, \ + { 415,11}, { 863,12}, { 447,14}, { 127,13}, \ + { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \ + { 1087,12}, { 575,11}, { 1151,12}, { 607,13}, \ { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \ - { 1343,12}, { 735,13}, { 383,12}, { 799,11}, \ - { 1599,12}, { 831,13}, { 447,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \ - { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \ - { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \ - { 831,12}, { 1663,13}, { 895,12}, { 1791,15}, \ - { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \ - { 1215,14}, { 639,13}, { 1407,14}, { 767,13}, \ - { 1727,14}, { 895,13}, { 1855,15}, { 511,14}, \ - { 1023,13}, { 2047,14}, { 1151,13}, { 2303,14}, \ - { 1407,15}, { 767,14}, { 1791,16}, { 511,15}, \ - { 1023,14}, { 2303,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ - {2097152,22}, {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 175 -#define SQR_FFT_THRESHOLD 5952 + { 1343,12}, { 703,11}, { 1407,12}, { 735,13}, \ + { 383,12}, { 799,11}, { 1599,12}, { 863,13}, \ + { 447,12}, { 927,14}, { 255,13}, { 511,12}, \ + { 1087,13}, { 575,12}, { 1215,13}, { 639,12}, \ + { 1343,13}, { 703,12}, { 1407,14}, { 383,13}, \ + { 767,12}, { 1535,13}, { 831,12}, { 1727,13}, \ + { 895,12}, { 1791,13}, { 959,15}, { 255,14}, \ + { 511,13}, { 1087,12}, { 2175,13}, { 1215,14}, \ + { 639,13}, { 1471,14}, { 767,13}, { 1663,14}, \ + { 895,13}, { 1791,15}, { 511,14}, { 1023,13}, \ + { 2175,14}, { 1151,13}, { 2303,14}, { 1407,15}, \ + { 767,14}, { 1791,16}, { 511,15}, { 1023,14}, \ + { 2303,15}, { 1279,14}, { 2687,15}, { 1535,14}, \ + { 3199,15}, { 1791,16}, { 1023,15}, { 2047,14}, \ + { 4223,15}, { 2303,14}, { 4863,15}, { 2559,14}, \ + { 5247,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 185 +#define SQR_FFT_THRESHOLD 5568 #define MULLO_BASECASE_THRESHOLD 0 /* always */ #define MULLO_DC_THRESHOLD 61 |