From e531a140af2926cbb3b70d0b7b5c011bf0ac0765 Mon Sep 17 00:00:00 2001 From: Torbjorn Granlund Date: Thu, 21 Jan 2010 21:55:35 +0100 Subject: Add FFT_TABLE3 tables for a basic set of machines. --- ChangeLog | 2 + mpn/alpha/ev5/gmp-mparam.h | 186 ++++++++++++++++++++++++---------- mpn/powerpc64/mode64/p4/gmp-mparam.h | 135 ++++++++++++++++++++---- mpn/sparc64/ultrasparc34/gmp-mparam.h | 2 +- mpn/x86/k7/gmp-mparam.h | 103 +++++++++++++++++-- mpn/x86/p6/mmx/gmp-mparam.h | 2 +- mpn/x86/p6/sse2/gmp-mparam.h | 2 +- mpn/x86/pentium4/sse2/gmp-mparam.h | 91 +++++++++++++++-- mpn/x86_64/atom/gmp-mparam.h | 104 ++++++++++++++++--- mpn/x86_64/core2/gmp-mparam.h | 104 +++++++++++++++++-- mpn/x86_64/corei/gmp-mparam.h | 113 +++++++++++++++++++-- mpn/x86_64/gmp-mparam.h | 164 +++++++++++++++++++++++------- 12 files changed, 845 insertions(+), 163 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9a7dcabee..0c68e1080 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ 2010-01-21 Torbjorn Granlund + * Add FFT_TABLE3 tables for a basic set of machines. + * configure.in: Use -mtune=nocona for 64-bit pentium4. * config.guess: Recognise many more Intel processors. diff --git a/mpn/alpha/ev5/gmp-mparam.h b/mpn/alpha/ev5/gmp-mparam.h index 2182cb0d0..d60cac911 100644 --- a/mpn/alpha/ev5/gmp-mparam.h +++ b/mpn/alpha/ev5/gmp-mparam.h @@ -28,71 +28,145 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 36 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 3 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 76 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 32 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 2 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 7 +#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 73 #define USE_PREINV_DIVREM_1 1 /* preinv always */ #define DIVEXACT_1_THRESHOLD 0 /* always */ -#define BMOD_1_TO_MOD_1_THRESHOLD 72 +#define BMOD_1_TO_MOD_1_THRESHOLD 87 -#define MUL_TOOM22_THRESHOLD 14 -#define MUL_TOOM33_THRESHOLD 74 -#define MUL_TOOM44_THRESHOLD 130 -#define MUL_TOOM6H_THRESHOLD 155 +#define MUL_TOOM22_THRESHOLD 16 +#define MUL_TOOM33_THRESHOLD 53 +#define MUL_TOOM44_THRESHOLD 121 +#define MUL_TOOM6H_THRESHOLD 173 #define MUL_TOOM8H_THRESHOLD 236 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 84 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 56 -#define SQR_BASECASE_THRESHOLD 4 -#define SQR_TOOM2_THRESHOLD 26 -#define SQR_TOOM3_THRESHOLD 53 +#define SQR_BASECASE_THRESHOLD 5 +#define SQR_TOOM2_THRESHOLD 28 +#define SQR_TOOM3_THRESHOLD 78 #define SQR_TOOM4_THRESHOLD 136 -#define SQR_TOOM6_THRESHOLD 173 -#define SQR_TOOM8_THRESHOLD 254 - -#define MULMOD_BNM1_THRESHOLD 9 -#define SQRMOD_BNM1_THRESHOLD 14 - -#define MUL_FFT_TABLE { 240, 480, 1344, 1792, 5120, 20480, 81920, 196608, 0 } -#define MUL_FFT_MODF_THRESHOLD 240 -#define MUL_FFT_THRESHOLD 1920 - -#define SQR_FFT_TABLE { 240, 480, 1216, 1792, 5120, 12288, 81920, 196608, 0 } -#define SQR_FFT_MODF_THRESHOLD 208 -#define SQR_FFT_THRESHOLD 1920 +#define SQR_TOOM6_THRESHOLD 180 +#define SQR_TOOM8_THRESHOLD 260 + +#define MULMOD_BNM1_THRESHOLD 11 +#define SQRMOD_BNM1_THRESHOLD 17 + +#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 244, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \ + { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \ + { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \ + { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \ + { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \ + { 13, 7}, { 27, 9}, { 7, 8}, { 21, 9}, \ + { 11, 8}, { 25,10}, { 7, 9}, { 15, 8}, \ + { 33, 9}, { 23,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \ + { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \ + { 95,10}, { 55,11}, { 31,10}, { 63, 9}, \ + { 127,10}, { 71, 9}, { 143, 8}, { 287,10}, \ + { 79,11}, { 47,10}, { 95, 9}, { 191,12}, \ + { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \ + { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \ + { 319, 8}, { 639,10}, { 175,11}, { 95,10}, \ + { 191, 9}, { 383,10}, { 207, 9}, { 415,11}, \ + { 111,12}, { 63,11}, { 127,10}, { 255,11}, \ + { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \ + { 319,11}, { 175,10}, { 351,12}, { 95,11}, \ + { 191,10}, { 383,11}, { 207,10}, { 415,11}, \ + { 223,13}, { 63,12}, { 127,11}, { 255,10}, \ + { 511,11}, { 287,10}, { 575,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 351,10}, { 703,12}, \ + { 191,11}, { 415,12}, { 223,11}, { 447,13}, \ + { 127,12}, { 255,11}, { 511,12}, { 287,11}, \ + { 575,12}, { 319,11}, { 639,12}, { 351,11}, \ + { 703,13}, { 191,12}, { 383,11}, { 767,12}, \ + { 415,11}, { 831,12}, { 447,14}, { 127,13}, \ + { 255,12}, { 575,13}, { 319,12}, { 703,13}, \ + { 383,12}, { 831,13}, { 447,12}, { 895,14}, \ + { 255,13}, { 511,12}, { 1023,13}, { 575,12}, \ + { 1151,13}, { 703,12}, { 1407,14}, { 16384,15}, \ + { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 141 +#define MUL_FFT_THRESHOLD 4480 + +#define SQR_FFT_MODF_THRESHOLD 220 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 220, 5}, { 13, 6}, { 15, 7}, { 8, 6}, \ + { 17, 7}, { 9, 6}, { 19, 7}, { 13, 8}, \ + { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \ + { 11, 7}, { 23, 8}, { 13, 7}, { 30, 8}, \ + { 19, 4}, { 319, 9}, { 11, 8}, { 25,10}, \ + { 7, 9}, { 15, 8}, { 31, 7}, { 64, 9}, \ + { 19, 8}, { 39, 7}, { 79, 9}, { 23, 8}, \ + { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \ + { 67,10}, { 39, 9}, { 79,10}, { 47,11}, \ + { 31,10}, { 63, 9}, { 127,10}, { 71, 9}, \ + { 143, 8}, { 287,10}, { 79,11}, { 47,10}, \ + { 95, 9}, { 191,12}, { 31,11}, { 63,10}, \ + { 127, 9}, { 255,10}, { 143, 9}, { 287,11}, \ + { 79,10}, { 159, 9}, { 319,10}, { 175, 9}, \ + { 351,11}, { 95,10}, { 191, 9}, { 383,10}, \ + { 207,11}, { 111,12}, { 63,11}, { 127,10}, \ + { 255,11}, { 143,10}, { 287,11}, { 159,10}, \ + { 319,11}, { 175,10}, { 351,12}, { 95,11}, \ + { 191,10}, { 383,11}, { 207,10}, { 415,11}, \ + { 223,13}, { 63,12}, { 127,11}, { 255,10}, \ + { 511,11}, { 287,12}, { 159,11}, { 319,10}, \ + { 639,11}, { 351,12}, { 191,11}, { 383,10}, \ + { 767,11}, { 415,12}, { 223,11}, { 447,13}, \ + { 127,12}, { 255,11}, { 511,12}, { 287,11}, \ + { 575,12}, { 319,11}, { 639,12}, { 351,13}, \ + { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ + { 831,12}, { 447,14}, { 127,13}, { 255,12}, \ + { 575,13}, { 319,12}, { 703,13}, { 383,12}, \ + { 831,13}, { 447,12}, { 895,14}, { 255,13}, \ + { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \ + { 703,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ + { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ + {2097152,22}, {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 135 +#define SQR_FFT_THRESHOLD 3712 #define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 44 -#define MULLO_MUL_N_THRESHOLD 246 - -#define DC_DIV_QR_THRESHOLD 47 -#define DC_DIVAPPR_Q_THRESHOLD 182 -#define DC_BDIV_QR_THRESHOLD 47 -#define DC_BDIV_Q_THRESHOLD 168 - -#define INV_MULMOD_BNM1_THRESHOLD 55 -#define INV_NEWTON_THRESHOLD 187 -#define INV_APPR_THRESHOLD 179 - -#define BINV_NEWTON_THRESHOLD 220 -#define REDC_1_TO_REDC_N_THRESHOLD 77 - -#define MATRIX22_STRASSEN_THRESHOLD 11 -#define HGCD_THRESHOLD 96 -#define GCD_DC_THRESHOLD 309 -#define GCDEXT_DC_THRESHOLD 233 +#define MULLO_DC_THRESHOLD 55 +#define MULLO_MUL_N_THRESHOLD 7916 + +#define DC_DIV_QR_THRESHOLD 55 +#define DC_DIVAPPR_Q_THRESHOLD 192 +#define DC_BDIV_QR_THRESHOLD 51 +#define DC_BDIV_Q_THRESHOLD 120 + +#define INV_MULMOD_BNM1_THRESHOLD 100 +#define INV_NEWTON_THRESHOLD 188 +#define INV_APPR_THRESHOLD 189 + +#define BINV_NEWTON_THRESHOLD 199 +#define REDC_1_TO_REDC_N_THRESHOLD 55 + +#define MU_DIV_QR_THRESHOLD 979 +#define MU_DIVAPPR_Q_THRESHOLD 998 +#define MUPI_DIV_QR_THRESHOLD 90 +#define MU_BDIV_QR_THRESHOLD 792 +#define MU_BDIV_Q_THRESHOLD 942 + +#define MATRIX22_STRASSEN_THRESHOLD 13 +#define HGCD_THRESHOLD 101 +#define GCD_DC_THRESHOLD 306 +#define GCDEXT_DC_THRESHOLD 210 #define JACOBI_BASE_METHOD 2 -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 25 -#define SET_STR_DC_THRESHOLD 470 -#define SET_STR_PRECOMPUTE_THRESHOLD 1452 - -#define MUL_FFT_TABLE2 {{1,4}, {177,5}, {481,6}, {1089,7}, {3905,6}, {3969,7}, {4353,8}, {4929,9}, {5633,7}, {5761,5}, {5793,7}, {5889,6}, {5953,5}, {6145,7}, {7041,5}, {7169,8}, {7425,9}, {7681,8}, {9281,7}, {9345,6}, {9409,5}, {9841,8}, {10305,9}, {10753,8}, {11265,9}, {11777,7}, {11905,9}, {12289,10}, {13313,9}, {14593,7}, {14849,9}, {15361,8}, {15745,7}, {15937,5}, {15969,8}, {16513,6}, {16609,4}, {16641,7}, {16865,5}, {17105,8}, {17409,7}, {18177,8}, {18433,9}, {19585,10}, {22529,9}, {23553,10}, {24577,11}, {26625,10}, {28161,8}, {28545,6}, {28737,4}, {28753,5}, {28817,6}, {28865,4}, {28881,5}, {28897,6}, {28929,8}, {29441,9}, {30465,11}, {30721,10}, {32769,9}, {33793,10}, {34817,9}, {35841,10}, {38913,8}, {40129,7}, {40193,6}, {40257,5}, {40289,8}, {40449,9}, {44289,8}, {44545,9}, {45057,10}, {48129,9}, {49409,10}, {50433,11}, {51201,10}, {53249,11}, {63489,9}, {64001,10}, {67585,9}, {68097,10}, {75777,11}, {81921,10}, {84993,11}, {89089,9}, {89601,11}, {96257,10}, {102401,12}, {126977,11}, {129025,9}, {129537,8}, {129793,10}, {131073,11}, {136193,10}, {138753,9}, {141313,11}, {145409,10}, {146945,11}, {159745,10}, {160769,9}, {161281,11}, {161793,10}, {169473,11}, {170497,9}, {171009,10}, {174081,9}, {179969,11}, {194561,10}, {206849,9}, {207361,10}, {209409,9}, {210945,10}, {211969,9}, {212993,11}, {215041,12}, {218113,10}, {219137,11}, {222209,12}, {253953,11}, {264705,9}, {266241,10}, {272897,11}, {274433,10}, {280577,11}, {364545,12}, {389121,10}, {390145,11}, {424961,13}, {434177,11}, {450561,13}, {516097,12}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {209,5}, {417,6}, {1089,7}, {3585,6}, {3777,8}, {6657,7}, {9249,8}, {11329,6}, {11521,5}, {11553,8}, {15105,6}, {15201,5}, {15233,7}, {15361,8}, {15617,7}, {15745,5}, {15857,4}, {15873,7}, {16257,6}, {16897,5}, {17377,7}, {17633,9}, {18049,7}, {18177,8}, {20033,6}, {20241,8}, {20993,9}, {22529,10}, {25345,11}, {28673,9}, {29697,10}, {31745,9}, {32257,8}, {32513,10}, {35073,9}, {35329,8}, {36865,7}, {36993,9}, {38401,8}, {39169,10}, {41473,9}, {41985,7}, {42497,6}, {42689,7}, {42753,8}, {43009,7}, {43137,9}, {43649,7}, {43809,6}, {43841,8}, {44545,9}, {45313,10}, {53249,11}, {60417,9}, {60929,7}, {61569,9}, {64769,10}, {71169,9}, {72193,10}, {72705,9}, {74241,7}, {74369,9}, {74753,11}, {75777,9}, {76289,10}, {79361,9}, {79873,11}, {96769,9}, {97537,10}, {102401,11}, {104961,12}, {122881,10}, {126977,8}, {128001,10}, {129537,9}, {130049,10}, {135169,9}, {135681,8}, {135937,7}, {136193,9}, {137217,11}, {139265,10}, {141057,8}, {141569,7}, {142337,8}, {143489,7}, {143617,8}, {144385,7}, {145537,9}, {145921,11}, {147457,9}, {149249,8}, {150273,7}, {151041,8}, {151297,10}, {154113,11}, {155649,9}, {157697,11}, {165889,9}, {172033,10}, {176129,9}, {177665,10}, {179201,11}, {181249,10}, {182273,11}, {186369,9}, {187649,8}, {188417,10}, {189441,11}, {192513,10}, {202753,8}, {203009,9}, {203265,8}, {203521,9}, {206849,10}, {210945,11}, {214017,12}, {219137,11}, {221185,12}, {225281,11}, {227329,12}, {239617,11}, {241665,12}, {245761,11}, {260097,10}, {261121,9}, {261633,10}, {263169,11}, {264193,10}, {265217,11}, {288769,10}, {293121,11}, {294913,9}, {296449,8}, {296705,9}, {298497,11}, {299009,10}, {300033,9}, {301569,11}, {304129,10}, {307201,11}, {359937,12}, {385025,11}, {395265,10}, {399361,11}, {409601,10}, {411137,9}, {411649,10}, {413697,11}, {455681,10}, {457217,11}, {462849,10}, {463873,12}, {480257,13}, {495617,12}, {499713,13}, {507905,11}, {509953,13}, {516097,12}, {526337,10}, {528385,11}, {587777,12}, {651265,11}, {719873,12}, {782337,11}, {849921,12}, {915457,13}, {MP_SIZE_T_MAX, 0}} +#define GET_STR_DC_THRESHOLD 16 +#define GET_STR_PRECOMPUTE_THRESHOLD 31 +#define SET_STR_DC_THRESHOLD 422 +#define SET_STR_PRECOMPUTE_THRESHOLD 1524 diff --git a/mpn/powerpc64/mode64/p4/gmp-mparam.h b/mpn/powerpc64/mode64/p4/gmp-mparam.h index 8b6c11aa6..d11ecdd18 100644 --- a/mpn/powerpc64/mode64/p4/gmp-mparam.h +++ b/mpn/powerpc64/mode64/p4/gmp-mparam.h @@ -1,4 +1,4 @@ -/* PowerPC970 gmp-mparam.h -- Compiler/machine parameter header file. +/* POWER4/PowerPC970 gmp-mparam.h -- Compiler/machine parameter header file. Copyright 2008, 2009, 2010 Free Software Foundation, Inc. @@ -53,13 +53,114 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 12 #define SQRMOD_BNM1_THRESHOLD 14 -#define MUL_FFT_TABLE { 368, 544, 1856, 2816, 7168, 20480, 81920, 327680, 0 } -#define MUL_FFT_MODF_THRESHOLD 384 +#define MUL_FFT_MODF_THRESHOLD 654 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 654, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 13, 5}, { 27, 6}, { 21, 7}, { 11, 6}, \ + { 23, 7}, { 12, 6}, { 25, 7}, { 13, 6}, \ + { 27, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \ + { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ + { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ + { 35, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \ + { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \ + { 71,10}, { 39, 9}, { 83,10}, { 47, 9}, \ + { 95,10}, { 55,11}, { 31,10}, { 63, 9}, \ + { 127,10}, { 79,11}, { 47,10}, { 103,12}, \ + { 31,11}, { 63,10}, { 135, 8}, { 543,11}, \ + { 79,10}, { 159, 8}, { 639,10}, { 167, 9}, \ + { 335,11}, { 95, 9}, { 383, 8}, { 767,10}, \ + { 199, 9}, { 415,11}, { 111,12}, { 63,11}, \ + { 127, 9}, { 511, 8}, { 1023, 9}, { 543,11}, \ + { 143, 9}, { 575, 8}, { 1151, 9}, { 607,11}, \ + { 159, 9}, { 639,10}, { 351,12}, { 95,10}, \ + { 383, 9}, { 767,10}, { 415, 9}, { 831,13}, \ + { 63,12}, { 127,10}, { 511, 9}, { 1023,10}, \ + { 543, 9}, { 1087, 8}, { 2175,10}, { 575, 9}, \ + { 1151,10}, { 607, 9}, { 1215,12}, { 159,10}, \ + { 639,11}, { 335,10}, { 671,11}, { 351,10}, \ + { 703, 9}, { 1407,11}, { 383,10}, { 767,11}, \ + { 415,10}, { 831, 9}, { 1663,11}, { 447,10}, \ + { 895,13}, { 127,11}, { 511,10}, { 1023,11}, \ + { 543,10}, { 1087, 9}, { 2175,11}, { 575,10}, \ + { 1151,11}, { 607,10}, { 1215, 9}, { 2431,11}, \ + { 639,10}, { 1279,11}, { 671,12}, { 351,11}, \ + { 703,10}, { 1407,12}, { 383,11}, { 767,12}, \ + { 415,11}, { 831,10}, { 1663,12}, { 447,11}, \ + { 895,12}, { 479,11}, { 959,14}, { 127,12}, \ + { 511,11}, { 1023,12}, { 543,11}, { 1087,10}, \ + { 2175,12}, { 575,11}, { 1151,12}, { 607,11}, \ + { 1215,10}, { 2431,12}, { 639,11}, { 1279,12}, \ + { 671,11}, { 1343,12}, { 703,11}, { 1407,12}, \ + { 735,13}, { 383,12}, { 767,11}, { 1535,12}, \ + { 799,11}, { 1599,12}, { 831,11}, { 1663,13}, \ + { 447,12}, { 959,11}, { 1919,13}, { 511,12}, \ + { 1087,11}, { 2175,13}, { 575,12}, { 1215,11}, \ + { 2431,13}, { 639,12}, { 1343,11}, { 2687,13}, \ + { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \ + { 1599,13}, { 831,12}, { 1663,13}, { 895,12}, \ + { 1791,13}, { 959,12}, { 1919,11}, { 3839,14}, \ + { 511,13}, { 1087,12}, { 2175,13}, { 1215,12}, \ + { 2431,14}, { 639,13}, { 1343,12}, { 2687,13}, \ + { 1471,12}, { 2943,14}, { 767,13}, { 1599,12}, \ + { 3199,13}, { 1663,14}, { 895,13}, { 8192,14}, \ + { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \ + { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ + {4194304,23}, {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 206 #define MUL_FFT_THRESHOLD 9472 -#define SQR_FFT_TABLE { 304, 672, 1600, 2816, 7168, 20480, 81920, 196608, 0 } -#define SQR_FFT_MODF_THRESHOLD 320 -#define SQR_FFT_THRESHOLD 7424 +#define SQR_FFT_MODF_THRESHOLD 618 /* k = 6 */ +#define SQR_FFT_TABLE3 \ + { { 618, 6}, { 21, 7}, { 11, 6}, { 23, 7}, \ + { 21, 8}, { 11, 7}, { 25, 8}, { 13, 7}, \ + { 27, 8}, { 15, 7}, { 31, 8}, { 21, 9}, \ + { 11, 8}, { 27, 9}, { 15, 8}, { 33, 9}, \ + { 19, 8}, { 39, 9}, { 23, 8}, { 47, 9}, \ + { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \ + { 51,11}, { 15,10}, { 31, 9}, { 67,10}, \ + { 39, 9}, { 79,10}, { 47, 9}, { 95,10}, \ + { 55,11}, { 31,10}, { 79,11}, { 47,10}, \ + { 95, 8}, { 383,12}, { 31,11}, { 63,10}, \ + { 127, 8}, { 511,10}, { 135, 8}, { 543,11}, \ + { 79, 9}, { 319, 8}, { 639, 9}, { 351,11}, \ + { 95, 9}, { 383, 8}, { 767, 9}, { 415,12}, \ + { 63,11}, { 127, 9}, { 511, 8}, { 1023, 9}, \ + { 543, 8}, { 1087,11}, { 143, 9}, { 575, 8}, \ + { 1151, 9}, { 607,10}, { 319, 9}, { 639,10}, \ + { 351,12}, { 95,10}, { 383, 9}, { 767,10}, \ + { 415, 9}, { 831,13}, { 63,12}, { 127,10}, \ + { 511, 9}, { 1023,10}, { 543, 9}, { 1087, 8}, \ + { 2175,10}, { 575, 9}, { 1151,10}, { 607,11}, \ + { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \ + { 1407,11}, { 383,10}, { 767,11}, { 415,10}, \ + { 831, 9}, { 1663,11}, { 447,10}, { 895,11}, \ + { 479,10}, { 959,13}, { 127,11}, { 511,10}, \ + { 1023,11}, { 543,10}, { 1087, 9}, { 2175,11}, \ + { 575,10}, { 1151,11}, { 607,12}, { 319,11}, \ + { 639,12}, { 351,11}, { 703,10}, { 1407,12}, \ + { 383,11}, { 767,12}, { 415,11}, { 831,10}, \ + { 1663,12}, { 447,11}, { 895,12}, { 479,11}, \ + { 959,10}, { 1919,14}, { 127,12}, { 511,11}, \ + { 1023,12}, { 543,11}, { 1087,10}, { 2175,12}, \ + { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \ + { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \ + { 703,11}, { 1407,13}, { 383,12}, { 767,11}, \ + { 1535,12}, { 831,11}, { 1663,13}, { 447,12}, \ + { 959,11}, { 1919,10}, { 3839,13}, { 511,12}, \ + { 1087,11}, { 2175,13}, { 575,12}, { 1215,11}, \ + { 2431,13}, { 639,12}, { 1343,13}, { 703,12}, \ + { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \ + { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \ + { 959,12}, { 1919,11}, { 3839,14}, { 511,13}, \ + { 1087,12}, { 2175,13}, { 1215,12}, { 2431,14}, \ + { 639,13}, { 1343,12}, { 2687,13}, { 1407,12}, \ + { 2815,13}, { 1471,14}, { 767,13}, { 1663,14}, \ + { 895,13}, { 8192,14}, { 16384,15}, { 32768,16}, \ + { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ + {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 188 +#define SQR_FFT_THRESHOLD 7040 #define MULLO_BASECASE_THRESHOLD 5 #define MULLO_DC_THRESHOLD 34 @@ -70,18 +171,18 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DC_BDIV_QR_THRESHOLD 48 #define DC_BDIV_Q_THRESHOLD 120 -#define INV_MULMOD_BNM1_THRESHOLD 92 -#define INV_NEWTON_THRESHOLD 147 -#define INV_APPR_THRESHOLD 122 +#define INV_MULMOD_BNM1_THRESHOLD 107 +#define INV_NEWTON_THRESHOLD 170 +#define INV_APPR_THRESHOLD 117 #define BINV_NEWTON_THRESHOLD 206 #define REDC_1_TO_REDC_N_THRESHOLD 56 -#define MU_DIV_QR_THRESHOLD 1589 -#define MU_DIVAPPR_Q_THRESHOLD 1308 -#define MUPI_DIV_QR_THRESHOLD 62 -#define MU_BDIV_QR_THRESHOLD 1308 -#define MU_BDIV_Q_THRESHOLD 1334 +#define MU_DIV_QR_THRESHOLD 1470 +#define MU_DIVAPPR_Q_THRESHOLD 1334 +#define MUPI_DIV_QR_THRESHOLD 57 +#define MU_BDIV_QR_THRESHOLD 1099 +#define MU_BDIV_Q_THRESHOLD 1308 #define MATRIX22_STRASSEN_THRESHOLD 17 #define HGCD_THRESHOLD 86 @@ -91,9 +192,5 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_DC_THRESHOLD 11 #define GET_STR_PRECOMPUTE_THRESHOLD 24 -#define SET_STR_DC_THRESHOLD 532 +#define SET_STR_DC_THRESHOLD 795 #define SET_STR_PRECOMPUTE_THRESHOLD 1790 - -#define MUL_FFT_TABLE2 {{1,4}, {209,5}, {609,6}, {1345,7}, {3457,8}, {6913,9}, {7681,8}, {8961,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {35329,10}, {39937,9}, {42497,10}, {48641,9}, {50689,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {105473,12}, {126977,11}, {129025,10}, {139009,11}, {142337,10}, {145409,11}, {161793,10}, {171009,11}, {194561,10}, {212481,11}, {227329,12}, {258049,11}, {261121,9}, {278017,11}, {292865,10}, {293889,9}, {310785,10}, {326657,9}, {327425,10}, {331265,9}, {336897,10}, {337921,9}, {343553,10}, {359425,12}, {389121,11}, {424961,13}, {516097,12}, {520193,11}, {522241,10}, {556545,11}, {587777,10}, {621569,11}, {653313,10}, {687105,11}, {719873,12}, {782337,11}, {851457,12}, {913409,11}, {980993,13}, {1040385,12}, {1044481,11}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1440769,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2488321,13}, {2613249,12}, {2881537,13}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {209,5}, {609,6}, {1345,7}, {3073,8}, {6913,9}, {7681,8}, {8449,9}, {9729,8}, {10241,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {102401,12}, {126977,11}, {129025,10}, {130049,9}, {139009,11}, {161793,10}, {179713,11}, {194561,10}, {212481,12}, {258049,11}, {260097,10}, {278273,11}, {359425,12}, {389121,11}, {457729,13}, {516097,12}, {520193,11}, {589313,12}, {651265,11}, {718849,12}, {782337,11}, {850945,12}, {913409,11}, {982017,13}, {983041,12}, {999425,13}, {1024001,12}, {1028097,13}, {1040385,12}, {1044481,11}, {1113089,12}, {1175553,11}, {1244161,12}, {1437697,13}, {1564673,12}, {1965057,13}, {2088961,12}, {2488321,13}, {2613249,12}, {2748417,11}, {2881537,13}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/sparc64/ultrasparc34/gmp-mparam.h b/mpn/sparc64/ultrasparc34/gmp-mparam.h index 63190e8a7..270e623c6 100644 --- a/mpn/sparc64/ultrasparc34/gmp-mparam.h +++ b/mpn/sparc64/ultrasparc34/gmp-mparam.h @@ -75,7 +75,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DC_BDIV_QR_THRESHOLD 26 #define DC_BDIV_Q_THRESHOLD 92 -#define INV_MULMOD_BNM1_THRESHOLD 76 +#define INV_MULMOD_BNM1_THRESHOLD 58 #define INV_NEWTON_THRESHOLD 17 #define INV_APPR_THRESHOLD 17 diff --git a/mpn/x86/k7/gmp-mparam.h b/mpn/x86/k7/gmp-mparam.h index 441432f9d..363d07405 100644 --- a/mpn/x86/k7/gmp-mparam.h +++ b/mpn/x86/k7/gmp-mparam.h @@ -54,12 +54,101 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 18 #define SQRMOD_BNM1_THRESHOLD 19 -#define MUL_FFT_TABLE { 400, 800, 1408, 3584, 10240, 40960, 163840, 0 } -#define MUL_FFT_MODF_THRESHOLD 928 +#define MUL_FFT_MODF_THRESHOLD 888 /* k = 6 */ +#define MUL_FFT_TABLE3 \ + { { 888, 6}, { 25, 7}, { 13, 6}, { 27, 7}, \ + { 15, 6}, { 32, 7}, { 17, 6}, { 35, 7}, \ + { 19, 6}, { 39, 7}, { 23, 6}, { 47, 7}, \ + { 27, 8}, { 15, 7}, { 31, 6}, { 63, 7}, \ + { 35, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \ + { 47, 8}, { 31, 7}, { 63, 8}, { 39, 7}, \ + { 79, 9}, { 23, 8}, { 47, 7}, { 95, 8}, \ + { 51, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \ + { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143, 9}, { 79,10}, { 47,11}, { 31,10}, \ + { 63, 9}, { 127,10}, { 79, 9}, { 167,10}, \ + { 95, 9}, { 207,10}, { 111,11}, { 63,10}, \ + { 127, 9}, { 255,10}, { 159, 9}, { 319,11}, \ + { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ + { 271, 9}, { 543,10}, { 287,11}, { 159,10}, \ + { 319, 9}, { 671,11}, { 191,10}, { 383, 9}, \ + { 767,11}, { 223,12}, { 127,11}, { 255,10}, \ + { 511, 9}, { 1023,10}, { 543, 9}, { 1087,11}, \ + { 287,10}, { 575, 9}, { 1151,10}, { 607, 9}, \ + { 1215, 8}, { 2431,11}, { 319,10}, { 639, 9}, \ + { 1279,10}, { 671, 9}, { 1343,12}, { 191,11}, \ + { 383,10}, { 767, 9}, { 1535,10}, { 799, 9}, \ + { 1599,11}, { 415,10}, { 831, 9}, { 1663,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ + { 543,10}, { 1087,11}, { 575,10}, { 1151,11}, \ + { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \ + { 1279,11}, { 671,10}, { 1407, 9}, { 2815,11}, \ + { 735,10}, { 1471, 9}, { 2943,12}, { 383,11}, \ + { 767,10}, { 1535,11}, { 799,10}, { 1599,11}, \ + { 831,10}, { 1663,11}, { 863,10}, { 1727,11}, \ + { 895,10}, { 1791,11}, { 959,13}, { 255,12}, \ + { 511,11}, { 1023,10}, { 2047,11}, { 1087,12}, \ + { 575,11}, { 1151,10}, { 2303,11}, { 1215,10}, \ + { 2431,12}, { 639,11}, { 1407,10}, { 2815,11}, \ + { 1471,10}, { 2943,13}, { 383,12}, { 767,11}, \ + { 1599,12}, { 831,11}, { 1663,10}, { 3327,11}, \ + { 1727,12}, { 895,11}, { 1791,10}, { 3583,12}, \ + { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \ + { 1023,11}, { 2047,12}, { 1087,11}, { 2239,12}, \ + { 1151,11}, { 2303,12}, { 1215,11}, { 2431,13}, \ + { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 167 #define MUL_FFT_THRESHOLD 7808 -#define SQR_FFT_TABLE { 400, 800, 1408, 3584, 10240, 24576, 163840, 0 } -#define SQR_FFT_MODF_THRESHOLD 720 +#define SQR_FFT_MODF_THRESHOLD 786 /* k = 6 */ +#define SQR_FFT_TABLE3 \ + { { 786, 6}, { 25, 7}, { 13, 6}, { 27, 7}, \ + { 15, 6}, { 31, 7}, { 17, 6}, { 35, 7}, \ + { 19, 6}, { 39, 7}, { 23, 6}, { 47, 7}, \ + { 27, 8}, { 15, 7}, { 31, 6}, { 63, 7}, \ + { 35, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \ + { 47, 8}, { 31, 7}, { 63, 8}, { 39, 9}, \ + { 23, 8}, { 47, 7}, { 95, 8}, { 51, 9}, \ + { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ + { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \ + { 127, 9}, { 79,10}, { 47, 9}, { 95, 8}, \ + { 191,11}, { 31,10}, { 63, 9}, { 135,10}, \ + { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \ + { 111,11}, { 63,10}, { 143, 9}, { 287, 8}, \ + { 607,10}, { 159, 9}, { 319,10}, { 175,11}, \ + { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ + { 287,11}, { 159,10}, { 319, 9}, { 639, 8}, \ + { 1279, 9}, { 671, 8}, { 1343,11}, { 191,10}, \ + { 383, 9}, { 767, 8}, { 1535, 9}, { 799, 8}, \ + { 1599,10}, { 415,11}, { 223,12}, { 127,11}, \ + { 255,10}, { 511, 9}, { 1023,10}, { 543, 9}, \ + { 1087,11}, { 287,10}, { 575, 9}, { 1151,10}, \ + { 607, 9}, { 1215, 8}, { 2431,11}, { 319,10}, \ + { 639, 9}, { 1279,10}, { 671, 9}, { 1343,12}, \ + { 191,11}, { 383,10}, { 767, 9}, { 1535,10}, \ + { 799, 9}, { 1599,11}, { 415,10}, { 863,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ + { 543,10}, { 1087,11}, { 575,10}, { 1151, 9}, \ + { 2303,11}, { 607,10}, { 1215, 9}, { 2431,12}, \ + { 319,11}, { 639,10}, { 1279,11}, { 671,10}, \ + { 1407, 9}, { 2815,11}, { 735,10}, { 1471, 9}, \ + { 2943,11}, { 767,10}, { 1535,11}, { 799,10}, \ + { 1599,11}, { 831,10}, { 1663,11}, { 863,10}, \ + { 1727,11}, { 895,10}, { 1791,11}, { 959,10}, \ + { 1919,13}, { 255,12}, { 511,11}, { 1023,10}, \ + { 2047,11}, { 1087,10}, { 2175,12}, { 575,11}, \ + { 1151,10}, { 2303,11}, { 1215,10}, { 2431,12}, \ + { 639,11}, { 1407,10}, { 2815,11}, { 1471,10}, \ + { 2943,12}, { 767,11}, { 1599,12}, { 831,11}, \ + { 1663,10}, { 3327,12}, { 895,11}, { 1791,12}, \ + { 959,11}, { 1919,10}, { 3839,11}, { 1983,14}, \ + { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \ + { 1087,11}, { 2239,12}, { 1151,11}, { 2303,12}, \ + { 1215,11}, { 2431,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 177 #define SQR_FFT_THRESHOLD 7552 #define MULLO_BASECASE_THRESHOLD 10 @@ -71,7 +160,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DC_BDIV_QR_THRESHOLD 82 #define DC_BDIV_Q_THRESHOLD 268 -#define INV_MULMOD_BNM1_THRESHOLD 54 +#define INV_MULMOD_BNM1_THRESHOLD 75 #define INV_NEWTON_THRESHOLD 300 #define INV_APPR_THRESHOLD 303 @@ -94,7 +183,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_PRECOMPUTE_THRESHOLD 34 #define SET_STR_DC_THRESHOLD 542 #define SET_STR_PRECOMPUTE_THRESHOLD 1615 - -#define MUL_FFT_TABLE2 {{1,4}, {337,5}, {801,6}, {1601,7}, {3457,8}, {3841,7}, {4481,8}, {10113,9}, {11777,8}, {13057,9}, {15873,8}, {18177,9}, {28161,10}, {31745,9}, {40449,10}, {48129,9}, {52737,11}, {63489,10}, {64513,9}, {71681,10}, {73729,9}, {74241,10}, {80897,9}, {84481,10}, {84993,9}, {85505,10}, {97281,9}, {97793,10}, {98817,9}, {102401,10}, {113665,11}, {129025,10}, {130049,9}, {130561,10}, {163329,11}, {168961,10}, {169985,11}, {194561,10}, {195585,9}, {196097,10}, {199681,12}, {201729,10}, {208897,12}, {258049,11}, {260097,10}, {290817,11}, {326657,9}, {328193,8}, {328705,9}, {329729,10}, {331265,8}, {331521,9}, {336897,10}, {337921,11}, {391169,10}, {392193,9}, {392705,8}, {392961,9}, {393217,10}, {394241,9}, {396801,10}, {397313,11}, {399361,9}, {403457,10}, {405505,11}, {456705,12}, {520193,11}, {523265,9}, {524289,11}, {526337,9}, {539649,10}, {555009,9}, {556545,11}, {587777,10}, {622337,11}, {653313,10}, {686081,9}, {693761,11}, {694273,12}, {782337,11}, {784385,10}, {818177,9}, {818689,11}, {849921,9}, {851457,10}, {916481,11}, {917505,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1145857,11}, {1243137,10}, {1244673,12}, {1306625,11}, {1339393,10}, {1341441,11}, {1370113,10}, {1440769,11}, {1443841,10}, {1445889,11}, {1467393,10}, {1472513,11}, {1481729,10}, {1489921,11}, {1501185,10}, {1503233,11}, {1505281,10}, {1506817,12}, {1568769,11}, {1636353,10}, {1643521,11}, {1650689,10}, {1662977,11}, {1668097,10}, {1677313,11}, {1681409,10}, {1683457,11}, {1687553,10}, {1702913,11}, {1965057,13}, {2088961,12}, {2093057,11}, {2227201,12}, {2248705,11}, {2252801,12}, {2355201,11}, {2492417,12}, {2617345,11}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {369,5}, {673,6}, {1601,7}, {3457,8}, {3841,7}, {4481,8}, {9985,9}, {11777,8}, {13057,9}, {15873,8}, {17153,9}, {24321,10}, {31745,9}, {40449,10}, {48129,9}, {48641,11}, {63489,10}, {64513,9}, {69121,8}, {69377,9}, {73217,10}, {100353,11}, {102401,10}, {113665,11}, {129025,10}, {163329,11}, {194561,10}, {196609,12}, {258049,11}, {260097,10}, {263681,9}, {264193,10}, {290817,11}, {325633,10}, {326657,8}, {327169,9}, {333825,10}, {334849,9}, {340993,8}, {342017,9}, {343553,11}, {391169,9}, {393217,8}, {393473,10}, {394241,8}, {394497,9}, {395521,8}, {395777,9}, {396289,10}, {397313,9}, {398337,8}, {399873,9}, {404481,10}, {407553,9}, {409089,11}, {450561,12}, {520193,11}, {522241,10}, {523265,9}, {523777,10}, {556033,9}, {556545,11}, {587777,10}, {622337,11}, {649217,10}, {687105,9}, {711169,10}, {711681,12}, {782337,11}, {784385,10}, {821249,11}, {827393,10}, {829441,11}, {837633,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1244673,12}, {1306625,11}, {1374209,10}, {1442817,11}, {1470465,10}, {1471489,11}, {1495041,10}, {1499137,11}, {1507329,12}, {1527809,11}, {1536001,12}, {1540097,11}, {1554433,12}, {1560577,11}, {1562625,12}, {1564673,11}, {1595393,10}, {1596417,11}, {1599489,10}, {1600513,11}, {1638401,10}, {1644545,11}, {1646593,10}, {1650689,11}, {1652737,10}, {1655809,11}, {1656833,10}, {1657857,11}, {1660929,10}, {1664001,11}, {1671169,10}, {1674241,11}, {1675265,10}, {1677313,11}, {1681409,10}, {1685505,11}, {1689601,10}, {1691649,11}, {1694721,10}, {1700865,11}, {1767425,10}, {1900545,11}, {1965057,13}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86/p6/mmx/gmp-mparam.h b/mpn/x86/p6/mmx/gmp-mparam.h index d1bf75887..290c1f88a 100644 --- a/mpn/x86/p6/mmx/gmp-mparam.h +++ b/mpn/x86/p6/mmx/gmp-mparam.h @@ -79,7 +79,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DC_BDIV_QR_THRESHOLD 76 #define DC_BDIV_Q_THRESHOLD 175 -#define INV_MULMOD_BNM1_THRESHOLD 43 +#define INV_MULMOD_BNM1_THRESHOLD 82 #define INV_NEWTON_THRESHOLD 268 #define INV_APPR_THRESHOLD 250 diff --git a/mpn/x86/p6/sse2/gmp-mparam.h b/mpn/x86/p6/sse2/gmp-mparam.h index f5d3e4c05..7fa81b97a 100644 --- a/mpn/x86/p6/sse2/gmp-mparam.h +++ b/mpn/x86/p6/sse2/gmp-mparam.h @@ -79,7 +79,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define DC_BDIV_QR_THRESHOLD 60 #define DC_BDIV_Q_THRESHOLD 132 -#define INV_MULMOD_BNM1_THRESHOLD 83 +#define INV_MULMOD_BNM1_THRESHOLD 117 #define INV_NEWTON_THRESHOLD 81 #define INV_APPR_THRESHOLD 61 diff --git a/mpn/x86/pentium4/sse2/gmp-mparam.h b/mpn/x86/pentium4/sse2/gmp-mparam.h index b2a9c5bbe..6cd782ce1 100644 --- a/mpn/x86/pentium4/sse2/gmp-mparam.h +++ b/mpn/x86/pentium4/sse2/gmp-mparam.h @@ -54,12 +54,87 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 19 #define SQRMOD_BNM1_THRESHOLD 24 -#define MUL_FFT_TABLE { 592, 928, 1920, 3584, 14336, 40960, 163840, 393216, 0 } -#define MUL_FFT_MODF_THRESHOLD 960 -#define MUL_FFT_THRESHOLD 7808 - -#define SQR_FFT_TABLE { 592, 928, 1920, 3584, 14336, 40960, 98304, 393216, 0 } -#define SQR_FFT_MODF_THRESHOLD 848 +#define MUL_FFT_MODF_THRESHOLD 904 /* k = 6 */ +#define MUL_FFT_TABLE3 \ + { { 904, 6}, { 15, 5}, { 32, 6}, { 17, 5}, \ + { 35, 6}, { 19, 5}, { 39, 6}, { 28, 7}, \ + { 15, 6}, { 33, 7}, { 17, 6}, { 35, 7}, \ + { 19, 6}, { 41, 7}, { 21, 6}, { 43, 7}, \ + { 23, 6}, { 47, 7}, { 27, 6}, { 55, 8}, \ + { 15, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \ + { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \ + { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ + { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ + { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ + { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \ + { 127, 9}, { 79,10}, { 47, 9}, { 103,11}, \ + { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ + { 159,10}, { 95,11}, { 63,10}, { 127, 9}, \ + { 263,10}, { 143, 9}, { 287,10}, { 159,11}, \ + { 95,10}, { 207,12}, { 63,11}, { 127,10}, \ + { 271,11}, { 159,10}, { 319,11}, { 191,10}, \ + { 383,11}, { 223,12}, { 127,11}, { 287,10}, \ + { 607,11}, { 319,12}, { 191,11}, { 383,10}, \ + { 767,13}, { 127,12}, { 255,11}, { 511,10}, \ + { 1055,11}, { 543,10}, { 1119, 9}, { 2239,11}, \ + { 607,12}, { 319,11}, { 671,10}, { 1407,11}, \ + { 735,10}, { 1471, 9}, { 2943,12}, { 383,11}, \ + { 799,10}, { 1663,11}, { 863,10}, { 1727,12}, \ + { 447,13}, { 255,12}, { 511,11}, { 1055,10}, \ + { 2111,11}, { 1119,10}, { 2239, 9}, { 4479,12}, \ + { 575,11}, { 1247,10}, { 2495, 9}, { 4991,12}, \ + { 639,11}, { 1471,10}, { 2943,13}, { 383,12}, \ + { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \ + { 3455,14}, { 255,13}, { 511,12}, { 1023,11}, \ + { 2111,12}, { 1087,11}, { 2239,10}, { 4479,12}, \ + { 1215,11}, { 2495,10}, { 4991,13}, { 639,12}, \ + { 1471,11}, { 2943,10}, { 5887,11}, { 3007,13}, \ + { 767,12}, { 1727,11}, { 3455,13}, { 895,11}, \ + { 3839,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 141 +#define MUL_FFT_THRESHOLD 7552 + +#define SQR_FFT_MODF_THRESHOLD 793 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 793, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \ + { 17, 5}, { 35, 6}, { 19, 5}, { 39, 6}, \ + { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ + { 35, 7}, { 19, 6}, { 41, 7}, { 23, 6}, \ + { 47, 7}, { 27, 6}, { 55, 7}, { 31, 6}, \ + { 63, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \ + { 23, 7}, { 49, 8}, { 31, 7}, { 63, 8}, \ + { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \ + { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \ + { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ + { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \ + { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ + { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \ + { 159,11}, { 95,10}, { 191,12}, { 63,11}, \ + { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \ + { 159,10}, { 335,11}, { 191,10}, { 383, 9}, \ + { 767,10}, { 399, 9}, { 799,11}, { 223,12}, \ + { 127,11}, { 255,10}, { 527, 9}, { 1055,10}, \ + { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ + { 319,12}, { 191,11}, { 383,10}, { 799,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1055,11}, \ + { 543,10}, { 1119, 9}, { 2239,11}, { 607,10}, \ + { 1215,12}, { 319,11}, { 671,10}, { 1407,11}, \ + { 735,10}, { 1471, 9}, { 2943,10}, { 1503,12}, \ + { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ + { 1727,12}, { 447,11}, { 991,13}, { 255,12}, \ + { 511,11}, { 1055,10}, { 2111,11}, { 1119,10}, \ + { 2239,12}, { 575,11}, { 1247,10}, { 2495,12}, \ + { 639,11}, { 1471,10}, { 2943,13}, { 383,12}, \ + { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \ + { 3455,12}, { 959,11}, { 1919,14}, { 255,13}, \ + { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \ + { 2239,10}, { 4479,12}, { 1215,11}, { 2495,13}, \ + { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \ + { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ + { 1791,11}, { 3711,12}, { 1919,11}, { 3839,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 148 #define SQR_FFT_THRESHOLD 6784 #define MULLO_BASECASE_THRESHOLD 12 @@ -94,7 +169,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_PRECOMPUTE_THRESHOLD 28 #define SET_STR_DC_THRESHOLD 123 #define SET_STR_PRECOMPUTE_THRESHOLD 1265 - -#define MUL_FFT_TABLE2 {{1,4}, {529,5}, {1025,6}, {1217,5}, {1249,6}, {2113,7}, {2177,6}, {2241,7}, {2433,6}, {2625,7}, {5505,8}, {5889,7}, {6529,8}, {11009,9}, {11777,8}, {14081,9}, {15873,8}, {17409,9}, {28161,10}, {31745,9}, {40449,10}, {48129,9}, {52737,11}, {63489,9}, {69121,10}, {113665,11}, {129025,10}, {165889,11}, {194561,10}, {195585,12}, {258049,11}, {260097,10}, {278529,11}, {331777,10}, {343041,11}, {392705,12}, {393217,11}, {415745,12}, {417793,11}, {452609,12}, {520193,11}, {589825,10}, {620545,11}, {653313,10}, {654337,12}, {782337,11}, {784385,10}, {818177,11}, {819201,13}, {1040385,12}, {1044481,11}, {1208321,12}, {1220609,11}, {1222657,12}, {1228801,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1440769,11}, {1506305,12}, {1568769,11}, {1768449,12}, {1830913,13}, {2088961,12}, {2093057,11}, {2226177,9}, {2260993,11}, {2263041,12}, {2355201,11}, {2489345,9}, {2555393,11}, {2555905,12}, {2617345,11}, {3012609,13}, {3137537,12}, {3141633,11}, {3274753,12}, {3403777,11}, {3536897,10}, {3537921,12}, {3547137,14}, {3559425,12}, {3604481,14}, {3620865,12}, {3653633,14}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {529,5}, {1057,6}, {1729,7}, {1921,6}, {2113,7}, {2177,6}, {2241,7}, {2433,6}, {2625,7}, {5249,8}, {5889,7}, {6273,8}, {11009,9}, {11777,8}, {14081,9}, {15873,8}, {18177,9}, {28161,10}, {31745,9}, {40449,10}, {48129,9}, {52737,11}, {63489,10}, {97793,11}, {129025,10}, {160769,11}, {194561,10}, {195585,12}, {258049,11}, {261121,10}, {274433,11}, {327681,9}, {328193,11}, {333825,10}, {338945,11}, {391169,9}, {396289,11}, {397313,9}, {407553,11}, {413697,12}, {520193,11}, {523265,10}, {542721,11}, {548865,10}, {556033,11}, {587777,10}, {622081,11}, {653313,10}, {654337,12}, {782337,11}, {784385,10}, {834561,11}, {835585,13}, {1040385,12}, {1044481,11}, {1244161,12}, {1306625,11}, {1374209,10}, {1439745,11}, {1506817,12}, {1568769,11}, {1768449,12}, {1830913,11}, {1964033,13}, {2088961,12}, {2093057,11}, {2226177,12}, {2355201,11}, {2489345,12}, {2617345,11}, {3013633,13}, {3137537,12}, {3141633,11}, {3274753,12}, {3403777,11}, {3537921,12}, {3928065,14}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86_64/atom/gmp-mparam.h b/mpn/x86_64/atom/gmp-mparam.h index 1743166dc..e0f64dddc 100644 --- a/mpn/x86_64/atom/gmp-mparam.h +++ b/mpn/x86_64/atom/gmp-mparam.h @@ -53,17 +53,99 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 9 #define SQRMOD_BNM1_THRESHOLD 9 -#define MUL_FFT_TABLE { 272, 544, 1088, 1792, 5120, 12288, 49152, 196608, 786432, 0 } -#define MUL_FFT_MODF_THRESHOLD 240 -#define MUL_FFT_THRESHOLD 1856 - -#define SQR_FFT_TABLE { 272, 544, 1088, 1792, 5120, 12288, 49152, 196608, 786432, 0 } -#define SQR_FFT_MODF_THRESHOLD 216 +#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 212, 5}, { 7, 4}, { 15, 5}, { 11, 6}, \ + { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \ + { 13, 7}, { 7, 6}, { 15, 7}, { 9, 6}, \ + { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \ + { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \ + { 13, 9}, { 7, 8}, { 17, 9}, { 11,10}, \ + { 7, 9}, { 15, 8}, { 31, 9}, { 19, 8}, \ + { 39, 9}, { 23,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \ + { 63,10}, { 39, 9}, { 79,10}, { 47, 9}, \ + { 95, 8}, { 191,11}, { 31,10}, { 63, 9}, \ + { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \ + { 287,10}, { 79,11}, { 47, 9}, { 191,12}, \ + { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ + { 511,10}, { 143, 9}, { 287, 8}, { 575,11}, \ + { 79,10}, { 159, 9}, { 319,10}, { 175, 9}, \ + { 351, 8}, { 703,10}, { 191, 9}, { 383,10}, \ + { 207, 9}, { 415,11}, { 111,10}, { 223,12}, \ + { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \ + { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \ + { 319,11}, { 175,10}, { 351, 9}, { 703,11}, \ + { 191,10}, { 383,11}, { 207,10}, { 415,11}, \ + { 223,10}, { 447,13}, { 63,12}, { 127,11}, \ + { 255,10}, { 511,11}, { 287,10}, { 575,12}, \ + { 159,11}, { 351,10}, { 703,12}, { 191,11}, \ + { 383,12}, { 223,11}, { 479,13}, { 127,12}, \ + { 255,11}, { 511,12}, { 287,11}, { 575,12}, \ + { 319,11}, { 639,12}, { 351,11}, { 703,13}, \ + { 191,12}, { 415,11}, { 831,12}, { 447,11}, \ + { 895,12}, { 479,13}, { 255,12}, { 511,11}, \ + { 1023,12}, { 575,11}, { 1151,13}, { 319,12}, \ + { 703,13}, { 383,12}, { 831,13}, { 447,12}, \ + { 895,14}, { 255,13}, { 511,12}, { 1023,13}, \ + { 575,12}, { 1151,13}, { 703,14}, { 383,13}, \ + { 831,12}, { 1663,13}, { 895,15}, { 255,14}, \ + { 511,13}, { 1023,12}, { 2047,13}, { 1151,14}, \ + { 639,13}, { 1407,12}, { 2815,14}, { 767,13}, \ + { 1663,14}, { 895,13}, { 1791,15}, { 32768,16}, \ + { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ + {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 160 +#define MUL_FFT_THRESHOLD 1728 + +#define SQR_FFT_MODF_THRESHOLD 208 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 208, 5}, { 7, 4}, { 15, 5}, { 11, 6}, \ + { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \ + { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \ + { 17, 7}, { 17, 8}, { 9, 7}, { 19, 8}, \ + { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \ + { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \ + { 15, 8}, { 31, 9}, { 19, 8}, { 39, 9}, \ + { 23,10}, { 15, 9}, { 39,10}, { 23, 9}, \ + { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \ + { 39, 9}, { 79, 8}, { 159,10}, { 47,11}, \ + { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \ + { 71, 9}, { 143, 8}, { 287, 7}, { 575, 9}, \ + { 159, 8}, { 319,11}, { 47, 9}, { 191,12}, \ + { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \ + { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \ + { 159, 9}, { 319, 8}, { 639, 9}, { 351, 8}, \ + { 703,10}, { 191, 9}, { 383,10}, { 207, 9}, \ + { 415,11}, { 111,10}, { 223,12}, { 63,11}, \ + { 127,10}, { 255, 9}, { 511,11}, { 143,10}, \ + { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \ + { 639,11}, { 175,10}, { 351, 9}, { 703,11}, \ + { 191,10}, { 383,11}, { 207,10}, { 415,11}, \ + { 223,10}, { 447,13}, { 63,12}, { 127,11}, \ + { 255,10}, { 511,11}, { 287,10}, { 575,12}, \ + { 159,11}, { 319,10}, { 639,11}, { 351,10}, \ + { 703,12}, { 191,11}, { 383,10}, { 767,11}, \ + { 415,12}, { 223,11}, { 447,13}, { 127,12}, \ + { 255,11}, { 543,12}, { 287,11}, { 575,12}, \ + { 319,11}, { 639,12}, { 351,13}, { 191,12}, \ + { 383,11}, { 767,12}, { 415,11}, { 831,12}, \ + { 479,13}, { 255,10}, { 2047,12}, { 575,13}, \ + { 319,11}, { 1279,12}, { 703,13}, { 383,12}, \ + { 831,13}, { 447,12}, { 895,14}, { 255,13}, \ + { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \ + { 703,14}, { 383,13}, { 831,12}, { 1663,13}, \ + { 895,15}, { 255,14}, { 511,13}, { 1151,14}, \ + { 639,13}, { 1407,12}, { 2815,14}, { 767,13}, \ + { 1663,14}, { 895,13}, { 1791,15}, { 32768,16}, \ + { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \ + {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 160 #define SQR_FFT_THRESHOLD 1600 -#define MULLO_BASECASE_THRESHOLD 2 +#define MULLO_BASECASE_THRESHOLD 0 #define MULLO_DC_THRESHOLD 22 -#define MULLO_MUL_N_THRESHOLD 4141 +#define MULLO_MUL_N_THRESHOLD 3176 #define DC_DIV_QR_THRESHOLD 26 #define DC_DIVAPPR_Q_THRESHOLD 93 @@ -80,7 +162,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MU_DIV_QR_THRESHOLD 792 #define MU_DIVAPPR_Q_THRESHOLD 807 -#define MUPI_DIV_QR_THRESHOLD 58 +#define MUPI_DIV_QR_THRESHOLD 67 #define MU_BDIV_QR_THRESHOLD 654 #define MU_BDIV_Q_THRESHOLD 792 @@ -94,7 +176,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_PRECOMPUTE_THRESHOLD 27 #define SET_STR_DC_THRESHOLD 254 #define SET_STR_PRECOMPUTE_THRESHOLD 1122 - -#define MUL_FFT_TABLE2 {{1,4}, {177,5}, {353,6}, {833,7}, {2177,8}, {4865,9}, {5633,8}, {6401,10}, {7169,9}, {11777,10}, {15361,9}, {19969,10}, {23553,9}, {24065,11}, {30721,10}, {48129,11}, {63489,10}, {80897,11}, {96257,10}, {97281,9}, {114177,11}, {114689,12}, {126977,11}, {129025,10}, {146945,11}, {161793,10}, {179969,11}, {194561,10}, {228353,12}, {258049,11}, {359937,12}, {389121,11}, {456705,13}, {516097,12}, {520193,11}, {588801,12}, {651265,11}, {719873,12}, {782337,11}, {849921,12}, {913409,11}, {980993,13}, {1040385,12}, {1439745,13}, {1564673,12}, {1961985,13}, {2088961,12}, {2357249,13}, {2613249,12}, {2881537,13}, {3137537,12}, {3403777,13}, {3661825,14}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {177,5}, {353,6}, {833,7}, {2177,8}, {4865,9}, {5633,8}, {6401,10}, {7169,9}, {11777,10}, {15361,9}, {19969,10}, {23553,9}, {24065,11}, {30721,10}, {48129,11}, {63489,10}, {72705,9}, {81665,11}, {96257,9}, {97793,8}, {105473,10}, {106497,12}, {126977,11}, {129025,10}, {195585,9}, {212481,11}, {228353,12}, {258049,11}, {391169,10}, {424961,11}, {457729,13}, {516097,12}, {520193,11}, {588801,12}, {651265,11}, {719873,12}, {782337,11}, {849921,12}, {916481,13}, {1040385,12}, {1439745,13}, {1564673,12}, {1830913,14}, {2080769,13}, {2088961,12}, {2355201,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3162113,13}, {3227649,12}, {3395585,13}, {3661825,14}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86_64/core2/gmp-mparam.h b/mpn/x86_64/core2/gmp-mparam.h index d657b0408..ba39f4064 100644 --- a/mpn/x86_64/core2/gmp-mparam.h +++ b/mpn/x86_64/core2/gmp-mparam.h @@ -55,13 +55,103 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 12 #define SQRMOD_BNM1_THRESHOLD 14 -#define MUL_FFT_TABLE { 336, 800, 1600, 3328, 7168, 20480, 81920, 196608, 0 } -#define MUL_FFT_MODF_THRESHOLD 400 +#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 380, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \ + { 9, 5}, { 19, 6}, { 19, 7}, { 10, 6}, \ + { 21, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \ + { 27, 7}, { 21, 8}, { 11, 7}, { 23, 8}, \ + { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ + { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ + { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ + { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ + { 47, 9}, { 27,10}, { 15, 9}, { 43,10}, \ + { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \ + { 67,10}, { 39, 9}, { 79,10}, { 55,11}, \ + { 31, 9}, { 127,10}, { 71, 8}, { 287,10}, \ + { 79,11}, { 47,12}, { 31,11}, { 63, 9}, \ + { 255,10}, { 135, 9}, { 271,11}, { 79, 9}, \ + { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \ + { 383,11}, { 111,12}, { 63,11}, { 127,10}, \ + { 271, 9}, { 543,11}, { 143,10}, { 287,11}, \ + { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \ + { 351,12}, { 95,11}, { 191,10}, { 383,11}, \ + { 207,10}, { 415,13}, { 63,12}, { 127,11}, \ + { 271,10}, { 543,11}, { 287,12}, { 159,11}, \ + { 319,10}, { 671,11}, { 351,12}, { 191,11}, \ + { 415,12}, { 223,11}, { 447,10}, { 895,11}, \ + { 479,13}, { 127,12}, { 287,11}, { 607,12}, \ + { 319,11}, { 671,12}, { 351,13}, { 191,12}, \ + { 415,11}, { 831,10}, { 1663,12}, { 479,14}, \ + { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \ + { 607,13}, { 319,12}, { 703,13}, { 383,12}, \ + { 767,10}, { 3071,12}, { 831,13}, { 447,12}, \ + { 959,14}, { 255,13}, { 511,12}, { 1023,13}, \ + { 575,12}, { 1151,11}, { 2303,13}, { 639,12}, \ + { 1343,13}, { 703,14}, { 383,13}, { 831,12}, \ + { 1727,13}, { 959,15}, { 255,14}, { 511,13}, \ + { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \ + { 1343,12}, { 2687,13}, { 1407,12}, { 2815,14}, \ + { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \ + { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ + { 2431,12}, { 4863,13}, { 2495,14}, { 1279,13}, \ + { 2687,14}, { 1407,15}, { 767,14}, { 1663,13}, \ + { 3327,12}, { 6655,13}, { 3455,12}, { 6911,14}, \ + { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \ + { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ + {4194304,23}, {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 170 #define MUL_FFT_THRESHOLD 4736 -#define SQR_FFT_TABLE { 336, 672, 1728, 2816, 7168, 20480, 81920, 327680, 786432, 0 } -#define SQR_FFT_MODF_THRESHOLD 352 -#define SQR_FFT_THRESHOLD 3712 +#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ + { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \ + { 25, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \ + { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \ + { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \ + { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ + { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \ + { 23, 9}, { 59,11}, { 15,10}, { 31, 8}, \ + { 125, 9}, { 67,10}, { 39, 9}, { 79,10}, \ + { 47, 9}, { 103,10}, { 79, 9}, { 159,10}, \ + { 87, 9}, { 175, 8}, { 351,11}, { 47,10}, \ + { 95,11}, { 63,10}, { 127, 8}, { 511, 9}, \ + { 271, 8}, { 543,11}, { 79,10}, { 175,11}, \ + { 95,10}, { 191, 9}, { 415,12}, { 63,11}, \ + { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \ + { 575,11}, { 159,10}, { 319,11}, { 175,10}, \ + { 351,12}, { 95,11}, { 191,10}, { 383,11}, \ + { 207,10}, { 415,13}, { 63,12}, { 127,11}, \ + { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \ + { 287,10}, { 575,11}, { 303,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 351,12}, { 191,11}, \ + { 383,10}, { 767,11}, { 415,10}, { 831,12}, \ + { 223,11}, { 479,13}, { 127,12}, { 255,11}, \ + { 543,12}, { 287,11}, { 575,12}, { 319,11}, \ + { 639,12}, { 351,13}, { 191,12}, { 383,11}, \ + { 767,12}, { 415,11}, { 831,12}, { 447,11}, \ + { 895,12}, { 479,14}, { 127,13}, { 255,12}, \ + { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \ + { 639,11}, { 1279,12}, { 703,13}, { 383,12}, \ + { 831,13}, { 447,12}, { 959,14}, { 255,13}, \ + { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \ + { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \ + { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \ + { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \ + { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \ + { 2687,13}, { 1407,12}, { 2815,14}, { 767,13}, \ + { 1535,12}, { 3071,13}, { 1663,14}, { 895,13}, \ + { 1791,15}, { 511,14}, { 1023,13}, { 2175,14}, \ + { 1151,13}, { 2303,12}, { 4607,13}, { 2431,12}, \ + { 4863,14}, { 1279,13}, { 2687,14}, { 1407,13}, \ + { 2815,15}, { 767,14}, { 1535,13}, { 3071,14}, \ + { 1663,13}, { 3327,12}, { 6655,13}, { 3455,12}, \ + { 6911,14}, { 1791,16}, { 65536,17}, { 131072,18}, \ + { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \ + {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 178 +#define SQR_FFT_THRESHOLD 3520 #define MULLO_BASECASE_THRESHOLD 3 #define MULLO_DC_THRESHOLD 20 @@ -96,7 +186,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_PRECOMPUTE_THRESHOLD 20 #define SET_STR_DC_THRESHOLD 552 #define SET_STR_PRECOMPUTE_THRESHOLD 1790 - -#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1345,7}, {3201,8}, {6913,9}, {7681,8}, {8961,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {56321,11}, {61441,10}, {80897,11}, {96257,12}, {104449,10}, {105985,12}, {126977,11}, {129025,10}, {141313,11}, {163841,10}, {165889,11}, {194561,10}, {204801,11}, {227329,12}, {258049,11}, {261633,10}, {274433,11}, {292865,10}, {296961,11}, {299009,10}, {308225,11}, {326657,12}, {389121,11}, {424961,13}, {516097,12}, {520193,11}, {620545,12}, {651265,11}, {752641,12}, {782337,11}, {849921,12}, {913409,11}, {937985,13}, {944129,11}, {980993,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,11}, {1447937,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {3012609,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4349953,12}, {4354049,13}, {4362241,12}, {4370433,13}, {4407297,12}, {4415489,13}, {4431873,12}, {4440065,13}, {4710401,12}, {4976641,13}, {5758977,12}, {5763073,14}, {6275073,13}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1345,7}, {3201,8}, {3329,7}, {3457,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {48129,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {102401,11}, {104449,12}, {126977,11}, {129025,10}, {138241,11}, {163329,10}, {179201,9}, {179713,11}, {210945,10}, {211969,11}, {221185,12}, {258049,10}, {262145,9}, {262657,10}, {274945,11}, {279553,9}, {280577,11}, {285697,10}, {286721,11}, {292865,10}, {293889,9}, {295937,10}, {296961,11}, {299009,10}, {309249,9}, {310785,11}, {331777,10}, {332801,11}, {339969,12}, {348161,11}, {352257,12}, {389121,11}, {391169,10}, {393217,11}, {402433,10}, {405505,11}, {425985,13}, {516097,11}, {528385,10}, {529409,11}, {565249,10}, {566273,11}, {622593,12}, {651265,11}, {718849,12}, {765953,11}, {768001,12}, {782337,11}, {849921,12}, {913409,11}, {930817,13}, {942081,11}, {980993,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1437697,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3272705,13}, {3661825,12}, {3665921,14}, {4177921,13}, {4235265,12}, {4276225,13}, {4710401,12}, {4849665,13}, {4866049,12}, {4964353,13}, {5263361,12}, {5300225,13}, {5324801,12}, {5332993,13}, {5349377,11}, {5353473,12}, {5357569,13}, {5369857,14}, {5373953,13}, {5423105,12}, {5455873,13}, {5603329,12}, {5611521,13}, {5664769,14}, {5668865,13}, {5758977,14}, {6275073,13}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86_64/corei/gmp-mparam.h b/mpn/x86_64/corei/gmp-mparam.h index 4bd9a3580..269dee976 100644 --- a/mpn/x86_64/corei/gmp-mparam.h +++ b/mpn/x86_64/corei/gmp-mparam.h @@ -55,13 +55,110 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 11 #define SQRMOD_BNM1_THRESHOLD 16 -#define MUL_FFT_TABLE { 336, 800, 1600, 2816, 7168, 20480, 81920, 196608, 0 } -#define MUL_FFT_MODF_THRESHOLD 400 +#define MUL_FFT_MODF_THRESHOLD 396 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 396, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ + { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \ + { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \ + { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \ + { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \ + { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \ + { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \ + { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ + { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \ + { 31,10}, { 79,11}, { 47,10}, { 95,12}, \ + { 31,11}, { 63,10}, { 135,11}, { 79,10}, \ + { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ + { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 543,11}, { 143,10}, { 287, 9}, { 607,10}, \ + { 319,12}, { 95,11}, { 191,10}, { 383,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 511, 9}, \ + { 1023,10}, { 543,11}, { 287,10}, { 575,11}, \ + { 303,12}, { 159,11}, { 319,10}, { 639,11}, \ + { 335,10}, { 671,11}, { 351,10}, { 703,11}, \ + { 367,12}, { 191,11}, { 383,10}, { 767,11}, \ + { 415,10}, { 831,11}, { 447,10}, { 895,13}, \ + { 127,11}, { 511,10}, { 1023,11}, { 543,12}, \ + { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \ + { 319,11}, { 671,12}, { 351,11}, { 703,10}, \ + { 1407,12}, { 383,11}, { 767,12}, { 415,11}, \ + { 831,10}, { 1663,12}, { 447,11}, { 895,12}, \ + { 479,14}, { 127,12}, { 511,11}, { 1023,12}, \ + { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \ + { 671,11}, { 1343,12}, { 703,11}, { 1407,12}, \ + { 735,13}, { 383,12}, { 831,11}, { 1663,13}, \ + { 447,12}, { 959,11}, { 1919,14}, { 255,13}, \ + { 511,12}, { 1087,13}, { 575,12}, { 1215,11}, \ + { 2431,13}, { 639,12}, { 1343,13}, { 703,12}, \ + { 1407,14}, { 383,13}, { 831,12}, { 1663,13}, \ + { 959,12}, { 1919,14}, { 511,13}, { 1087,12}, \ + { 2175,13}, { 1215,12}, { 2431,14}, { 639,13}, \ + { 1343,12}, { 2687,13}, { 1407,12}, { 2815,13}, \ + { 1471,14}, { 767,13}, { 1663,14}, { 895,13}, \ + { 1919,15}, { 511,14}, { 1023,13}, { 2175,14}, \ + { 1151,13}, { 2431,12}, { 4863,14}, { 1279,13}, \ + { 2687,14}, { 1407,13}, { 2943,15}, { 767,14}, \ + { 1663,13}, { 3455,14}, { 1919,16}, { 511,15}, \ + { 1023,14}, { 2431,13}, { 4863,15}, { 1279,14}, \ + { 2943,13}, { 5887,15}, { 1535,14}, { 3455,15}, \ + { 1791,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ + { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ + {2097152,22}, {4194304,23}, {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 191 #define MUL_FFT_THRESHOLD 4224 -#define SQR_FFT_TABLE { 336, 672, 1728, 2816, 7168, 20480, 49152, 196608, 0 } -#define SQR_FFT_MODF_THRESHOLD 336 -#define SQR_FFT_THRESHOLD 3264 +#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ + { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \ + { 11, 7}, { 25, 8}, { 13, 7}, { 27, 8}, \ + { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \ + { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \ + { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \ + { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \ + { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ + { 79,10}, { 47,11}, { 31,10}, { 79,11}, \ + { 47,10}, { 95,12}, { 31,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 135, 8}, \ + { 543,11}, { 79, 9}, { 319,11}, { 95, 9}, \ + { 415,12}, { 63,11}, { 127, 8}, { 1087,10}, \ + { 287, 9}, { 575,10}, { 319,12}, { 95,10}, \ + { 383,13}, { 63,12}, { 127,11}, { 255,10}, \ + { 511, 9}, { 1023,11}, { 287,10}, { 575, 9}, \ + { 1151,11}, { 319,10}, { 639, 9}, { 1279,10}, \ + { 671,11}, { 351,10}, { 703, 9}, { 1407,11}, \ + { 383,10}, { 767,11}, { 415,10}, { 831, 9}, \ + { 1663,11}, { 447,10}, { 895,11}, { 479,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ + { 543,10}, { 1087,12}, { 287,11}, { 575,10}, \ + { 1151,11}, { 607,12}, { 319,11}, { 671,12}, \ + { 351,11}, { 703,12}, { 383,11}, { 767,12}, \ + { 415,11}, { 831,12}, { 447,11}, { 895,12}, \ + { 479,14}, { 127,12}, { 511,11}, { 1023,12}, \ + { 543,11}, { 1087,12}, { 575,11}, { 1151,12}, \ + { 607,13}, { 319,12}, { 639,11}, { 1279,12}, \ + { 671,11}, { 1343,12}, { 703,11}, { 1407,13}, \ + { 383,12}, { 767,11}, { 1535,12}, { 831,13}, \ + { 447,12}, { 959,13}, { 511,12}, { 1087,13}, \ + { 575,12}, { 1215,13}, { 639,12}, { 1343,13}, \ + { 703,14}, { 383,13}, { 767,12}, { 1535,13}, \ + { 831,12}, { 1663,13}, { 959,12}, { 1919,14}, \ + { 511,13}, { 1087,12}, { 2175,13}, { 1215,14}, \ + { 639,13}, { 1343,12}, { 2687,13}, { 1407,12}, \ + { 2815,13}, { 1471,14}, { 767,13}, { 1663,14}, \ + { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \ + { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \ + { 1279,13}, { 2687,14}, { 1407,13}, { 2815,15}, \ + { 767,14}, { 1535,13}, { 3071,14}, { 1663,13}, \ + { 3455,14}, { 1919,16}, { 511,15}, { 1023,14}, \ + { 2431,13}, { 4863,15}, { 1279,14}, { 2943,15}, \ + { 1535,14}, { 3455,15}, { 1791,14}, { 16384,15}, \ + { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 185 +#define SQR_FFT_THRESHOLD 3200 #define MULLO_BASECASE_THRESHOLD 4 #define MULLO_DC_THRESHOLD 17 @@ -82,7 +179,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MU_DIV_QR_THRESHOLD 1334 #define MU_DIVAPPR_Q_THRESHOLD 1499 -#define MUPI_DIV_QR_THRESHOLD 124 +#define MUPI_DIV_QR_THRESHOLD 81 #define MU_BDIV_QR_THRESHOLD 1187 #define MU_BDIV_Q_THRESHOLD 1308 @@ -96,7 +193,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define GET_STR_PRECOMPUTE_THRESHOLD 23 #define SET_STR_DC_THRESHOLD 226 #define SET_STR_PRECOMPUTE_THRESHOLD 1660 - -#define MUL_FFT_TABLE2 {{1,4}, {241,5}, {545,6}, {1345,7}, {3073,8}, {6913,9}, {7681,8}, {8449,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {51201,11}, {63489,10}, {79873,11}, {96257,10}, {98305,12}, {126977,11}, {129025,10}, {138241,11}, {195585,9}, {196097,10}, {196609,12}, {200705,11}, {204801,12}, {210945,11}, {217089,12}, {258049,11}, {261121,10}, {266241,11}, {295425,10}, {295937,9}, {296449,11}, {303105,10}, {305153,9}, {306177,11}, {308225,10}, {315393,11}, {317441,10}, {318465,11}, {323585,10}, {324609,11}, {327169,12}, {389121,11}, {417793,13}, {516097,12}, {520193,10}, {521217,11}, {587777,10}, {588801,12}, {596993,10}, {603137,12}, {651265,11}, {722945,12}, {724993,11}, {776193,12}, {782337,11}, {851969,12}, {913409,11}, {917505,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1949697,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2617345,11}, {2639873,12}, {2641921,11}, {2670593,12}, {2699265,11}, {2703361,12}, {2719745,11}, {2744321,12}, {2879489,13}, {3137537,12}, {3405825,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4978689,13}, {5238785,12}, {5300225,13}, {5308417,12}, {5328897,13}, {5332993,12}, {5443585,13}, {5447681,12}, {5459969,13}, {5468161,12}, {5480449,13}, {5758977,14}, {6275073,13}, {7860225,15}, {8355841,14}, {8372225,13}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1345,7}, {3073,8}, {6913,9}, {7681,8}, {8449,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {51201,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {137217,11}, {194561,10}, {196097,12}, {258049,11}, {261121,9}, {262145,10}, {263169,9}, {263681,10}, {264193,11}, {266241,10}, {274433,9}, {275457,10}, {279041,11}, {290817,9}, {294913,11}, {296961,9}, {300545,8}, {300801,9}, {301057,11}, {309249,9}, {310273,10}, {311297,12}, {315393,10}, {316417,11}, {327681,12}, {350209,10}, {351233,12}, {389121,10}, {392705,11}, {417793,13}, {516097,11}, {522241,10}, {524289,11}, {528385,10}, {529409,11}, {546817,10}, {547841,11}, {587777,10}, {620033,12}, {624641,11}, {653313,10}, {656385,11}, {669697,10}, {671745,11}, {677889,10}, {684033,11}, {720897,12}, {735233,11}, {737281,12}, {761857,11}, {763905,12}, {774145,11}, {776193,12}, {778241,11}, {849921,12}, {851969,11}, {854017,12}, {856065,11}, {860161,12}, {905217,11}, {907265,12}, {913409,11}, {980993,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1234945,12}, {1306625,11}, {1323009,12}, {1327105,11}, {1357825,12}, {1439745,13}, {1564673,12}, {1964033,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {3932161,13}, {3948545,14}, {4046849,13}, {4079617,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4759553,13}, {4767745,12}, {4976641,13}, {5234689,12}, {5378049,13}, {5386241,12}, {5500929,13}, {5758977,14}, {6275073,13}, {7856129,15}, {7897089,14}, {7913473,15}, {7929857,14}, {8077313,15}, {8093697,14}, {8273921,15}, {8323073,14}, {8372225,13}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86_64/gmp-mparam.h b/mpn/x86_64/gmp-mparam.h index 465f6fae9..8ebc943fc 100644 --- a/mpn/x86_64/gmp-mparam.h +++ b/mpn/x86_64/gmp-mparam.h @@ -24,29 +24,29 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 +#define MOD_1N_TO_MOD_1_1_THRESHOLD 8 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 6 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 15 +#define MOD_1_2_TO_MOD_1_4_THRESHOLD 11 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14 #define USE_PREINV_DIVREM_1 1 /* native */ #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 20 +#define BMOD_1_TO_MOD_1_THRESHOLD 19 #define MUL_TOOM22_THRESHOLD 28 -#define MUL_TOOM33_THRESHOLD 77 -#define MUL_TOOM44_THRESHOLD 260 -#define MUL_TOOM6H_THRESHOLD 393 -#define MUL_TOOM8H_THRESHOLD 517 +#define MUL_TOOM33_THRESHOLD 81 +#define MUL_TOOM44_THRESHOLD 120 +#define MUL_TOOM6H_THRESHOLD 466 +#define MUL_TOOM8H_THRESHOLD 478 -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 163 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 175 +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 160 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 160 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 211 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 38 -#define SQR_TOOM3_THRESHOLD 121 +#define SQR_TOOM2_THRESHOLD 34 +#define SQR_TOOM3_THRESHOLD 117 #define SQR_TOOM4_THRESHOLD 512 #define SQR_TOOM6_THRESHOLD 686 #define SQR_TOOM8_THRESHOLD 686 @@ -54,48 +54,144 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define MULMOD_BNM1_THRESHOLD 17 #define SQRMOD_BNM1_THRESHOLD 17 -#define MUL_FFT_TABLE { 400, 992, 1984, 3840, 11264, 28672, 81920, 0 } -#define MUL_FFT_MODF_THRESHOLD 656 -#define MUL_FFT_THRESHOLD 7808 - -#define SQR_FFT_TABLE { 400, 800, 2240, 3840, 11264, 28672, 114688, 0 } -#define SQR_FFT_MODF_THRESHOLD 528 -#define SQR_FFT_THRESHOLD 5312 +#define MUL_FFT_MODF_THRESHOLD 570 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 570, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ + { 31, 7}, { 25, 8}, { 13, 7}, { 29, 8}, \ + { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \ + { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \ + { 23, 7}, { 47, 8}, { 25, 7}, { 51, 8}, \ + { 29, 9}, { 15, 8}, { 37, 9}, { 19, 8}, \ + { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \ + { 55,10}, { 15, 9}, { 43,10}, { 23, 9}, \ + { 55,10}, { 31, 9}, { 63, 5}, { 1023, 4}, \ + { 2431, 5}, { 1279, 6}, { 671, 7}, { 367, 8}, \ + { 189, 9}, { 95, 8}, { 195, 9}, { 111,11}, \ + { 31, 9}, { 131,10}, { 71, 9}, { 155,10}, \ + { 79, 9}, { 159,10}, { 87,11}, { 47,10}, \ + { 111,11}, { 63,10}, { 135,11}, { 79,10}, \ + { 167,11}, { 95,10}, { 191,11}, { 111,12}, \ + { 63,11}, { 143,10}, { 287,11}, { 159,10}, \ + { 319,11}, { 175,12}, { 95,11}, { 207,13}, \ + { 63,12}, { 127,11}, { 255,10}, { 543,11}, \ + { 287,12}, { 159,11}, { 319,10}, { 639,11}, \ + { 335,10}, { 671,11}, { 351,10}, { 703,12}, \ + { 191,11}, { 383,10}, { 767,11}, { 415,12}, \ + { 223,13}, { 127,12}, { 255,11}, { 543,12}, \ + { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \ + { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \ + { 351,11}, { 703,13}, { 191,12}, { 383,11}, \ + { 767,12}, { 415,11}, { 831,12}, { 447,14}, \ + { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \ + { 607,11}, { 1215,13}, { 319,12}, { 671,11}, \ + { 1343,12}, { 735,13}, { 383,12}, { 767,11}, \ + { 1535,12}, { 799,11}, { 1599,12}, { 831,13}, \ + { 447,12}, { 895,11}, { 1791,12}, { 959,14}, \ + { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \ + { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \ + { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \ + { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \ + { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \ + { 2175,13}, { 1215,14}, { 639,13}, { 1471,14}, \ + { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \ + { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \ + { 2431,14}, { 1279,13}, { 2687,14}, { 1407,15}, \ + { 767,14}, { 1535,13}, { 3071,14}, { 1791,16}, \ + { 511,15}, { 1023,14}, { 2431,15}, { 1279,14}, \ + { 2815,15}, { 1535,14}, { 3199,15}, { 1791,14}, \ + { 3583,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ + { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ + {8388608,24} } +#define MUL_FFT_TABLE3_SIZE 185 +#define MUL_FFT_THRESHOLD 7552 + +#define SQR_FFT_MODF_THRESHOLD 460 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 460, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ + { 12, 5}, { 25, 6}, { 27, 7}, { 14, 6}, \ + { 29, 7}, { 15, 6}, { 31, 7}, { 29, 8}, \ + { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \ + { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \ + { 25, 7}, { 51, 8}, { 29, 9}, { 15, 8}, \ + { 35, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \ + { 51, 9}, { 27, 8}, { 55,10}, { 15, 9}, \ + { 31, 8}, { 63, 9}, { 43,10}, { 23, 9}, \ + { 55,11}, { 15,10}, { 31, 9}, { 71,10}, \ + { 39, 9}, { 83,10}, { 47, 6}, { 767, 4}, \ + { 3263, 5}, { 1727, 4}, { 3455, 5}, { 1791, 6}, \ + { 927, 7}, { 479, 6}, { 959, 7}, { 511, 8}, \ + { 271, 9}, { 147,10}, { 87,11}, { 47,10}, \ + { 95,12}, { 31,11}, { 63,10}, { 135,11}, \ + { 79,10}, { 167,11}, { 95,10}, { 191,11}, \ + { 111,12}, { 63,11}, { 127,10}, { 255,11}, \ + { 143,10}, { 287, 9}, { 575,10}, { 303,11}, \ + { 159,12}, { 95,11}, { 191,10}, { 383, 9}, \ + { 767,10}, { 399,11}, { 207,13}, { 63,12}, \ + { 127,11}, { 255,10}, { 511,11}, { 271,10}, \ + { 543,11}, { 287,10}, { 575,12}, { 159,11}, \ + { 319,10}, { 639,11}, { 335,10}, { 671,11}, \ + { 351,10}, { 703,12}, { 191,11}, { 383,10}, \ + { 767,11}, { 415,10}, { 831,11}, { 447,13}, \ + { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ + { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \ + { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \ + { 1279,11}, { 671,12}, { 351,11}, { 703,13}, \ + { 191,12}, { 383,11}, { 767,12}, { 415,11}, \ + { 831,12}, { 447,14}, { 127,13}, { 255,12}, \ + { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \ + { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \ + { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \ + { 703,11}, { 1407,12}, { 735,13}, { 383,12}, \ + { 767,11}, { 1535,12}, { 799,11}, { 1599,12}, \ + { 831,13}, { 447,12}, { 959,14}, { 255,13}, \ + { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \ + { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \ + { 383,13}, { 767,12}, { 1599,13}, { 831,12}, \ + { 1663,13}, { 895,12}, { 1791,13}, { 959,15}, \ + { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \ + { 1215,14}, { 639,13}, { 1471,14}, { 767,13}, \ + { 1663,14}, { 895,13}, { 1855,15}, { 511,14}, \ + { 1023,13}, { 2175,14}, { 1151,13}, { 2303,14}, \ + { 1279,13}, { 2559,14}, { 1407,15}, { 767,14}, \ + { 1535,13}, { 3071,14}, { 1791,16}, { 511,15}, \ + { 1023,14}, { 2303,15}, { 1279,14}, { 2687,15}, \ + { 1535,14}, { 3199,15}, { 1791,16}, { 65536,17}, \ + { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ + {2097152,22}, {4194304,23}, {8388608,24} } +#define SQR_FFT_TABLE3_SIZE 203 +#define SQR_FFT_THRESHOLD 5248 #define MULLO_BASECASE_THRESHOLD 9 #define MULLO_DC_THRESHOLD 29 #define MULLO_MUL_N_THRESHOLD 14709 -#define DC_DIV_QR_THRESHOLD 48 +#define DC_DIV_QR_THRESHOLD 56 #define DC_DIVAPPR_Q_THRESHOLD 270 -#define DC_BDIV_QR_THRESHOLD 45 +#define DC_BDIV_QR_THRESHOLD 52 #define DC_BDIV_Q_THRESHOLD 152 -#define INV_MULMOD_BNM1_THRESHOLD 152 +#define INV_MULMOD_BNM1_THRESHOLD 174 #define INV_NEWTON_THRESHOLD 252 #define INV_APPR_THRESHOLD 250 #define BINV_NEWTON_THRESHOLD 345 -#define REDC_1_TO_REDC_2_THRESHOLD 11 -#define REDC_2_TO_REDC_N_THRESHOLD 84 +#define REDC_1_TO_REDC_2_THRESHOLD 50 +#define REDC_2_TO_REDC_N_THRESHOLD 79 -#define MU_DIV_QR_THRESHOLD 1932 -#define MU_DIVAPPR_Q_THRESHOLD 1895 +#define MU_DIV_QR_THRESHOLD 1787 +#define MU_DIVAPPR_Q_THRESHOLD 1787 #define MUPI_DIV_QR_THRESHOLD 106 #define MU_BDIV_QR_THRESHOLD 1620 #define MU_BDIV_Q_THRESHOLD 1787 -#define MATRIX22_STRASSEN_THRESHOLD 21 +#define MATRIX22_STRASSEN_THRESHOLD 17 #define HGCD_THRESHOLD 139 #define GCD_DC_THRESHOLD 555 #define GCDEXT_DC_THRESHOLD 496 #define JACOBI_BASE_METHOD 1 -#define GET_STR_DC_THRESHOLD 18 +#define GET_STR_DC_THRESHOLD 9 #define GET_STR_PRECOMPUTE_THRESHOLD 23 #define SET_STR_DC_THRESHOLD 248 #define SET_STR_PRECOMPUTE_THRESHOLD 1648 - -#define MUL_FFT_TABLE2 {{1,4}, {337,5}, {737,6}, {1665,7}, {4097,8}, {10497,9}, {11777,8}, {13057,9}, {13825,8}, {14081,10}, {15361,9}, {15873,8}, {16129,9}, {22017,10}, {23553,9}, {28161,10}, {28673,9}, {29697,10}, {31745,9}, {36353,10}, {39937,9}, {42497,10}, {48129,9}, {49153,10}, {56321,11}, {63489,10}, {64513,9}, {69633,10}, {72705,9}, {77825,11}, {96257,10}, {97281,12}, {122881,11}, {129025,10}, {145409,11}, {161793,10}, {179201,11}, {227329,10}, {241665,12}, {258049,11}, {260097,10}, {269313,9}, {272385,11}, {293377,9}, {294401,10}, {297473,11}, {301057,9}, {309249,11}, {325633,9}, {327425,10}, {343041,9}, {343809,11}, {358401,12}, {389121,11}, {391169,9}, {392705,8}, {392961,9}, {396289,11}, {399361,9}, {408577,10}, {409601,11}, {466945,12}, {471041,13}, {475137,12}, {487425,13}, {491521,12}, {503809,13}, {516097,12}, {520193,11}, {522241,10}, {551937,11}, {552961,10}, {607233,12}, {610305,10}, {612353,12}, {651265,11}, {681985,10}, {683009,11}, {686081,10}, {687617, 11}, {692225,10}, {696321,11}, {701441,10}, {703489,11}, {708609,10}, {709633,11}, {711681,10}, {712705,11}, {714753,10}, {731137,12}, {741377,10}, {752641,12}, {782337,11}, {784385,10}, {817153,11}, {849921,10}, {850945,12}, {913409,11}, {915457,10}, {949249,11}, {980993,13}, {1040385,11}, {1388545,12}, {1394689,11}, {1404929,12}, {1409025,11}, {1505281,13}, {1564673,12}, {1568769,11}, {1637377,12}, {1699841,11}, {1768449,12}, {1830913,11}, {1898497,12}, {1961985,14}, {2080769,12}, {3536897,13}, {3661825,12}, {3928065,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5238785,12}, {5513217,13}, {5550081,12}, {5574657,13}, {5734401,12}, {6025217,13}, {6283265,12}, {6549505,13}, {6815745,12}, {6852609,13}, {6873089,12}, {6881281,13}, {6889473,12}, {6946817,13}, {6955009,12}, {MP_SIZE_T_MAX, 0}} - -#define SQR_FFT_TABLE2 {{1,4}, {369,5}, {801,6}, {1729,7}, {4097,8}, {8961,9}, {9729,8}, {10497,9}, {11777,8}, {13057,9}, {13825,8}, {14081,10}, {15361,9}, {22017,10}, {23553,9}, {28161,11}, {28673,10}, {31745,9}, {35841,10}, {39937,9}, {42497,10}, {56321,11}, {63489,10}, {87041,11}, {96257,10}, {106497,12}, {126977,11}, {129025,10}, {138753,8}, {139265,10}, {146433,8}, {147457,10}, {149505,8}, {150017,10}, {155649,11}, {161793,10}, {162817,8}, {164097,10}, {167937,8}, {189441,10}, {190465,11}, {194561,10}, {196097,8}, {196609,10}, {204289,8}, {204673,9}, {212737,11}, {223233,12}, {258049,11}, {260097,9}, {280577,11}, {282625,9}, {284161,11}, {284673,9}, {299009,10}, {300033,9}, {335873,8}, {336129,9}, {366593,11}, {368641,12}, {372737,11}, {374785,9}, {389121,11}, {391169,9}, {409601,10}, {427009,11}, {428033,9}, {428545,8}, {429057,10}, {438273,11}, {440321,10}, {454657,11}, {456705,10}, {467457,9}, {467969,10}, {475137,12}, {479233,13}, {516097,12}, {520193,10}, {546817,11}, {550913,10}, {588801,9}, {589313,10}, {706561,11}, {708609,10}, {755713,11}, {757761,10}, {758785,12}, {761857,10}, {771073,11}, {774145,10}, {777217,12}, {778241,10}, {779265,11}, {780289,10}, {801793,9}, {802305,10}, {818689,11}, {849921,9}, {851457,11}, {852993,9}, {854017,11}, {856065,9}, {856577,11}, {864257,9}, {865281,11}, {868865,9}, {870401,11}, {1540097,13}, {1550337,11}, {1637377,12}, {1701889,10}, {1703937,12}, {1708033,10}, {1712129,12}, {1717249,10}, {1721345,12}, {1724417,10}, {1725441,12}, {1728513,10}, {1744897,12}, {1835009,14}, {1851393,12}, {1867777,14}, {1884161,12}, {1892353,14}, {1900545,12}, {1921025,14}, {2000001,12}, {3403777,11}, {3536897,13}, {3661825,12}, {3743745,11}, {3749889,12}, {3928065,13}, {4186113,12}, {4456449,13}, {4464641,12}, {4472833,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5500929,13}, {5758977,12}, {MP_SIZE_T_MAX, 0}} -- cgit v1.2.1