summary | refs | log | tree | commit | diff
path: root/mpn/x86_64/core2
diff options
context:
space:
mode:
author: Torbjorn Granlund <tege@gmplib.org> 2010-01-21 21:55:35 +0100
committer: Torbjorn Granlund <tege@gmplib.org> 2010-01-21 21:55:35 +0100
commit: e531a140af2926cbb3b70d0b7b5c011bf0ac0765 (patch)
tree: 76440ce647482ad9fbd3e75f95e77406ee7cad6c /mpn/x86_64/core2
parent: 496ca4cc8655839b25423a89fa0b88530f83c6bc (diff)
download: gmp-e531a140af2926cbb3b70d0b7b5c011bf0ac0765.tar.gz
Add FFT_TABLE3 tables for a basic set of machines.
Diffstat (limited to 'mpn/x86_64/core2')
-rw-r--r-- mpn/x86_64/core2/gmp-mparam.h 104
1 files changed, 95 insertions, 9 deletions
diff --git a/mpn/x86_64/core2/gmp-mparam.h b/mpn/x86_64/core2/gmp-mparam.h
index d657b0408..ba39f4064 100644
--- a/mpn/x86_64/core2/gmp-mparam.h
+++ b/mpn/x86_64/core2/gmp-mparam.h
@@ -55,13 +55,103 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define MULMOD_BNM1_THRESHOLD 12
#define SQRMOD_BNM1_THRESHOLD 14
-#define MUL_FFT_TABLE { 336, 800, 1600, 3328, 7168, 20480, 81920, 196608, 0 }
-#define MUL_FFT_MODF_THRESHOLD 400
+#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 380, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \
+ { 27, 7}, { 21, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 43,10}, \
+ { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 55,11}, \
+ { 31, 9}, { 127,10}, { 71, 8}, { 287,10}, \
+ { 79,11}, { 47,12}, { 31,11}, { 63, 9}, \
+ { 255,10}, { 135, 9}, { 271,11}, { 79, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,11}, { 111,12}, { 63,11}, { 127,10}, \
+ { 271, 9}, { 543,11}, { 143,10}, { 287,11}, \
+ { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,13}, { 63,12}, { 127,11}, \
+ { 271,10}, { 543,11}, { 287,12}, { 159,11}, \
+ { 319,10}, { 671,11}, { 351,12}, { 191,11}, \
+ { 415,12}, { 223,11}, { 447,10}, { 895,11}, \
+ { 479,13}, { 127,12}, { 287,11}, { 607,12}, \
+ { 319,11}, { 671,12}, { 351,13}, { 191,12}, \
+ { 415,11}, { 831,10}, { 1663,12}, { 479,14}, \
+ { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \
+ { 607,13}, { 319,12}, { 703,13}, { 383,12}, \
+ { 767,10}, { 3071,12}, { 831,13}, { 447,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1023,13}, \
+ { 575,12}, { 1151,11}, { 2303,13}, { 639,12}, \
+ { 1343,13}, { 703,14}, { 383,13}, { 831,12}, \
+ { 1727,13}, { 959,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1343,12}, { 2687,13}, { 1407,12}, { 2815,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,12}, { 4863,13}, { 2495,14}, { 1279,13}, \
+ { 2687,14}, { 1407,15}, { 767,14}, { 1663,13}, \
+ { 3327,12}, { 6655,13}, { 3455,12}, { 6911,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 170
#define MUL_FFT_THRESHOLD 4736
-#define SQR_FFT_TABLE { 336, 672, 1728, 2816, 7168, 20480, 81920, 327680, 786432, 0 }
-#define SQR_FFT_MODF_THRESHOLD 352
-#define SQR_FFT_THRESHOLD 3712
+#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
+ { 25, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 59,11}, { 15,10}, { 31, 8}, \
+ { 125, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 103,10}, { 79, 9}, { 159,10}, \
+ { 87, 9}, { 175, 8}, { 351,11}, { 47,10}, \
+ { 95,11}, { 63,10}, { 127, 8}, { 511, 9}, \
+ { 271, 8}, { 543,11}, { 79,10}, { 175,11}, \
+ { 95,10}, { 191, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319,11}, { 175,10}, \
+ { 351,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \
+ { 287,10}, { 575,11}, { 303,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,12}, { 191,11}, \
+ { 383,10}, { 767,11}, { 415,10}, { 831,12}, \
+ { 223,11}, { 479,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 575,12}, { 319,11}, \
+ { 639,12}, { 351,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,12}, { 447,11}, \
+ { 895,12}, { 479,14}, { 127,13}, { 255,12}, \
+ { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 703,13}, { 383,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1407,12}, { 2815,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1663,14}, { 895,13}, \
+ { 1791,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2303,12}, { 4607,13}, { 2431,12}, \
+ { 4863,14}, { 1279,13}, { 2687,14}, { 1407,13}, \
+ { 2815,15}, { 767,14}, { 1535,13}, { 3071,14}, \
+ { 1663,13}, { 3327,12}, { 6655,13}, { 3455,12}, \
+ { 6911,14}, { 1791,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 178
+#define SQR_FFT_THRESHOLD 3520
#define MULLO_BASECASE_THRESHOLD 3
#define MULLO_DC_THRESHOLD 20
@@ -96,7 +186,3 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define GET_STR_PRECOMPUTE_THRESHOLD 20
#define SET_STR_DC_THRESHOLD 552
#define SET_STR_PRECOMPUTE_THRESHOLD 1790
-
-#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1345,7}, {3201,8}, {6913,9}, {7681,8}, {8961,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {56321,11}, {61441,10}, {80897,11}, {96257,12}, {104449,10}, {105985,12}, {126977,11}, {129025,10}, {141313,11}, {163841,10}, {165889,11}, {194561,10}, {204801,11}, {227329,12}, {258049,11}, {261633,10}, {274433,11}, {292865,10}, {296961,11}, {299009,10}, {308225,11}, {326657,12}, {389121,11}, {424961,13}, {516097,12}, {520193,11}, {620545,12}, {651265,11}, {752641,12}, {782337,11}, {849921,12}, {913409,11}, {937985,13}, {944129,11}, {980993,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,11}, {1447937,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {3012609,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4349953,12}, {4354049,13}, {4362241,12}, {4370433,13}, {4407297,12}, {4415489,13}, {4431873,12}, {4440065,13}, {4710401,12}, {4976641,13}, {5758977,12}, {5763073,14}, {6275073,13}, {MP_SIZE_T_MAX, 0}}
-
-#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1345,7}, {3201,8}, {3329,7}, {3457,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {48129,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {102401,11}, {104449,12}, {126977,11}, {129025,10}, {138241,11}, {163329,10}, {179201,9}, {179713,11}, {210945,10}, {211969,11}, {221185,12}, {258049,10}, {262145,9}, {262657,10}, {274945,11}, {279553,9}, {280577,11}, {285697,10}, {286721,11}, {292865,10}, {293889,9}, {295937,10}, {296961,11}, {299009,10}, {309249,9}, {310785,11}, {331777,10}, {332801,11}, {339969,12}, {348161,11}, {352257,12}, {389121,11}, {391169,10}, {393217,11}, {402433,10}, {405505,11}, {425985,13}, {516097,11}, {528385,10}, {529409,11}, {565249,10}, {566273,11}, {622593,12}, {651265,11}, {718849,12}, {765953,11}, {768001,12}, {782337,11}, {849921,12}, {913409,11}, {930817,13}, {942081,11}, {980993,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1437697,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3272705,13}, {3661825,12}, {3665921,14}, {4177921,13}, {4235265,12}, {4276225,13}, {4710401,12}, {4849665,13}, {4866049,12}, {4964353,13}, {5263361,12}, {5300225,13}, {5324801,12}, {5332993,13}, {5349377,11}, {5353473,12}, {5357569,13}, {5369857,14}, {5373953,13}, {5423105,12}, {5455873,13}, {5603329,12}, {5611521,13}, {5664769,14}, {5668865,13}, {5758977,14}, {6275073,13}, {MP_SIZE_T_MAX, 0}}