diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | acinclude.m4 | 2 | ||||
-rwxr-xr-x | config.guess | 2 | ||||
-rwxr-xr-x | config.sub | 2 | ||||
-rw-r--r-- | configure.in | 40 | ||||
-rw-r--r-- | mpn/powerpc64/mode64/p3/gmp-mparam.h | 71 | ||||
-rw-r--r-- | mpn/powerpc64/mode64/p4/gmp-mparam.h | 73 | ||||
-rw-r--r-- | mpn/powerpc64/mode64/p5/gmp-mparam.h | 73 | ||||
-rw-r--r-- | mpn/x86_64/core2/gmp-mparam.h | 53 | ||||
-rw-r--r-- | mpn/x86_64/corei/gmp-mparam.h | 80 |
10 files changed, 367 insertions, 41 deletions
@@ -1,5 +1,17 @@ 2009-11-27 Torbjorn Granlund <tege@gmplib.org> + * mpn/x86_64/corei/gmp-mparam.h: New file. + * mpn/x86_64/core2/gmp-mparam.h: Now for just core2. + * mpn/powerpc64/mode64/p3/gmp-mparam.h: New file. + * mpn/powerpc64/mode64/p4/gmp-mparam.h: New file. + * mpn/powerpc64/mode64/p5/gmp-mparam.h: New file. + + * config.guess: Return "corei" for core i7 and core i5. + * config.sub: Recognise "corei". + * acinclude.m4 (X86_64_PATTERN): Add corei. + * configure.in (powerpc): Set up more CPU-specific paths. + (x86): Handle corei. + * mpz/powm.c: Allow input operand overlap also when exponent = 1. Misc cleanups. diff --git a/acinclude.m4 b/acinclude.m4 index f2d405f6f..55c9d3126 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -47,7 +47,7 @@ define(X86_PATTERN, [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]]) define(X86_64_PATTERN, -[[athlon64-*-* | pentium4-*-* | atom-*-* | core2-*-* | x86_64-*-* | nano-*-*]]) +[[athlon64-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei-*-* | x86_64-*-* | nano-*-*]]) dnl GMP_FAT_SUFFIX(DSTVAR, DIRECTORY) dnl --------------------------------- diff --git a/config.guess b/config.guess index a244135b1..40b263796 100755 --- a/config.guess +++ b/config.guess @@ -735,7 +735,7 @@ main () else if (model <= 12) modelstr = "pentium3"; else if (model <= 14) modelstr = "pentiumm"; else if (model <= 25) modelstr = "core2"; - else if (model <= 27) modelstr = "core2"; /* core i5, i7 */ + else if (model <= 27) modelstr = "corei"; /* core i5, i7 */ else if (model == 28) modelstr = "atom"; else if (model == 29) modelstr = "core2"; break; diff --git a/config.sub b/config.sub index 13f4bb05e..ccc7bb1a5 100755 --- a/config.sub +++ b/config.sub @@ -91,7 +91,7 @@ itanium | itanium2) test_cpu=ia64 ;; pentium | pentiummmx | pentiumpro | pentium[234m] | k[56] | k6[23] | geode | athlon | viac3* | nano) test_cpu=i386 ;; -athlon64 | atom | core2 | opteron) +athlon64 | atom | core2 | corei | opteron) 
test_cpu=x86_64 ;; power[2-9] | power2sc) test_cpu=power ;; diff --git a/configure.in b/configure.in index 1e8bda099..6cd941773 100644 --- a/configure.in +++ b/configure.in @@ -851,6 +851,7 @@ case $host in gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin gcc_cflags_asm="" gcc_cflags_cpu="" + vmx_path="" # grab this object, though it's not a true cycle counter routine SPEED_CYCLECOUNTER_OBJ=powerpc.lo @@ -892,7 +893,8 @@ case $host in powerpc604) gcc_cflags_cpu="-mcpu=604" ;; powerpc604e) gcc_cflags_cpu="-mcpu=604e -mcpu=604" ;; powerpc620) gcc_cflags_cpu="-mcpu=620" ;; - powerpc630) gcc_cflags_cpu="-mcpu=630" ;; + powerpc630) gcc_cflags_cpu="-mcpu=630" + cpu_path="p3" ;; powerpc740) gcc_cflags_cpu="-mcpu=740" ;; powerpc7400 | powerpc7410) gcc_cflags_asm="-Wa,-maltivec" @@ -905,7 +907,15 @@ case $host in powerpc821) gcc_cflags_cpu="-mcpu=821" ;; powerpc823) gcc_cflags_cpu="-mcpu=823" ;; powerpc860) gcc_cflags_cpu="-mcpu=860" ;; - powerpc970) gcc_cflags_cpu="-mcpu=970" ;; + powerpc970) gcc_cflags_cpu="-mtune=970" + vmx_path="powerpc64/vmx" + cpu_path="p4" ;; + power4) gcc_cflags_cpu="-mtune=power4" + cpu_path="p4" ;; + power5) gcc_cflags_cpu="-mtune=power5 -mtune=power4" + cpu_path="p5 p4" ;; + power6) gcc_cflags_cpu="-mtune=power6" + cpu_path="p6" ;; esac case $host in @@ -934,10 +944,6 @@ case $host in case $host in POWERPC64_PATTERN) - case $host_cpu in - powerpc970) vmx_path="powerpc64/vmx" ;; - *) vmx_path="" ;; - esac case $host in *-*-aix*) # On AIX a true 64-bit ABI is available. 
@@ -952,7 +958,9 @@ case $host in # Must indicate object type to ar and nm ar_aix64_flags="-X64" nm_aix64_flags="-X64" - path_aix64="powerpc64/mode64 $vmx_path powerpc64" + path_aix64="" + for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done + path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64" # grab this object, though it's not a true cycle counter routine SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo cyclecounter_size_aix64=0 @@ -990,7 +998,9 @@ case $host in gcc_mode64_cflags="-m64" gcc_mode64_cflags_optlist="cpu opt" gcc_mode64_cflags_opt="-O3 -O2 -O1" - path_mode64="powerpc64/mode64 $vmx_path powerpc64" + path_mode64="" + for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done + path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64" SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo cyclecounter_size_mode64=0 any_mode64_testlist="sizeof-long-8" @@ -1021,7 +1031,9 @@ case $host in gcc_mode64_cflags_maybe="-m64" gcc_mode64_cflags_optlist="cpu opt" gcc_mode64_cflags_opt="-O3 -O2 -O1" - path_mode64="powerpc64/mode64 $vmx_path powerpc64" + path_mode64="" + for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done + path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64" SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo cyclecounter_size_mode64=0 any_mode64_testlist="sizeof-long-8" @@ -1405,7 +1417,7 @@ case $host in gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486" gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium" ;; - core2) + core2 | corei) gcc_cflags_cpu="-mtune=core2 -mtune=k8" gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2" ;; @@ -1427,7 +1439,8 @@ case $host in i686 | pentiumpro) path="x86/p6 x86" ;; pentium2) path="x86/p6/mmx x86/p6 x86" ;; pentium3) path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86";; - pentiumm | core2) path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86";; + pentiumm 
| core2 | corei) + path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86";; [k6[23]]) path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;; k6) path="x86/k6/mmx x86/k6 x86" ;; geode) path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;; @@ -1464,6 +1477,9 @@ case $host in core2) path_64="x86_64/core2 $path_64" ;; + corei) + path_64="x86_64/corei x86_64/core2 $path_64" + ;; atom) path_64="x86_64/atom $path_64" ;; @@ -1783,7 +1799,7 @@ case $host in gcc_64_cflags="" extra_functions_64="$extra_functions_64 fat fat_entry" path_64="x86_64/fat x86_64" - fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/atom" + fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/corei x86_64/atom x86_64/nano" fi fat_functions="add_n addmul_1 copyd copyi diff --git a/mpn/powerpc64/mode64/p3/gmp-mparam.h b/mpn/powerpc64/mode64/p3/gmp-mparam.h new file mode 100644 index 000000000..2f0984093 --- /dev/null +++ b/mpn/powerpc64/mode64/p3/gmp-mparam.h @@ -0,0 +1,71 @@ +/* POWER3/PowerPC630 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 + + +/* Generated by tuneup.c, 2009-11-27, gcc 2.9 */ + +#define MUL_TOOM22_THRESHOLD 10 +#define MUL_TOOM33_THRESHOLD 30 +#define MUL_TOOM44_THRESHOLD 46 + +#define SQR_BASECASE_THRESHOLD 0 /* always */ +#define SQR_TOOM2_THRESHOLD 16 +#define SQR_TOOM3_THRESHOLD 45 +#define SQR_TOOM4_THRESHOLD 64 + +#define MUL_FFT_TABLE { 208, 416, 1088, 1792, 5120, 12288, 49152, 196608, 786432, 0 } +#define MUL_FFT_MODF_THRESHOLD 240 +#define MUL_FFT_THRESHOLD 1664 + +#define SQR_FFT_TABLE { 208, 416, 960, 1792, 5120, 12288, 49152, 196608, 786432, 0 } +#define SQR_FFT_MODF_THRESHOLD 184 +#define SQR_FFT_THRESHOLD 1664 + +#define MULLOW_BASECASE_THRESHOLD 2 +#define MULLOW_DC_THRESHOLD 27 +#define MULLOW_MUL_N_THRESHOLD 2350 + +#define MULMOD_BNM1_THRESHOLD 8 + +#define DC_DIV_QR_THRESHOLD 27 +#define REDC_1_TO_REDC_N_THRESHOLD 48 + +#define MATRIX22_STRASSEN_THRESHOLD 8 +#define HGCD_THRESHOLD 54 +#define GCD_DC_THRESHOLD 159 +#define GCDEXT_DC_THRESHOLD 150 +#define JACOBI_BASE_METHOD 2 + +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 5 +#define MOD_1_2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_4_THRESHOLD MP_SIZE_T_MAX /* never */ +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 1 +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 15 +#define GET_STR_PRECOMPUTE_THRESHOLD 29 +#define SET_STR_DC_THRESHOLD 375 +#define SET_STR_PRECOMPUTE_THRESHOLD 812 diff --git a/mpn/powerpc64/mode64/p4/gmp-mparam.h b/mpn/powerpc64/mode64/p4/gmp-mparam.h new file mode 100644 index 000000000..3acf6905d --- /dev/null +++ b/mpn/powerpc64/mode64/p4/gmp-mparam.h @@ -0,0 +1,73 @@ +/* PowerPC970 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 + + +/* Generated by tuneup.c, 2009-11-27, gcc 4.0 */ + +#define MUL_TOOM22_THRESHOLD 16 +#define MUL_TOOM33_THRESHOLD 62 +#define MUL_TOOM44_THRESHOLD 136 + +#define SQR_BASECASE_THRESHOLD 8 +#define SQR_TOOM2_THRESHOLD 32 +#define SQR_TOOM3_THRESHOLD 65 +#define SQR_TOOM4_THRESHOLD 145 + +#define MUL_FFT_TABLE { 336, 800, 1728, 2816, 7168, 20480, 81920, 327680, 0 } +#define MUL_FFT_MODF_THRESHOLD 336 +#define MUL_FFT_THRESHOLD 5248 + +#define SQR_FFT_TABLE { 272, 672, 1600, 2816, 7168, 20480, 81920, 327680, 786432, 0 } +#define SQR_FFT_MODF_THRESHOLD 272 +#define SQR_FFT_THRESHOLD 2944 + +#define MULLOW_BASECASE_THRESHOLD 5 +#define MULLOW_DC_THRESHOLD 35 +#define MULLOW_MUL_N_THRESHOLD 10292 + +#define MULMOD_BNM1_THRESHOLD 12 + +#define DC_DIV_QR_THRESHOLD 29 +#define REDC_1_TO_REDC_N_THRESHOLD 96 + +#define MATRIX22_STRASSEN_THRESHOLD 11 +#define HGCD_THRESHOLD 93 +#define GCD_DC_THRESHOLD 241 +#define GCDEXT_DC_THRESHOLD 273 +#define JACOBI_BASE_METHOD 1 + +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 6 +#define MOD_1_2_THRESHOLD 9 +#define MOD_1_4_THRESHOLD 23 +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 0 +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ 
+#define MODEXACT_1_ODD_THRESHOLD MP_SIZE_T_MAX /* never */ + +#define GET_STR_DC_THRESHOLD 12 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 650 +#define SET_STR_PRECOMPUTE_THRESHOLD 1781 + +/* Tuneup completed successfully, took 176 seconds */ diff --git a/mpn/powerpc64/mode64/p5/gmp-mparam.h b/mpn/powerpc64/mode64/p5/gmp-mparam.h new file mode 100644 index 000000000..b63190493 --- /dev/null +++ b/mpn/powerpc64/mode64/p5/gmp-mparam.h @@ -0,0 +1,73 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 + +/* POWER5 (friggms.hpc.ntnu.no) */ + +/* Generated by tuneup.c, 2009-11-26, gcc 4.4 */ + +#define MUL_TOOM22_THRESHOLD 16 +#define MUL_TOOM33_THRESHOLD 65 +#define MUL_TOOM44_THRESHOLD 147 + +#define SQR_BASECASE_THRESHOLD 10 +#define SQR_TOOM2_THRESHOLD 42 +#define SQR_TOOM3_THRESHOLD 65 +#define SQR_TOOM4_THRESHOLD 115 + +#define MUL_FFT_TABLE { 336, 800, 1728, 2816, 7168, 20480, 49152, 196608, 786432, 0 } +#define MUL_FFT_MODF_THRESHOLD 336 +#define MUL_FFT_THRESHOLD 2432 + +#define SQR_FFT_TABLE { 304, 608, 1600, 2816, 7168, 20480, 49152, 196608, 786432, 0 } +#define SQR_FFT_MODF_THRESHOLD 272 +#define SQR_FFT_THRESHOLD 1920 + +#define MULLOW_BASECASE_THRESHOLD 3 +#define MULLOW_DC_THRESHOLD 33 +#define MULLOW_MUL_N_THRESHOLD 3913 + +#define MULMOD_BNM1_THRESHOLD 12 + +#define DC_DIV_QR_THRESHOLD 21 +#define REDC_1_TO_REDC_N_THRESHOLD 91 + +#define MATRIX22_STRASSEN_THRESHOLD 13 +#define HGCD_THRESHOLD 93 +#define GCD_DC_THRESHOLD 241 +#define GCDEXT_DC_THRESHOLD 298 +#define JACOBI_BASE_METHOD 1 + +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 30 +#define MOD_1_2_THRESHOLD 31 +#define MOD_1_4_THRESHOLD 32 +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 0 +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 12 +#define GET_STR_PRECOMPUTE_THRESHOLD 25 +#define SET_STR_DC_THRESHOLD 537 +#define SET_STR_PRECOMPUTE_THRESHOLD 1639 diff --git a/mpn/x86_64/core2/gmp-mparam.h b/mpn/x86_64/core2/gmp-mparam.h index 349209d88..e9f8d0eb0 100644 --- a/mpn/x86_64/core2/gmp-mparam.h +++ b/mpn/x86_64/core2/gmp-mparam.h @@ -1,4 +1,4 @@ -/* "Core 2" gmp-mparam.h -- Compiler/machine parameter header file. +/* Core 2 gmp-mparam.h -- Compiler/machine parameter header file. 
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. @@ -21,60 +21,61 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define BITS_PER_MP_LIMB 64 #define BYTES_PER_MP_LIMB 8 -/* 2667 MHz Core i7 */ +/* 2133 MHz Core 2 (65nm) */ /* Generated by tuneup.c, 2009-11-24, gcc 4.2 */ -#define MUL_TOOM22_THRESHOLD 16 +/* Generated by tuneup.c, 2009-11-26, gcc 4.2 */ + +#define MUL_TOOM22_THRESHOLD 23 #define MUL_TOOM33_THRESHOLD 66 -#define MUL_TOOM44_THRESHOLD 166 +#define MUL_TOOM44_THRESHOLD 178 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 105 -#define SQR_TOOM4_THRESHOLD 250 +#define SQR_TOOM2_THRESHOLD 28 +#define SQR_TOOM3_THRESHOLD 101 +#define SQR_TOOM4_THRESHOLD 143 -#define MUL_FFT_TABLE { 336, 800, 1600, 2816, 7168, 20480, 81920, 196608, 786432, 0 } +#define MUL_FFT_TABLE { 336, 800, 1600, 2816, 7168, 20480, 81920, 327680, 0 } #define MUL_FFT_MODF_THRESHOLD 400 -#define MUL_FFT_THRESHOLD 3456 +#define MUL_FFT_THRESHOLD 4224 -#define SQR_FFT_TABLE { 336, 736, 1600, 2816, 7168, 20480, 49152, 196608, 786432, 0 } -#define SQR_FFT_MODF_THRESHOLD 368 +#define SQR_FFT_TABLE { 304, 736, 1728, 3328, 7168, 20480, 81920, 327680, 0 } +#define SQR_FFT_MODF_THRESHOLD 336 #define SQR_FFT_THRESHOLD 2688 -#define MULLOW_BASECASE_THRESHOLD 4 -#define MULLOW_DC_THRESHOLD 21 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 19 #define MULLOW_MUL_N_THRESHOLD 5472 #define MULMOD_BNM1_THRESHOLD 12 -#define DC_DIV_QR_THRESHOLD 38 +#define DC_DIV_QR_THRESHOLD 52 #define REDC_1_TO_REDC_2_THRESHOLD 10 -#define REDC_2_TO_REDC_N_THRESHOLD 110 +#define REDC_2_TO_REDC_N_THRESHOLD 111 -#define MATRIX22_STRASSEN_THRESHOLD 17 -#define HGCD_THRESHOLD 139 -#define GCD_DC_THRESHOLD 354 -#define GCDEXT_DC_THRESHOLD 496 +#define MATRIX22_STRASSEN_THRESHOLD 25 +#define HGCD_THRESHOLD 135 +#define 
GCD_DC_THRESHOLD 443 +#define GCDEXT_DC_THRESHOLD 460 #define JACOBI_BASE_METHOD 1 #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define MOD_1_1_THRESHOLD 4 #define MOD_1_2_THRESHOLD 5 -#define MOD_1_4_THRESHOLD 12 +#define MOD_1_4_THRESHOLD 15 #define USE_PREINV_DIVREM_1 1 /* native */ #define USE_PREINV_MOD_1 0 #define DIVEXACT_1_THRESHOLD 0 /* always (native) */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 23 -#define SET_STR_DC_THRESHOLD 632 -#define SET_STR_PRECOMPUTE_THRESHOLD 1939 - -/* Generated 2009-01-12, gcc 4.2 */ +#define GET_STR_DC_THRESHOLD 11 +#define GET_STR_PRECOMPUTE_THRESHOLD 21 +#define SET_STR_DC_THRESHOLD 704 +#define SET_STR_PRECOMPUTE_THRESHOLD 1785 +/* These tables are obsolete. */ #define MUL_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1217,7}, {3201,8}, {6913,9}, {7681,8}, {8449,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {39937,9}, {42497,10}, {56321,11}, {63489,10}, {81409,11}, {92161,10}, {93185,11}, {96257,12}, {126977,11}, {131073,10}, {138241,11}, {167937,10}, {169473,11}, {169985,10}, {172033,11}, {195585,9}, {196097,11}, {198657,10}, {208897,11}, {217089,12}, {258049,11}, {261121,9}, {262657,10}, {275457,11}, {302081,10}, {307201,11}, {331777,12}, {389121,11}, {425985,13}, {516097,12}, {520193,11}, {598017,12}, {610305,11}, {614401,12}, {651265,11}, {653313,10}, {654337,11}, {673793,10}, {674817,11}, {677889,10}, {679937,11}, {718849,10}, {719873,12}, {782337,11}, {850945,12}, {913409,11}, {925697,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1244161,12}, {1306625,11}, {1310721,12}, {1327105,11}, {1347585,12}, {1355777,11}, {1366017,12}, {1439745,13}, {1564673,12}, {1835009,14}, {1900545,12}, {1904641,14}, {2080769,13}, {2088961,12}, {2488321,13}, {2613249,12}, {2879489,13}, {2932737,12}, {2940929,13}, {3137537,12}, 
{3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4978689,13}, {5234689,12}, {5500929,13}, {5758977,14}, {6275073,13}, {7856129,15}, {8355841,14}, {8372225,13}, {9957377,14}, {MP_SIZE_T_MAX, 0}} #define SQR_FFT_TABLE2 {{1,4}, {241,5}, {545,6}, {1345,7}, {3201,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {55297,11}, {63489,10}, {80897,11}, {94209,10}, {97281,12}, {126977,11}, {129025,9}, {130049,10}, {138753,11}, {162817,9}, {164353,11}, {170497,10}, {178177,11}, {183297,10}, {184321,11}, {194561,10}, {208897,12}, {219137,11}, {221185,12}, {258049,11}, {261121,9}, {261633,10}, {267777,9}, {268289,11}, {270337,10}, {274945,9}, {276481,10}, {278529,11}, {292865,9}, {293377,10}, {295937,9}, {296449,10}, {306177,9}, {309249,10}, {310273,11}, {328705,12}, {331777,11}, {335873,12}, {344065,11}, {346113,12}, {352257,11}, {356353,12}, {389121,11}, {395265,10}, {398337,11}, {419841,10}, {421889,11}, {423937,13}, {516097,12}, {520193,11}, {546817,10}, {550913,11}, {561153,10}, {563201,11}, {579585,10}, {585729,11}, {621569,12}, {636929,11}, {638977,12}, {651265,11}, {714753,10}, {716801,11}, {718849,12}, {782337,11}, {849921,12}, {913409,11}, {954369,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3272705,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5320705,13}, {5324801,12}, {5447681,13}, {5455873,12}, {5500929,13}, {5758977,14}, {6275073,13}, {6283265,12}, {6549505,13}, {7856129,15}, {8355841,14}, {8372225,13}, {9953281,14}, {MP_SIZE_T_MAX, 0}} diff --git a/mpn/x86_64/corei/gmp-mparam.h b/mpn/x86_64/corei/gmp-mparam.h new file mode 100644 index 000000000..00c2aec05 --- 
/dev/null +++ b/mpn/x86_64/corei/gmp-mparam.h @@ -0,0 +1,80 @@ +/* Core i gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, +2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 + +/* 2667 MHz Core i7 */ + +/* Generated by tuneup.c, 2009-11-24, gcc 4.2 */ + +#define MUL_TOOM22_THRESHOLD 16 +#define MUL_TOOM33_THRESHOLD 66 +#define MUL_TOOM44_THRESHOLD 166 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_TOOM2_THRESHOLD 30 +#define SQR_TOOM3_THRESHOLD 105 +#define SQR_TOOM4_THRESHOLD 250 + +#define MUL_FFT_TABLE { 336, 800, 1600, 2816, 7168, 20480, 81920, 196608, 786432, 0 } +#define MUL_FFT_MODF_THRESHOLD 400 +#define MUL_FFT_THRESHOLD 3456 + +#define SQR_FFT_TABLE { 336, 736, 1600, 2816, 7168, 20480, 49152, 196608, 786432, 0 } +#define SQR_FFT_MODF_THRESHOLD 368 +#define SQR_FFT_THRESHOLD 2688 + +#define MULLOW_BASECASE_THRESHOLD 4 +#define MULLOW_DC_THRESHOLD 21 +#define MULLOW_MUL_N_THRESHOLD 5472 + +#define MULMOD_BNM1_THRESHOLD 12 + +#define DC_DIV_QR_THRESHOLD 38 +#define REDC_1_TO_REDC_2_THRESHOLD 10 +#define REDC_2_TO_REDC_N_THRESHOLD 110 + +#define MATRIX22_STRASSEN_THRESHOLD 17 +#define HGCD_THRESHOLD 139 
+#define GCD_DC_THRESHOLD 354 +#define GCDEXT_DC_THRESHOLD 496 +#define JACOBI_BASE_METHOD 1 + +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 4 +#define MOD_1_2_THRESHOLD 5 +#define MOD_1_4_THRESHOLD 12 +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 0 +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 13 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 632 +#define SET_STR_PRECOMPUTE_THRESHOLD 1939 + +/* Generated 2009-01-12, gcc 4.2 */ + +#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1217,7}, {3201,8}, {6913,9}, {7681,8}, {8449,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {39937,9}, {42497,10}, {56321,11}, {63489,10}, {81409,11}, {92161,10}, {93185,11}, {96257,12}, {126977,11}, {131073,10}, {138241,11}, {167937,10}, {169473,11}, {169985,10}, {172033,11}, {195585,9}, {196097,11}, {198657,10}, {208897,11}, {217089,12}, {258049,11}, {261121,9}, {262657,10}, {275457,11}, {302081,10}, {307201,11}, {331777,12}, {389121,11}, {425985,13}, {516097,12}, {520193,11}, {598017,12}, {610305,11}, {614401,12}, {651265,11}, {653313,10}, {654337,11}, {673793,10}, {674817,11}, {677889,10}, {679937,11}, {718849,10}, {719873,12}, {782337,11}, {850945,12}, {913409,11}, {925697,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1244161,12}, {1306625,11}, {1310721,12}, {1327105,11}, {1347585,12}, {1355777,11}, {1366017,12}, {1439745,13}, {1564673,12}, {1835009,14}, {1900545,12}, {1904641,14}, {2080769,13}, {2088961,12}, {2488321,13}, {2613249,12}, {2879489,13}, {2932737,12}, {2940929,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4978689,13}, {5234689,12}, {5500929,13}, {5758977,14}, {6275073,13}, {7856129,15}, {8355841,14}, 
{8372225,13}, {9957377,14}, {MP_SIZE_T_MAX, 0}} + +#define SQR_FFT_TABLE2 {{1,4}, {241,5}, {545,6}, {1345,7}, {3201,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {55297,11}, {63489,10}, {80897,11}, {94209,10}, {97281,12}, {126977,11}, {129025,9}, {130049,10}, {138753,11}, {162817,9}, {164353,11}, {170497,10}, {178177,11}, {183297,10}, {184321,11}, {194561,10}, {208897,12}, {219137,11}, {221185,12}, {258049,11}, {261121,9}, {261633,10}, {267777,9}, {268289,11}, {270337,10}, {274945,9}, {276481,10}, {278529,11}, {292865,9}, {293377,10}, {295937,9}, {296449,10}, {306177,9}, {309249,10}, {310273,11}, {328705,12}, {331777,11}, {335873,12}, {344065,11}, {346113,12}, {352257,11}, {356353,12}, {389121,11}, {395265,10}, {398337,11}, {419841,10}, {421889,11}, {423937,13}, {516097,12}, {520193,11}, {546817,10}, {550913,11}, {561153,10}, {563201,11}, {579585,10}, {585729,11}, {621569,12}, {636929,11}, {638977,12}, {651265,11}, {714753,10}, {716801,11}, {718849,12}, {782337,11}, {849921,12}, {913409,11}, {954369,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3272705,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5320705,13}, {5324801,12}, {5447681,13}, {5455873,12}, {5500929,13}, {5758977,14}, {6275073,13}, {6283265,12}, {6549505,13}, {7856129,15}, {8355841,14}, {8372225,13}, {9953281,14}, {MP_SIZE_T_MAX, 0}} |