summaryrefslogtreecommitdiff
path: root/mpn/ia64
diff options
context:
space:
mode:
author Richard Maw <richard.maw@codethink.co.uk> 2012-01-19 10:33:31 +0000
committer Richard Maw <richard.maw@codethink.co.uk> 2012-01-19 10:33:31 +0000
commit 29137c6ff7a9e370e2332d855ab46616ad4e9cc9 (patch)
tree fbca7aa7cfa645df1b059aeba7e81739620b013c /mpn/ia64
parent 962de8d4b353178d38c2c70e952944686b9fd47b (diff)
parent 2c033efc02631f22e6e180ce737a2faf81b09ccc (diff)
download gmp-29137c6ff7a9e370e2332d855ab46616ad4e9cc9.tar.gz
Merge branch 'master' into baserock/morph
Diffstat (limited to 'mpn/ia64')
-rw-r--r-- mpn/ia64/gmp-mparam.h 148
-rw-r--r-- mpn/ia64/tabselect.asm 139
2 files changed, 237 insertions, 50 deletions
diff --git a/mpn/ia64/gmp-mparam.h b/mpn/ia64/gmp-mparam.h
index 0841c82aa..77e02f518 100644
--- a/mpn/ia64/gmp-mparam.h
+++ b/mpn/ia64/gmp-mparam.h
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010, 2011 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
@@ -21,70 +21,94 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
-/* 1300MHz Itanium2 (babe.fsffrance.org) */
-
+/* 900MHz Itanium2 (titanic.gmplib.org) */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 26
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD 12
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define MUL_TOOM22_THRESHOLD 40
-#define MUL_TOOM33_THRESHOLD 122
-#define MUL_TOOM44_THRESHOLD 212
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 214
#define MUL_TOOM6H_THRESHOLD 318
#define MUL_TOOM8H_THRESHOLD 430
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 146
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 145
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 126
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 151
#define SQR_BASECASE_THRESHOLD 11
#define SQR_TOOM2_THRESHOLD 84
-#define SQR_TOOM3_THRESHOLD 125
+#define SQR_TOOM3_THRESHOLD 135
#define SQR_TOOM4_THRESHOLD 494
-#define SQR_TOOM6_THRESHOLD 0 /* never toom4 */
-#define SQR_TOOM8_THRESHOLD 0 /* never toom6 */
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 98
#define MULMOD_BNM1_THRESHOLD 23
-#define SQRMOD_BNM1_THRESHOLD 25
+#define SQRMOD_BNM1_THRESHOLD 28
+
+#define POWM_SEC_TABLE 2,29,130,905
-#define MUL_FFT_MODF_THRESHOLD 444 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 444, 5}, { 27, 6}, { 14, 5}, { 29, 6}, \
- { 35, 7}, { 18, 6}, { 37, 7}, { 19, 6}, \
+ { { 476, 5}, { 27, 6}, { 14, 5}, { 29, 6}, \
+ { 33, 7}, { 17, 6}, { 37, 7}, { 19, 6}, \
{ 39, 7}, { 21, 6}, { 43, 7}, { 33, 8}, \
{ 17, 7}, { 37, 8}, { 19, 7}, { 39, 8}, \
- { 21, 7}, { 43, 8}, { 29, 9}, { 15, 8}, \
- { 37, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
- { 49, 9}, { 27, 8}, { 57, 9}, { 31, 8}, \
- { 63, 9}, { 35, 8}, { 71, 9}, { 43,10}, \
+ { 21, 7}, { 43, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 57, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
{ 23, 9}, { 59,10}, { 31, 9}, { 71,10}, \
- { 39, 9}, { 87,10}, { 47, 9}, { 99,10}, \
+ { 39, 9}, { 83,10}, { 47, 9}, { 99,10}, \
{ 55,11}, { 31,10}, { 87,11}, { 47,10}, \
{ 111,12}, { 31,11}, { 63,10}, { 143,11}, \
{ 79,10}, { 167,11}, { 95,10}, { 191,11}, \
{ 111,12}, { 63,11}, { 143,10}, { 287, 9}, \
{ 575,10}, { 303,11}, { 159,10}, { 319,12}, \
{ 95,11}, { 191,10}, { 399,11}, { 207,10}, \
- { 431,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 76
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
+ { 159,11}, { 335,10}, { 671,11}, { 367,12}, \
+ { 191,11}, { 399,10}, { 799,11}, { 431,12}, \
+ { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 607,12}, { 319,11}, \
+ { 671,12}, { 351,11}, { 703,13}, { 191,12}, \
+ { 415,11}, { 863,12}, { 447,14}, { 127,13}, \
+ { 255,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 799,11}, { 1599,12}, { 863,13}, \
+ { 447,12}, { 927,11}, { 1855,14}, { 255,13}, \
+ { 511,12}, { 1055,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 895,12}, { 1791,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1343,12}, { 2687,13}, { 1471,14}, { 767,13}, \
+ { 1599,12}, { 3199,13}, { 1663,14}, { 895,13}, \
+ { 1855,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2431,14}, { 1279,13}, { 2687,14}, \
+ { 1407,15}, { 767,14}, { 1535,13}, { 3199,14}, \
+ { 1663,13}, { 3455,14}, { 1791,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 155
#define MUL_FFT_THRESHOLD 5760
-#define SQR_FFT_MODF_THRESHOLD 440 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 436 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 440, 5}, { 14, 4}, { 29, 5}, { 29, 6}, \
- { 15, 5}, { 31, 6}, { 35, 7}, { 18, 6}, \
- { 37, 7}, { 33, 8}, { 17, 7}, { 37, 8}, \
+ { { 436, 5}, { 14, 4}, { 29, 5}, { 31, 6}, \
+ { 35, 7}, { 18, 6}, { 37, 7}, { 37, 8}, \
{ 19, 7}, { 40, 8}, { 37, 9}, { 19, 8}, \
{ 43, 9}, { 23, 8}, { 49, 9}, { 27, 8}, \
{ 57, 9}, { 43,10}, { 23, 9}, { 55,10}, \
@@ -93,45 +117,69 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
{ 87,11}, { 47,10}, { 111,12}, { 31,11}, \
{ 63,10}, { 135,11}, { 79,10}, { 167,11}, \
{ 95,10}, { 191,11}, { 111,12}, { 63,11}, \
- { 127,10}, { 255,11}, { 143,10}, { 303,11}, \
- { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
- { 399,11}, { 207,10}, { 431,13}, { 8192,14}, \
- { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 66
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319,12}, \
+ { 95,11}, { 191,10}, { 399,11}, { 207,10}, \
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 303,12}, { 159,11}, { 335,10}, \
+ { 671,11}, { 367,10}, { 735,12}, { 191,11}, \
+ { 399,10}, { 799,11}, { 431,12}, { 223,11}, \
+ { 463,13}, { 127,12}, { 255,11}, { 543,12}, \
+ { 287,11}, { 607,12}, { 319,11}, { 671,12}, \
+ { 351,11}, { 735,13}, { 191,12}, { 383,11}, \
+ { 799,12}, { 415,11}, { 863,12}, { 447,11}, \
+ { 895,14}, { 127,13}, { 255,12}, { 543,11}, \
+ { 1087,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 863,13}, { 447,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1183,13}, { 639,12}, { 1279,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
+ { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1471,14}, { 767,13}, { 1663,14}, \
+ { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \
+ { 2175,14}, { 1151,13}, { 2431,14}, { 1279,13}, \
+ { 2687,14}, { 1407,15}, { 767,14}, { 1535,13}, \
+ { 3199,14}, { 1663,13}, { 3455,14}, { 1791,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 151
#define SQR_FFT_THRESHOLD 4032
#define MULLO_BASECASE_THRESHOLD 29
#define MULLO_DC_THRESHOLD 57
#define MULLO_MUL_N_THRESHOLD 11278
-#define DC_DIV_QR_THRESHOLD 59
+#define DC_DIV_QR_THRESHOLD 64
#define DC_DIVAPPR_Q_THRESHOLD 222
#define DC_BDIV_QR_THRESHOLD 95
#define DC_BDIV_Q_THRESHOLD 264
-#define INV_MULMOD_BNM1_THRESHOLD 82
-#define INV_NEWTON_THRESHOLD 11
-#define INV_APPR_THRESHOLD 18
+#define INV_MULMOD_BNM1_THRESHOLD 86
+#define INV_NEWTON_THRESHOLD 139
+#define INV_APPR_THRESHOLD 147
#define BINV_NEWTON_THRESHOLD 252
-#define REDC_1_TO_REDC_2_THRESHOLD 0
+#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
#define REDC_2_TO_REDC_N_THRESHOLD 147
#define MU_DIV_QR_THRESHOLD 1142
-#define MU_DIVAPPR_Q_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 1142
#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1187
+#define MU_BDIV_QR_THRESHOLD 1210
#define MU_BDIV_Q_THRESHOLD 1470
#define MATRIX22_STRASSEN_THRESHOLD 23
#define HGCD_THRESHOLD 117
-#define GCD_DC_THRESHOLD 469
+#define HGCD_APPR_THRESHOLD 111
+#define HGCD_REDUCE_THRESHOLD 3014
+#define GCD_DC_THRESHOLD 555
#define GCDEXT_DC_THRESHOLD 368
#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 1204
-#define SET_STR_PRECOMPUTE_THRESHOLD 3266
+#define GET_STR_PRECOMPUTE_THRESHOLD 22
+#define SET_STR_DC_THRESHOLD 1474
+#define SET_STR_PRECOMPUTE_THRESHOLD 3168
diff --git a/mpn/ia64/tabselect.asm b/mpn/ia64/tabselect.asm
new file mode 100644
index 000000000..cc5b49b04
--- /dev/null
+++ b/mpn/ia64/tabselect.asm
@@ -0,0 +1,139 @@
+dnl IA-64 mpn_tabselect.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 2.5
+
+C NOTES
+C * Using software pipelining could trivially yield 2 c/l without unrolling,
+C or 1+epsilon with unrolling. (This code was modelled after the powerpc64
+C code, for simplicity.)
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+C
+C Copy entry number which (counting from 0) of the table at tp to rp.  The
+C table holds nents entries of n limbs each, stored back to back.
+C
+C Every entry is read, and rp is rewritten once per entry: each stored limb
+C is (table limb AND mask) OR (old rp limb ANDCM mask), where mask is all
+C ones for the selected entry and zero otherwise.  The loads, stores and
+C branches executed are therefore the same whichever entry is selected.
+C
+C Both n and nents must be at least 1; zero is not handled.
+C
+C Register use besides the arguments:
+C   r2       caller value of ar.lc, restored before returning
+C   r6       value loaded into ar.lc for the two-limb loop, (n-2)/2 rounded
+C            down (meaningless but unused when n is 1)
+C   r9       n mod 2
+C   r16-r19  limb temporaries
+C Predicates:
+C   p6/p7    the current entry is / is not the selected one
+C   p8       n is even, so there is no odd limb to handle first
+C   p9       more table entries remain
+C   p10      n is 1, so the two-limb loop is skipped
+define(`rp', `r32')
+define(`tp', `r33')
+define(`n', `r34')
+define(`nents', `r35')
+define(`which', `r36')
+
+define(`mask', `r8')
+
+define(`rp1', `r32')
+define(`tp1', `r33')
+define(`rp2', `r14')
+define(`tp2', `r15')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_tabselect)
+	.prologue
+	.save	ar.lc, r2
+.mii;	nop	0
+	mov.i	r2 = ar.lc		C ar.lc is a preserved register, keep the caller value
+	nop	0
+	.body
+ifdef(`HAVE_ABI_32',`
+.mmi;	addp4	rp = 0, rp		C			M I
+	addp4	tp = 0, tp		C			M I
+	zxt4	n = n			C			I
+.mii;	nop	0
+	zxt4	nents = nents		C			I
+	zxt4	which = which		C			I
+	;;
+')
+.mmi;	add	rp2 = 8, rp1		C rp2 and tp2 run one limb ahead of rp1 and tp1
+	add	tp2 = 8, tp1
+	add	r6 = -2, n
+	;;
+.mmi;	cmp.eq	p10, p0 = 1, n		C p10 = n is 1
+	and	r9 = 1, n		C r9 = parity of n, tested into p8 below
+	shr.u	r6 = r6, 1		C inner loop count, as wanted by ar.lc
+	;;
+.mmi;	cmp.eq	p8, p0 = 0, r9		C p8 = n is even
+	sub	which = nents, which	C selected entry is reached when nents has counted down to this
+	shl	n = n, 3		C n in bytes from here on, used to rewind rp
+	;;
+
+L(outer):
+.mmi;	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
+	nop	0
+	mov	ar.lc = r6		C			I0
+	;;
+.mmb;
+   (p6)	mov	mask = -1
+   (p7)	mov	mask = 0
+   (p8)	br.dptk	L(top)			C branch to loop entry if n even
+	;;
+
+C n is odd: handle a single limb first so that an even count remains
+.mmi;	ld8	r16 = [tp1], 8
+	add	tp2 = 8, tp2
+	nop	0
+	;;
+.mmi;	ld8	r18 = [rp1]
+	and	r16 = r16, mask
+	nop	0
+	;;
+.mmi;	andcm	r18 = r18, mask
+	;;
+	or	r16 = r16, r18
+	nop	0
+	;;
+.mmb;	st8	[rp1] = r16, 8
+	add	rp2 = 8, rp2
+  (p10)	br.dpnt	L(end)			C n is 1, nothing left for the two-limb loop
+
+	ALIGN(32)
+L(top):
+C two limbs per iteration, one through rp1/tp1 and one through rp2/tp2
+.mmi;	ld8	r16 = [tp1], 16
+	ld8	r17 = [tp2], 16
+	nop	0
+	;;
+.mmi;	ld8	r18 = [rp1]
+	and	r16 = r16, mask
+	nop	0
+.mmi;	ld8	r19 = [rp2]
+	and	r17 = r17, mask
+	nop	0
+	;;
+.mmi;	andcm	r18 = r18, mask
+	andcm	r19 = r19, mask
+	nop	0
+	;;
+.mmi;	or	r16 = r16, r18
+	or	r17 = r17, r19
+	nop	0
+	;;
+.mmb;	st8	[rp1] = r16, 16
+	st8	[rp2] = r17, 16
+	br.cloop.dptk	L(top)
+	;;
+L(end):
+.mmi;	sub	rp1 = rp1, n		C move rp back to beginning
+	sub	rp2 = rp2, n		C move rp back to beginning
+	cmp.ne	p9, p0 = 1, nents	C p9 = this was not the last entry
+.mmb;	add	nents = -1, nents
+	nop	0
+   (p9)	br.dptk	L(outer)
+	;;
+
+.mib;	nop	0
+	mov.i	ar.lc = r2		C put back the caller value of ar.lc
+	br.ret.sptk.many b0
+EPILOGUE()