diff options
author | Kevin Ryde <user42@zip.com.au> | 2002-03-30 23:20:30 +0100 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2002-03-30 23:20:30 +0100 |
commit | 64d8a4619f996d6d91423e28d1bc6eb8695508c5 (patch) | |
tree | 1e1512cdf662ddeb0468cfe9905f93c0b76b0060 /mpn | |
parent | 7adb3efb61b771d93fba849454e910333b0997a9 (diff) | |
download | gmp-64d8a4619f996d6d91423e28d1bc6eb8695508c5.tar.gz |
* mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put
speeds after the copyright notice, so as to keep that clear.
Diffstat (limited to 'mpn')
97 files changed, 548 insertions, 552 deletions
diff --git a/mpn/x86/aors_n.asm b/mpn/x86/aors_n.asm index a7a1ed8d1..cd6592b7d 100644 --- a/mpn/x86/aors_n.asm +++ b/mpn/x86/aors_n.asm @@ -1,13 +1,6 @@ dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction. -dnl -dnl cycles/limb -dnl P5: 3.375 -dnl P6: 3.7 -dnl K6: 3.5 -dnl K7: 2.25 -dnl P4: 8.75 - -dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software + +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. @@ -27,10 +20,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P5: 3.375 +C P6: 3.7 +C K6: 3.5 +C K7: 2.25 +C P4: 8.75 + + ifdef(`OPERATION_add_n',` define(M4_inst, adcl) define(M4_function_n, mpn_add_n) diff --git a/mpn/x86/aorsmul_1.asm b/mpn/x86/aorsmul_1.asm index 8d90fe5c2..696956a7e 100644 --- a/mpn/x86/aorsmul_1.asm +++ b/mpn/x86/aorsmul_1.asm @@ -1,15 +1,8 @@ dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a dnl limb and add the result to a second limb vector. -dnl -dnl cycles/limb -dnl P5: 14.75 -dnl P6: 7.5 -dnl K6: 12.5 -dnl K7: 5.25 -dnl P4: 24 - -dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001 Free Software Foundation, -dnl Inc. + +dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -28,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P5: 14.75 +C P6: 7.5 +C K6: 12.5 +C K7: 5.25 +C P4: 24 + + ifdef(`OPERATION_addmul_1',` define(M4_inst, addl) define(M4_function_1, mpn_addmul_1) diff --git a/mpn/x86/copyd.asm b/mpn/x86/copyd.asm index b2eb922a5..dfac83f08 100644 --- a/mpn/x86/copyd.asm +++ b/mpn/x86/copyd.asm @@ -1,15 +1,6 @@ dnl x86 mpn_copyd -- copy limb vector, decrementing. -dnl -dnl cycles/limb startup (approx) -dnl P5: 1.0 40 -dnl P6 2.4 70 -dnl K6 1.0 55 -dnl K7: 1.3 75 -dnl P4: 2.6 175 -dnl -dnl (Startup time includes some function call overheads.) - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -31,6 +22,16 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C cycles/limb startup (approx) +C P5: 1.0 40 +C P6 2.4 70 +C K6 1.0 55 +C K7: 1.3 75 +C P4: 2.6 175 +C +C (Startup time includes some function call overheads.) + + C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Copy src,size to dst,size, working from high to low addresses. diff --git a/mpn/x86/copyi.asm b/mpn/x86/copyi.asm index 40ffcf2a2..d5e054151 100644 --- a/mpn/x86/copyi.asm +++ b/mpn/x86/copyi.asm @@ -1,16 +1,6 @@ dnl x86 mpn_copyi -- copy limb vector, incrementing. -dnl -dnl cycles/limb startup (approx) -dnl P5: 1.0 35 -dnl P6 0.75 45 -dnl K6 1.0 30 -dnl K7: 1.3 65 -dnl P4: 1.0 120 -dnl -dnl (Startup time includes some function call overheads.) - - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -29,10 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb startup (approx) +C P5: 1.0 35 +C P6 0.75 45 +C K6 1.0 30 +C K7: 1.3 65 +C P4: 1.0 120 +C +C (Startup time includes some function call overheads.) + + C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Copy src,size to dst,size, working from low to high addresses. diff --git a/mpn/x86/dive_1.asm b/mpn/x86/dive_1.asm index 4dfa131ce..7d7ce0f13 100644 --- a/mpn/x86/dive_1.asm +++ b/mpn/x86/dive_1.asm @@ -1,14 +1,6 @@ dnl x86 mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl cycles/limb -dnl P54 30.0 -dnl P55 29.0 -dnl P6 13.0 odd divisor, 12.0 even (strangely) -dnl K6 14.0 -dnl K7 12.0 -dnl P4 42.0 - -dnl Copyright 2001 Free Software Foundation, Inc. + +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -30,6 +22,15 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C cycles/limb +C P54 30.0 +C P55 29.0 +C P6 13.0 odd divisor, 12.0 even (strangely) +C K6 14.0 +C K7 12.0 +C P4 42.0 + + C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/diveby3.asm b/mpn/x86/diveby3.asm index 3d8817633..8f7870643 100644 --- a/mpn/x86/diveby3.asm +++ b/mpn/x86/diveby3.asm @@ -1,14 +1,6 @@ dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. -dnl -dnl cycles/limb -dnl P54 18.0 -dnl P55 17.0 -dnl P6 14.0 -dnl K6 14.0 -dnl K7 10.0 -dnl P4 24.0 - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -27,10 +19,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P54 18.0 +C P55 17.0 +C P6 14.0 +C K6 14.0 +C K7 10.0 +C P4 24.0 + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); diff --git a/mpn/x86/divrem_1.asm b/mpn/x86/divrem_1.asm index 2389da3de..1da474c48 100644 --- a/mpn/x86/divrem_1.asm +++ b/mpn/x86/divrem_1.asm @@ -1,16 +1,6 @@ dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient. -dnl -dnl cycles/limb -dnl 486 approx 43 maybe -dnl P5 44 -dnl P6 39 -dnl P6MMX 39 -dnl K6 20 -dnl K7 42 -dnl P4 58 - - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -29,10 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C 486 approx 43 maybe +C P5 44 +C P6 39 +C P6MMX 39 +C K6 20 +C K7 42 +C P4 58 + + C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, C mp_srcptr src, mp_size_t size, mp_limb_t divisor); C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize, diff --git a/mpn/x86/k6/aors_n.asm b/mpn/x86/k6/aors_n.asm index 579afbc45..2f718fafc 100644 --- a/mpn/x86/k6/aors_n.asm +++ b/mpn/x86/k6/aors_n.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction. -dnl -dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb. + + ifdef(`OPERATION_add_n', ` define(M4_inst, adcl) define(M4_function_n, mpn_add_n) diff --git a/mpn/x86/k6/aorsmul_1.asm b/mpn/x86/k6/aorsmul_1.asm index c6e0f40a6..7a8896dd9 100644 --- a/mpn/x86/k6/aorsmul_1.asm +++ b/mpn/x86/k6/aorsmul_1.asm @@ -1,10 +1,6 @@ dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl -dnl K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data), -dnl PIC adds about 6 cycles at the start. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data), +C PIC adds about 6 cycles at the start. + + + dnl K6: large multpliers small multpliers dnl UNROLL_COUNT cycles/limb cycles/limb dnl 4 9.5 7.78 diff --git a/mpn/x86/k6/diveby3.asm b/mpn/x86/k6/diveby3.asm index 9d5f2cf3c..3c6099874 100644 --- a/mpn/x86/k6/diveby3.asm +++ b/mpn/x86/k6/diveby3.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. -dnl -dnl K6: 11.0 cycles/limb - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 11.0 cycles/limb + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); C diff --git a/mpn/x86/k6/gcd_1.asm b/mpn/x86/k6/gcd_1.asm index e5adeb7e4..74fb99633 100644 --- a/mpn/x86/k6/gcd_1.asm +++ b/mpn/x86/k6/gcd_1.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_mod_1 -- mpn by 1 gcd. -dnl -dnl K6: 9.5 cycles/bit (approx) 1x1 gcd -dnl 11.0 cycles/limb Nx1 reduction (modexact_1_odd) -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K6: 9.5 cycles/bit (approx) 1x1 gcd +C 11.0 cycles/limb Nx1 reduction (modexact_1_odd) + + C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t y); C C This code is nothing very special, but offers a speedup over what gcc 2.95 diff --git a/mpn/x86/k6/gcd_finda.asm b/mpn/x86/k6/gcd_finda.asm index e69439812..544b49480 100644 --- a/mpn/x86/k6/gcd_finda.asm +++ b/mpn/x86/k6/gcd_finda.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_gcd_finda. -dnl -dnl K6: 680 cycles (approx) on average - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 680 cycles (approx) on average + + dnl How many trial subtractions to attempt before launching into a full dnl division. diff --git a/mpn/x86/k6/k62mmx/copyd.asm b/mpn/x86/k6/k62mmx/copyd.asm index 1f9dba538..fff168c80 100644 --- a/mpn/x86/k6/k62mmx/copyd.asm +++ b/mpn/x86/k6/k62mmx/copyd.asm @@ -1,8 +1,6 @@ dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing. -dnl -dnl K6-2: 1.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K6-2: 1.0 cycles/limb + + C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The loop here is no faster than a rep movsl at 1.0 c/l, but it avoids a 30 diff --git a/mpn/x86/k6/k62mmx/lshift.asm b/mpn/x86/k6/k62mmx/lshift.asm index 34fde7c99..a5449bf30 100644 --- a/mpn/x86/k6/k62mmx/lshift.asm +++ b/mpn/x86/k6/k62mmx/lshift.asm @@ -1,9 +1,6 @@ dnl AMD K6-2 mpn_lshift -- mpn left shift. -dnl -dnl K6-2: 1.75 cycles/limb - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6-2: 1.75 cycles/limb + + C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/k6/k62mmx/rshift.asm b/mpn/x86/k6/k62mmx/rshift.asm index f47510bc6..daaff0fe7 100644 --- a/mpn/x86/k6/k62mmx/rshift.asm +++ b/mpn/x86/k6/k62mmx/rshift.asm @@ -1,9 +1,6 @@ dnl AMD K6-2 mpn_rshift -- mpn right shift. -dnl -dnl K6-2: 1.75 cycles/limb - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6-2: 1.75 cycles/limb + + C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/k6/mmx/com_n.asm b/mpn/x86/k6/mmx/com_n.asm index 07ab394a0..e5ab55de2 100644 --- a/mpn/x86/k6/mmx/com_n.asm +++ b/mpn/x86/k6/mmx/com_n.asm @@ -1,10 +1,4 @@ dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement. -dnl -dnl alignment dst/src, A=0mod8 N=4mod8 -dnl A/A A/N N/A N/N -dnl K6-2 1.0 1.18 1.18 1.18 cycles/limb -dnl K6 1.5 1.85 1.75 1.85 - dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl @@ -25,10 +19,15 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C alignment dst/src, A=0mod8 N=4mod8 +C A/A A/N N/A N/N +C K6-2 1.0 1.18 1.18 1.18 cycles/limb +C K6 1.5 1.85 1.75 1.85 + + C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Take the bitwise ones-complement of src,size and write it to dst,size. diff --git a/mpn/x86/k6/mmx/dive_1.asm b/mpn/x86/k6/mmx/dive_1.asm index bc8cd750d..8a5f45c90 100644 --- a/mpn/x86/k6/mmx/dive_1.asm +++ b/mpn/x86/k6/mmx/dive_1.asm @@ -1,11 +1,6 @@ dnl AMD K6 mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl divisor -dnl odd even -dnl K6: 10.0 12.0 cycles/limb -dnl K6-2: 10.0 11.5 -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -27,6 +22,12 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C divisor +C odd even +C K6: 10.0 12.0 cycles/limb +C K6-2: 10.0 11.5 + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/k6/mmx/logops_n.asm b/mpn/x86/k6/mmx/logops_n.asm index ef0c5c600..98dd0fd2c 100644 --- a/mpn/x86/k6/mmx/logops_n.asm +++ b/mpn/x86/k6/mmx/logops_n.asm @@ -1,19 +1,7 @@ dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n, dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations. -dnl -dnl alignment dst/src1/src2, A=0mod8, N=4mod8 -dnl A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N -dnl -dnl K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor -dnl K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor -dnl K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior -dnl -dnl K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor -dnl K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor -dnl K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -32,10 +20,21 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C alignment dst/src1/src2, A=0mod8, N=4mod8 +C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N +C +C K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor +C K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor +C K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior +C +C K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor +C K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor +C K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior + + dnl M4_p and M4_i are the MMX and integer instructions dnl M4_*_neg_dst means whether to negate the final result before writing dnl M4_*_neg_src2 means whether to negate the src2 values before using them diff --git a/mpn/x86/k6/mmx/lshift.asm b/mpn/x86/k6/mmx/lshift.asm index 8331849ad..2293e666e 100644 --- a/mpn/x86/k6/mmx/lshift.asm +++ b/mpn/x86/k6/mmx/lshift.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_lshift -- mpn left shift. -dnl -dnl K6: 3.0 cycles/limb - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 3.0 cycles/limb + + C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/k6/mmx/popham.asm b/mpn/x86/k6/mmx/popham.asm index 06c08df4e..733a7ac86 100644 --- a/mpn/x86/k6/mmx/popham.asm +++ b/mpn/x86/k6/mmx/popham.asm @@ -1,10 +1,5 @@ dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and dnl hamming distance. -dnl -dnl popcount hamdist -dnl K6-2: 9.0 11.5 cycles/limb -dnl K6: 12.5 13.0 - dnl Copyright 2000, 2001 Free Software Foundation, Inc. dnl @@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C popcount hamdist +C K6-2: 9.0 11.5 cycles/limb +C K6: 12.5 13.0 + + C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); C diff --git a/mpn/x86/k6/mmx/rshift.asm b/mpn/x86/k6/mmx/rshift.asm index cab88562f..e80f47506 100644 --- a/mpn/x86/k6/mmx/rshift.asm +++ b/mpn/x86/k6/mmx/rshift.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_rshift -- mpn right shift. -dnl -dnl K6: 3.0 cycles/limb - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 3.0 cycles/limb + + C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/k6/mod_34lsub1.asm b/mpn/x86/k6/mod_34lsub1.asm index 29f9daa4f..4d1577ca7 100644 --- a/mpn/x86/k6/mod_34lsub1.asm +++ b/mpn/x86/k6/mod_34lsub1.asm @@ -1,8 +1,6 @@ dnl AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1. -dnl -dnl K6: 2.66 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K6: 2.66 cycles/limb + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C C An attempt was made to use a loop like diff --git a/mpn/x86/k6/mode1o.asm b/mpn/x86/k6/mode1o.asm index a0e4028ab..1d382a82d 100644 --- a/mpn/x86/k6/mode1o.asm +++ b/mpn/x86/k6/mode1o.asm @@ -1,8 +1,6 @@ dnl AMD K6 mpn_modexact_1_odd -- exact division style remainder. -dnl -dnl K6: 10.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K6: 10.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/k6/mul_1.asm b/mpn/x86/k6/mul_1.asm index eefb7595c..9be89b62d 100644 --- a/mpn/x86/k6/mul_1.asm +++ b/mpn/x86/k6/mul_1.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_mul_1 -- mpn by limb multiply. -dnl -dnl K6: 6.25 cycles/limb. - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 6.25 cycles/limb. + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/k6/mul_basecase.asm b/mpn/x86/k6/mul_basecase.asm index 195b67f9f..3b38e085b 100644 --- a/mpn/x86/k6/mul_basecase.asm +++ b/mpn/x86/k6/mul_basecase.asm @@ -1,10 +1,6 @@ dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers. -dnl -dnl K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop -dnl unrolling). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop +C unrolling). + + + dnl K6: UNROLL_COUNT cycles/product (approx) dnl 8 9.75 dnl 16 9.3 diff --git a/mpn/x86/k6/pre_mod_1.asm b/mpn/x86/k6/pre_mod_1.asm index 396998970..13e49911a 100644 --- a/mpn/x86/k6/pre_mod_1.asm +++ b/mpn/x86/k6/pre_mod_1.asm @@ -1,9 +1,6 @@ dnl AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor. -dnl -dnl K6: 18.0 cycles/limb - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: 18.0 cycles/limb + + C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, C mp_limb_t inverse); C diff --git a/mpn/x86/k6/sqr_basecase.asm b/mpn/x86/k6/sqr_basecase.asm index a86013acf..e13c60d6a 100644 --- a/mpn/x86/k6/sqr_basecase.asm +++ b/mpn/x86/k6/sqr_basecase.asm @@ -1,9 +1,4 @@ dnl AMD K6 mpn_sqr_basecase -- square an mpn number. -dnl -dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular -dnl product (measured on the speed difference between 17 and 33 limbs, -dnl which is roughly the Karatsuba recursing range). - dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl @@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular +C product (measured on the speed difference between 17 and 33 limbs, +C which is roughly the Karatsuba recursing range). + + dnl SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this dnl code supports. This value is used only by the tune program to know dnl what it can go up to. (An attempt to compile with a bigger value will diff --git a/mpn/x86/k7/aors_n.asm b/mpn/x86/k7/aors_n.asm index ad538e293..c048625cc 100644 --- a/mpn/x86/k7/aors_n.asm +++ b/mpn/x86/k7/aors_n.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract. -dnl -dnl K7: 1.64 cycles/limb (at 16 limb/loop). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 1.64 cycles/limb (at 16 limbs/loop). + + + dnl K7: UNROLL_COUNT cycles/limb dnl 8 1.9 dnl 16 1.64 diff --git a/mpn/x86/k7/aorsmul_1.asm b/mpn/x86/k7/aorsmul_1.asm index 29d418579..9bf31d8db 100644 --- a/mpn/x86/k7/aorsmul_1.asm +++ b/mpn/x86/k7/aorsmul_1.asm @@ -1,12 +1,6 @@ dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl -dnl K7: 3.9 cycles/limb. -dnl -dnl Future: It should be possible to avoid the separate mul after the -dnl unrolled loop by moving the movl/adcl to the top. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,10 +19,16 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 3.9 cycles/limb. +C +C Future: It should be possible to avoid the separate mul after the +C unrolled loop by moving the movl/adcl to the top. + + + dnl K7: UNROLL_COUNT cycles/limb dnl 4 4.42 dnl 8 4.16 diff --git a/mpn/x86/k7/dive_1.asm b/mpn/x86/k7/dive_1.asm index 3faf4cdc0..fe6c29a0d 100644 --- a/mpn/x86/k7/dive_1.asm +++ b/mpn/x86/k7/dive_1.asm @@ -1,8 +1,6 @@ -dnl AMD Athlon mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl K7: 11.0 cycles/limb +dnl AMD K7 mpn_divexact_1 -- mpn by limb exact division. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K7: 11.0 cycles/limb + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/k7/diveby3.asm b/mpn/x86/k7/diveby3.asm index 88a905f11..b612b613b 100644 --- a/mpn/x86/k7/diveby3.asm +++ b/mpn/x86/k7/diveby3.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. -dnl -dnl K7: 8.0 cycles/limb - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 8.0 cycles/limb + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); diff --git a/mpn/x86/k7/gcd_1.asm b/mpn/x86/k7/gcd_1.asm index 6c683c373..8be32b067 100644 --- a/mpn/x86/k7/gcd_1.asm +++ b/mpn/x86/k7/gcd_1.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd. -dnl -dnl K7: 6.75 cycles/bit (approx) 1x1 gcd -dnl 11.0 cycles/limb Nx1 reduction (modexact_1_odd) -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K7: 6.75 cycles/bit (approx) 1x1 gcd +C 11.0 cycles/limb Nx1 reduction (modexact_1_odd) + + dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y, dnl where x is the larger of the two. See tune/README for more. dnl diff --git a/mpn/x86/k7/mmx/com_n.asm b/mpn/x86/k7/mmx/com_n.asm index 53c96b10f..78ca6ca66 100644 --- a/mpn/x86/k7/mmx/com_n.asm +++ b/mpn/x86/k7/mmx/com_n.asm @@ -1,7 +1,4 @@ dnl AMD Athlon mpn_com_n -- mpn bitwise one's complement. -dnl -dnl K7: 1.0 cycles/limb - dnl Copyright 2002 Free Software Foundation, Inc. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 1.0 cycles/limb + + C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The loop form below is necessary for the claimed speed. It needs to be diff --git a/mpn/x86/k7/mmx/copyd.asm b/mpn/x86/k7/mmx/copyd.asm index 33332c607..8de034114 100644 --- a/mpn/x86/k7/mmx/copyd.asm +++ b/mpn/x86/k7/mmx/copyd.asm @@ -1,11 +1,6 @@ dnl AMD K7 mpn_copyd -- copy limb vector, decrementing. -dnl -dnl alignment dst/src, A=0mod8 N=4mod8 -dnl A/A A/N N/A N/N -dnl K7 0.75 1.0 1.0 0.75 - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C alignment dst/src, A=0mod8 N=4mod8 +C A/A A/N N/A N/N +C K7 0.75 1.0 1.0 0.75 + + C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The various comments in mpn/x86/k7/copyi.asm apply here too. diff --git a/mpn/x86/k7/mmx/copyi.asm b/mpn/x86/k7/mmx/copyi.asm index 5d774f9e2..5026bd5c1 100644 --- a/mpn/x86/k7/mmx/copyi.asm +++ b/mpn/x86/k7/mmx/copyi.asm @@ -1,11 +1,6 @@ dnl AMD K7 mpn_copyi -- copy limb vector, incrementing. -dnl -dnl alignment dst/src, A=0mod8 N=4mod8 -dnl A/A A/N N/A N/N -dnl K7 0.75 1.0 1.0 0.75 - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C alignment dst/src, A=0mod8 N=4mod8 +C A/A A/N N/A N/N +C K7 0.75 1.0 1.0 0.75 + + C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Copy src,size to dst,size. diff --git a/mpn/x86/k7/mmx/divrem_1.asm b/mpn/x86/k7/mmx/divrem_1.asm index ef5b37b5f..161ae4643 100644 --- a/mpn/x86/k7/mmx/divrem_1.asm +++ b/mpn/x86/k7/mmx/divrem_1.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_divrem_1 -- mpn by limb division. -dnl -dnl K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part. + + C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, C mp_srcptr src, mp_size_t size, C mp_limb_t divisor); diff --git a/mpn/x86/k7/mmx/lshift.asm b/mpn/x86/k7/mmx/lshift.asm index 9a9f8bd81..5316763b0 100644 --- a/mpn/x86/k7/mmx/lshift.asm +++ b/mpn/x86/k7/mmx/lshift.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_lshift -- mpn left shift. -dnl -dnl K7: 1.21 cycles/limb (at 16 limbs/loop). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 1.21 cycles/limb (at 16 limbs/loop). + + + dnl K7: UNROLL_COUNT cycles/limb dnl 4 1.51 dnl 8 1.26 diff --git a/mpn/x86/k7/mmx/mod_1.asm b/mpn/x86/k7/mmx/mod_1.asm index 192a9f360..b27531cca 100644 --- a/mpn/x86/k7/mmx/mod_1.asm +++ b/mpn/x86/k7/mmx/mod_1.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_mod_1 -- mpn by limb remainder. -dnl -dnl K7: 17.0 cycles/limb. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 17.0 cycles/limb. + + C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, C mp_limb_t carry); diff --git a/mpn/x86/k7/mmx/popham.asm b/mpn/x86/k7/mmx/popham.asm index 61e009fcc..f7f2daeef 100644 --- a/mpn/x86/k7/mmx/popham.asm +++ b/mpn/x86/k7/mmx/popham.asm @@ -1,10 +1,7 @@ dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming dnl distance. -dnl -dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb + + C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); C diff --git a/mpn/x86/k7/mmx/rshift.asm b/mpn/x86/k7/mmx/rshift.asm index 7013a1623..ba7ef81c0 100644 --- a/mpn/x86/k7/mmx/rshift.asm +++ b/mpn/x86/k7/mmx/rshift.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_rshift -- mpn right shift. -dnl -dnl K7: 1.21 cycles/limb (at 16 limbs/loop). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 1.21 cycles/limb (at 16 limbs/loop). + + + dnl K7: UNROLL_COUNT cycles/limb dnl 4 1.51 dnl 8 1.26 diff --git a/mpn/x86/k7/mod_34lsub1.asm b/mpn/x86/k7/mod_34lsub1.asm index 22a335a7d..45307b9ec 100644 --- a/mpn/x86/k7/mod_34lsub1.asm +++ b/mpn/x86/k7/mod_34lsub1.asm @@ -1,8 +1,6 @@ dnl AMD K7 mpn_mod_32lsub1 -- remainder modulo 2^24-1. -dnl -dnl K7: 1.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K7: 1.0 cycles/limb + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C C The loop form below and the 64 byte code alignment seem necessary for the diff --git a/mpn/x86/k7/mode1o.asm b/mpn/x86/k7/mode1o.asm index 5888e42d0..ddb9e5bb1 100644 --- a/mpn/x86/k7/mode1o.asm +++ b/mpn/x86/k7/mode1o.asm @@ -1,8 +1,6 @@ dnl AMD K7 mpn_modexact_1_odd -- exact division style remainder. -dnl -dnl K7: 11.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C K7: 11.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/k7/mul_1.asm b/mpn/x86/k7/mul_1.asm index e60a8fb3b..9fa9625ab 100644 --- a/mpn/x86/k7/mul_1.asm +++ b/mpn/x86/k7/mul_1.asm @@ -1,9 +1,6 @@ dnl AMD K7 mpn_mul_1 -- mpn by limb multiply. -dnl -dnl K7: 3.4 cycles/limb (at 16 limbs/loop). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: 3.4 cycles/limb (at 16 limbs/loop). + + + dnl K7: UNROLL_COUNT cycles/limb dnl 8 3.9 dnl 16 3.4 diff --git a/mpn/x86/k7/mul_basecase.asm b/mpn/x86/k7/mul_basecase.asm index 03b5c362b..5800ce0ec 100644 --- a/mpn/x86/k7/mul_basecase.asm +++ b/mpn/x86/k7/mul_basecase.asm @@ -1,10 +1,6 @@ dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers. -dnl -dnl K7: approx 4.42 cycles per cross product at around 20x20 limbs (16 -dnl limbs/loop unrolling). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: approx 4.42 cycles per cross product at around 20x20 limbs (16 +C limbs/loop unrolling). + + + dnl K7 UNROLL_COUNT cycles/product (at around 20x20) dnl 8 4.67 dnl 16 4.59 diff --git a/mpn/x86/k7/sqr_basecase.asm b/mpn/x86/k7/sqr_basecase.asm index d987df2ab..1d29c8241 100644 --- a/mpn/x86/k7/sqr_basecase.asm +++ b/mpn/x86/k7/sqr_basecase.asm @@ -1,9 +1,4 @@ dnl AMD K7 mpn_sqr_basecase -- square an mpn number. -dnl -dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product -dnl (measured on the speed difference between 25 and 50 limbs, which is -dnl roughly the Karatsuba recursing range). - dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl @@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product +C (measured on the speed difference between 25 and 50 limbs, which is +C roughly the Karatsuba recursing range). + + dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for dnl some comments. diff --git a/mpn/x86/lshift.asm b/mpn/x86/lshift.asm index 54cb0cf3b..93f144711 100644 --- a/mpn/x86/lshift.asm +++ b/mpn/x86/lshift.asm @@ -1,16 +1,7 @@ dnl x86 mpn_lshift -- mpn left shift. -dnl -dnl cycles/limb -dnl P54: 7.5 -dnl P55: 7.0 -dnl P6: 2.5 -dnl K6: 4.5 -dnl K7: 5.0 -dnl P4: 14.5 - - -dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001 Free Software Foundation, -dnl Inc. + +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -29,10 +20,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P54: 7.5 +C P55: 7.0 +C P6: 2.5 +C K6: 4.5 +C K7: 5.0 +C P4: 14.5 + + C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); diff --git a/mpn/x86/mod_1.asm b/mpn/x86/mod_1.asm index 97aa308b7..fd251332a 100644 --- a/mpn/x86/mod_1.asm +++ b/mpn/x86/mod_1.asm @@ -1,14 +1,6 @@ dnl x86 mpn_mod_1 -- mpn by limb remainder. -dnl -dnl cycles/limb -dnl 486 42 approx, maybe -dnl P5 44 -dnl P6 39 -dnl K6 20 -dnl K7 41 -dnl P4 58 - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -30,6 +22,15 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C cycles/limb +C 486 42 approx, maybe +C P5 44 +C P6 39 +C K6 20 +C K7 41 +C P4 58 + + C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, C mp_limb_t carry); diff --git a/mpn/x86/mod_34lsub1.asm b/mpn/x86/mod_34lsub1.asm index 928e937a2..d89a2c209 100644 --- a/mpn/x86/mod_34lsub1.asm +++ b/mpn/x86/mod_34lsub1.asm @@ -1,13 +1,6 @@ dnl Generic x86 mpn_mod_32lsub1 -- mpn remainder modulo 2^24-1. -dnl -dnl cycles/limb -dnl P5: 3.0 -dnl P6: 3.66 -dnl K6: 3.0 -dnl K7: 1.3 -dnl P4: 9 - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. + +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -29,6 +22,14 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C cycles/limb +C P5: 3.0 +C P6: 3.66 +C K6: 3.0 +C K7: 1.3 +C P4: 9 + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C diff --git a/mpn/x86/mul_1.asm b/mpn/x86/mul_1.asm index fecefc855..c989e40da 100644 --- a/mpn/x86/mul_1.asm +++ b/mpn/x86/mul_1.asm @@ -1,15 +1,7 @@ dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector dnl with a limb and store the result in a second limb vector. -dnl -dnl cycles/limb -dnl P5: 12.5 -dnl P6: 5.5 -dnl K6: 10.5 -dnl K7: 4.5 -dnl P4: 19 - -dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001 Free Software +dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. @@ -29,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P5: 12.5 +C P6: 5.5 +C K6: 10.5 +C K7: 4.5 +C P4: 19 + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); diff --git a/mpn/x86/mul_basecase.asm b/mpn/x86/mul_basecase.asm index 1b1778797..fb0c46b66 100644 --- a/mpn/x86/mul_basecase.asm +++ b/mpn/x86/mul_basecase.asm @@ -1,16 +1,8 @@ dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result dnl in a third limb vector. -dnl -dnl cycles/crossproduct -dnl P5: 15 -dnl P6: 7.5 -dnl K6: 12.5 -dnl K7: 5.5 -dnl P4: 24 - -dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001 Free Software Foundation, -dnl Inc. +dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -29,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/crossproduct +C P5: 15 +C P6: 7.5 +C K6: 12.5 +C K7: 5.5 +C P4: 24 + + C void mpn_mul_basecase (mp_ptr wp, C mp_srcptr xp, mp_size_t xsize, C mp_srcptr yp, mp_size_t ysize); diff --git a/mpn/x86/p6/aorsmul_1.asm b/mpn/x86/p6/aorsmul_1.asm index ba3e5146f..7aab1afa4 100644 --- a/mpn/x86/p6/aorsmul_1.asm +++ b/mpn/x86/p6/aorsmul_1.asm @@ -1,9 +1,6 @@ dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl -dnl P6: 6.35 cycles/limb (at 16 limbs/loop). - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P6: 6.35 cycles/limb (at 16 limbs/loop). + + dnl P6 UNROLL_COUNT cycles/limb dnl 8 6.7 dnl 16 6.35 diff --git a/mpn/x86/p6/copyd.asm b/mpn/x86/p6/copyd.asm index b4ed341e4..ddcaafaff 100644 --- a/mpn/x86/p6/copyd.asm +++ b/mpn/x86/p6/copyd.asm @@ -1,8 +1,6 @@ dnl Intel P6 mpn_copyd -- copy limb vector backwards. -dnl -dnl P6: 1.75 cycles/limb, or 0.75 if no overlap -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P6: 1.75 cycles/limb, or 0.75 if no overlap + + C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); C C An explicit loop is used because a decrementing rep movsl is a bit slow at diff --git a/mpn/x86/p6/dive_1.asm b/mpn/x86/p6/dive_1.asm index 478781d28..d512762d7 100644 --- a/mpn/x86/p6/dive_1.asm +++ b/mpn/x86/p6/dive_1.asm @@ -1,9 +1,6 @@ dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder. -dnl -dnl odd even divisor -dnl P6: 10.0 12.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C odd even divisor +C P6: 10.0 12.0 cycles/limb + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/p6/diveby3.asm b/mpn/x86/p6/diveby3.asm index e5abbe905..b9c0bbd7d 100644 --- a/mpn/x86/p6/diveby3.asm +++ b/mpn/x86/p6/diveby3.asm @@ -1,9 +1,6 @@ dnl Intel P6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. -dnl -dnl P6: 8.5 cycles/limb - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,16 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. +include(`../config.m4') -dnl The P5 code runs well on P6, in fact better than anything else found so -dnl far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the -dnl dependent path are taking 4.5 cycles. -dnl -dnl The destination cache line prefetching is unnecessary on P6, but -dnl removing it is a 2 cycle slowdown (approx), so it must be inducing -dnl something good in the out of order execution. -include(`../config.m4') +C P6: 8.5 cycles/limb + + +C The P5 code runs well on P6, in fact better than anything else found so +C far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the dependent +C path are taking 4.5 cycles. +C +C The destination cache line prefetching is unnecessary on P6, but removing +C it is a 2 cycle slowdown (approx), so it must be inducing something good +C in the out of order execution. MULFUNC_PROLOGUE(mpn_divexact_by3c) include_mpn(`x86/pentium/diveby3.asm') diff --git a/mpn/x86/p6/mmx/divrem_1.asm b/mpn/x86/p6/mmx/divrem_1.asm index 36a0d9837..da85aca10 100644 --- a/mpn/x86/p6/mmx/divrem_1.asm +++ b/mpn/x86/p6/mmx/divrem_1.asm @@ -1,9 +1,6 @@ dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division. -dnl -dnl P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part. + + C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, C mp_srcptr src, mp_size_t size, C mp_limb_t divisor); diff --git a/mpn/x86/p6/mmx/popham.asm b/mpn/x86/p6/mmx/popham.asm index e00a8fada..9188af743 100644 --- a/mpn/x86/p6/mmx/popham.asm +++ b/mpn/x86/p6/mmx/popham.asm @@ -1,11 +1,7 @@ dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and dnl hamming distance. -dnl -dnl P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb -dnl (approx) - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,8 +20,11 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') + +C P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb (approx) + + MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) include_mpn(`x86/k6/mmx/popham.asm') diff --git a/mpn/x86/p6/mod_1.asm b/mpn/x86/p6/mod_1.asm index 5ffbb568c..84bb1034c 100644 --- a/mpn/x86/p6/mod_1.asm +++ b/mpn/x86/p6/mod_1.asm @@ -1,9 +1,6 @@ dnl Intel P6 mpn_mod_1 -- mpn by limb remainder. -dnl -dnl P6: 21.5 cycles/limb - -dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P6: 21.5 cycles/limb + + C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, C mp_limb_t carry); diff --git a/mpn/x86/p6/mode1o.asm b/mpn/x86/p6/mode1o.asm index 2f01e6646..7e468fdd9 100644 --- a/mpn/x86/p6/mode1o.asm +++ b/mpn/x86/p6/mode1o.asm @@ -1,8 +1,6 @@ dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder. -dnl -dnl P6: 10.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P6: 10.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/p6/sqr_basecase.asm b/mpn/x86/p6/sqr_basecase.asm index 9a2aa2b60..be306ccda 100644 --- a/mpn/x86/p6/sqr_basecase.asm +++ b/mpn/x86/p6/sqr_basecase.asm @@ -1,9 +1,4 @@ dnl Intel P6 mpn_sqr_basecase -- square an mpn number. -dnl -dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular -dnl product (measured on the speed difference between 20 and 40 limbs, -dnl which is the Karatsuba recursing range). - dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl @@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular +C product (measured on the speed difference between 20 and 40 limbs, +C which is the Karatsuba recursing range). + + dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for dnl a description. The only difference here is that UNROLL_COUNT can go up dnl to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67. diff --git a/mpn/x86/pentium/aors_n.asm b/mpn/x86/pentium/aors_n.asm index bc190776c..1a9ab6e84 100644 --- a/mpn/x86/pentium/aors_n.asm +++ b/mpn/x86/pentium/aors_n.asm @@ -1,9 +1,6 @@ dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction. -dnl -dnl P5: 2.375 cycles/limb - -dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. @@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 2.375 cycles/limb + + ifdef(`OPERATION_add_n',` define(M4_inst, adcl) define(M4_function_n, mpn_add_n) diff --git a/mpn/x86/pentium/aorsmul_1.asm b/mpn/x86/pentium/aorsmul_1.asm index ae68fe08f..428ba8ddc 100644 --- a/mpn/x86/pentium/aorsmul_1.asm +++ b/mpn/x86/pentium/aorsmul_1.asm @@ -1,9 +1,6 @@ dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. -dnl -dnl P5: 14.0 cycles/limb - -dnl Copyright 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. dnl dnl This file is part of the GNU MP Library. @@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. */ - include(`../config.m4') +C P5: 14.0 cycles/limb + + ifdef(`OPERATION_addmul_1', ` define(M4_inst, addl) define(M4_function_1, mpn_addmul_1) diff --git a/mpn/x86/pentium/com_n.asm b/mpn/x86/pentium/com_n.asm index 4e7c651a1..0d479b0bc 100644 --- a/mpn/x86/pentium/com_n.asm +++ b/mpn/x86/pentium/com_n.asm @@ -1,6 +1,4 @@ dnl Intel Pentium mpn_com_n -- mpn ones complement. -dnl -dnl P5: 1.75 cycles/limb dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 1.75 cycles/limb + + C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C This code is similar to mpn_copyi, basically there's just some "xorl diff --git a/mpn/x86/pentium/copyd.asm b/mpn/x86/pentium/copyd.asm index fda5e6a2a..3487d0e6e 100644 --- a/mpn/x86/pentium/copyd.asm +++ b/mpn/x86/pentium/copyd.asm @@ -1,8 +1,6 @@ dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing. -dnl -dnl P5: 1.25 cycles/limb -dnl Copyright 1996, 2001 Free Software Foundation, Inc. +dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 1.25 cycles/limb + + C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); C C See comments in copyi.asm. diff --git a/mpn/x86/pentium/copyi.asm b/mpn/x86/pentium/copyi.asm index 48321b965..a246f5d99 100644 --- a/mpn/x86/pentium/copyi.asm +++ b/mpn/x86/pentium/copyi.asm @@ -1,8 +1,6 @@ dnl Intel Pentium mpn_copyi -- copy limb vector, incrementing. -dnl -dnl P5: 1.25 cycles/limb -dnl Copyright 1996, 2001 Free Software Foundation, Inc. +dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 1.25 cycles/limb + + C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Destination prefetching is done to avoid repeated write-throughs on lines diff --git a/mpn/x86/pentium/dive_1.asm b/mpn/x86/pentium/dive_1.asm index 01b41d982..b311d4c0c 100644 --- a/mpn/x86/pentium/dive_1.asm +++ b/mpn/x86/pentium/dive_1.asm @@ -1,11 +1,6 @@ dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl divisor -dnl odd even -dnl P54: 24.5 30.5 cycles/limb -dnl P55: 23.0 28.0 -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -27,6 +22,12 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C divisor +C odd even +C P54: 24.5 30.5 cycles/limb +C P55: 23.0 28.0 + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/pentium/diveby3.asm b/mpn/x86/pentium/diveby3.asm index 8ab098602..1497a1a1f 100644 --- a/mpn/x86/pentium/diveby3.asm +++ b/mpn/x86/pentium/diveby3.asm @@ -1,9 +1,6 @@ dnl Intel P5 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. -dnl -dnl P5: 15.0 cycles/limb - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 15.0 cycles/limb + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); diff --git a/mpn/x86/pentium/hamdist.asm b/mpn/x86/pentium/hamdist.asm index 04694a68f..cc5abc6fc 100644 --- a/mpn/x86/pentium/hamdist.asm +++ b/mpn/x86/pentium/hamdist.asm @@ -1,8 +1,6 @@ dnl Intel P5 mpn_hamdist -- mpn hamming distance. -dnl -dnl P5: 14.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 14.0 cycles/limb + + C unsigned long mpn_hamdist (mp_srcptr src1, mp_srcptr src2, mp_size_t size); C C It might be possible to shave 1 cycle from the loop, and hence 2 diff --git a/mpn/x86/pentium/logops_n.asm b/mpn/x86/pentium/logops_n.asm index 47f649e61..feea75b81 100644 --- a/mpn/x86/pentium/logops_n.asm +++ b/mpn/x86/pentium/logops_n.asm @@ -1,7 +1,4 @@ dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations. -dnl -dnl P5: 3.0 c/l and, ior, xor -dnl 3.5 c/l andn, iorn, nand, nior, xnor dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 3.0 c/l and, ior, xor +C 3.5 c/l andn, iorn, nand, nior, xnor + + define(M4_choose_op, `ifdef(`OPERATION_$1',` define(`M4_function', `mpn_$1') diff --git a/mpn/x86/pentium/lshift.asm b/mpn/x86/pentium/lshift.asm index a576526d7..721d7a0ed 100644 --- a/mpn/x86/pentium/lshift.asm +++ b/mpn/x86/pentium/lshift.asm @@ -1,11 +1,6 @@ dnl Intel Pentium mpn_lshift -- mpn left shift. -dnl -dnl cycles/limb -dnl P5,P54: 6.0 -dnl P55: 5.375 - -dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. @@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P5,P54: 6.0 +C P55: 5.375 + + C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/pentium/mmx/lshift.asm b/mpn/x86/pentium/mmx/lshift.asm index 95202760c..21baff1b9 100644 --- a/mpn/x86/pentium/mmx/lshift.asm +++ b/mpn/x86/pentium/mmx/lshift.asm @@ -1,9 +1,6 @@ dnl Intel P5 mpn_lshift -- mpn left shift. -dnl -dnl P5: 1.75 cycles/limb. - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 1.75 cycles/limb. + + C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/pentium/mmx/mul_1.asm b/mpn/x86/pentium/mmx/mul_1.asm index 18c214490..3acffef78 100644 --- a/mpn/x86/pentium/mmx/mul_1.asm +++ b/mpn/x86/pentium/mmx/mul_1.asm @@ -1,10 +1,6 @@ dnl Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication. -dnl -dnl cycles/limb -dnl P5: 12.0 for 32-bit multiplier -dnl 7.0 for 16-bit multiplier -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -26,6 +22,11 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C cycles/limb +C P5: 12.0 for 32-bit multiplier +C 7.0 for 16-bit multiplier + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C diff --git a/mpn/x86/pentium/mmx/popham.asm b/mpn/x86/pentium/mmx/popham.asm index 7e15f49b5..3552a9331 100644 --- a/mpn/x86/pentium/mmx/popham.asm +++ b/mpn/x86/pentium/mmx/popham.asm @@ -1,10 +1,7 @@ dnl Intel P55 mpn_popcount, mpn_hamdist -- population count and hamming dnl distance. -dnl -dnl P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,8 +20,11 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') + +C P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb + + MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) include_mpn(`x86/k6/mmx/popham.asm') diff --git a/mpn/x86/pentium/mmx/rshift.asm b/mpn/x86/pentium/mmx/rshift.asm index 8b03d5811..26afeff59 100644 --- a/mpn/x86/pentium/mmx/rshift.asm +++ b/mpn/x86/pentium/mmx/rshift.asm @@ -1,9 +1,6 @@ dnl Intel P5 mpn_rshift -- mpn right shift. -dnl -dnl P5: 1.75 cycles/limb. - -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 1.75 cycles/limb. + + C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/pentium/mod_1.asm b/mpn/x86/pentium/mod_1.asm index 065efef9d..b42b3dc91 100644 --- a/mpn/x86/pentium/mod_1.asm +++ b/mpn/x86/pentium/mod_1.asm @@ -1,9 +1,6 @@ dnl Intel P5 mpn_mod_1 -- mpn by limb remainder. -dnl -dnl P5: 28.0 cycles/limb - -dnl Copyright 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 28.0 cycles/limb + + C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, C mp_limb_t carry); diff --git a/mpn/x86/pentium/mod_34lsub1.asm b/mpn/x86/pentium/mod_34lsub1.asm index 84ac6ec9b..881af5e9b 100644 --- a/mpn/x86/pentium/mod_34lsub1.asm +++ b/mpn/x86/pentium/mod_34lsub1.asm @@ -1,8 +1,6 @@ dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1. -dnl -dnl P5: 1.66 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 1.66 cycles/limb + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C diff --git a/mpn/x86/pentium/mode1o.asm b/mpn/x86/pentium/mode1o.asm index 2f74463c7..b99d3f686 100644 --- a/mpn/x86/pentium/mode1o.asm +++ b/mpn/x86/pentium/mode1o.asm @@ -1,9 +1,6 @@ dnl Intel Pentium mpn_modexact_1_odd -- exact division style remainder. -dnl -dnl P5: 23.0 cycles/limb - -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 23.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium/mul_1.asm b/mpn/x86/pentium/mul_1.asm index 9e727a08f..66309f4c8 100644 --- a/mpn/x86/pentium/mul_1.asm +++ b/mpn/x86/pentium/mul_1.asm @@ -1,8 +1,6 @@ dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. -dnl -dnl P5: 12.0 cycles/limb -dnl Copyright 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. dnl dnl This file is part of the GNU MP Library. @@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. */ - include(`../config.m4') +C P5: 12.0 cycles/limb + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium/mul_2.asm b/mpn/x86/pentium/mul_2.asm index 5d9f4023e..39c9e6989 100644 --- a/mpn/x86/pentium/mul_2.asm +++ b/mpn/x86/pentium/mul_2.asm @@ -1,8 +1,6 @@ dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication. -dnl -dnl P5: 24.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. */ include(`../config.m4') +C P5: 24.0 cycles/limb + + C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_srcptr mult); C diff --git a/mpn/x86/pentium/mul_basecase.asm b/mpn/x86/pentium/mul_basecase.asm index 38a4bcadc..615051248 100644 --- a/mpn/x86/pentium/mul_basecase.asm +++ b/mpn/x86/pentium/mul_basecase.asm @@ -1,9 +1,6 @@ dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication. -dnl -dnl P5: 14.2 cycles/crossproduct (approx) - -dnl Copyright 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: 14.2 cycles/crossproduct (approx) + + C void mpn_mul_basecase (mp_ptr wp, C mp_srcptr xp, mp_size_t xsize, C mp_srcptr yp, mp_size_t ysize); diff --git a/mpn/x86/pentium/popcount.asm b/mpn/x86/pentium/popcount.asm index e1f21b171..deb078175 100644 --- a/mpn/x86/pentium/popcount.asm +++ b/mpn/x86/pentium/popcount.asm @@ -1,8 +1,6 @@ dnl Intel P5 mpn_popcount -- mpn bit population count. -dnl -dnl P5: 8.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P5: 8.0 cycles/limb + + C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); C C An arithmetic approach has been found to be slower than the table lookup, diff --git a/mpn/x86/pentium/rshift.asm b/mpn/x86/pentium/rshift.asm index c50f2f924..6d8f14d71 100644 --- a/mpn/x86/pentium/rshift.asm +++ b/mpn/x86/pentium/rshift.asm @@ -1,11 +1,6 @@ dnl Intel Pentium mpn_rshift -- mpn right shift. -dnl -dnl cycles/limb -dnl P5,P54: 6.0 -dnl P55: 5.375 - -dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. @@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P5,P54: 6.0 +C P55: 5.375 + + C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); C diff --git a/mpn/x86/pentium/sqr_basecase.asm b/mpn/x86/pentium/sqr_basecase.asm index ba0786d97..06aab2eb4 100644 --- a/mpn/x86/pentium/sqr_basecase.asm +++ b/mpn/x86/pentium/sqr_basecase.asm @@ -1,10 +1,6 @@ dnl Intel P5 mpn_sqr_basecase -- square an mpn number. -dnl -dnl P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular -dnl product at around 20x20 limbs. - -dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular +C product at around 20x20 limbs. + + C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Calculate src,size squared, storing the result in dst,2*size. diff --git a/mpn/x86/pentium4/mmx/lshift.asm b/mpn/x86/pentium4/mmx/lshift.asm index f59891358..a0bd867fa 100644 --- a/mpn/x86/pentium4/mmx/lshift.asm +++ b/mpn/x86/pentium4/mmx/lshift.asm @@ -1,8 +1,6 @@ -dnl Intel Pentium 4 mpn_lshift -- left shift. -dnl -dnl Pentium 4: 1.75 cycles/limb. +dnl Intel Pentium-4 mpn_lshift -- left shift. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,5 +21,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') + +C P4: 1.75 cycles/limb. + + MULFUNC_PROLOGUE(mpn_lshift) include_mpn(`x86/pentium/mmx/lshift.asm') diff --git a/mpn/x86/pentium4/mmx/popham.asm b/mpn/x86/pentium4/mmx/popham.asm index ebcdb9197..516de5bd0 100644 --- a/mpn/x86/pentium4/mmx/popham.asm +++ b/mpn/x86/pentium4/mmx/popham.asm @@ -1,10 +1,7 @@ dnl Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and dnl hamming distance. -dnl -dnl P4: popcount 8.5 cycles/limb -dnl hamdist 9.5 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -26,6 +23,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: popcount 8.5 cycles/limb +C hamdist 9.5 cycles/limb + + C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); C diff --git a/mpn/x86/pentium4/mmx/rshift.asm b/mpn/x86/pentium4/mmx/rshift.asm index 2f4bf937e..b1db05950 100644 --- a/mpn/x86/pentium4/mmx/rshift.asm +++ b/mpn/x86/pentium4/mmx/rshift.asm @@ -1,8 +1,6 @@ -dnl Intel Pentium 4 mpn_rshift -- right shift. -dnl -dnl Pentium 4: 1.75 cycles/limb. +dnl Intel Pentium-4 mpn_rshift -- right shift. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,5 +21,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') + +C P4: 1.75 cycles/limb. + + MULFUNC_PROLOGUE(mpn_rshift) include_mpn(`x86/pentium/mmx/rshift.asm') diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm index c67d4b9d9..f138bb40b 100644 --- a/mpn/x86/pentium4/sse2/add_n.asm +++ b/mpn/x86/pentium4/sse2/add_n.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_add_n -- mpn addition. -dnl -dnl P4: 4.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 + + C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size); C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm index d0e0bf10c..7c52cd3f2 100644 --- a/mpn/x86/pentium4/sse2/addmul_1.asm +++ b/mpn/x86/pentium4/sse2/addmul_1.asm @@ -1,10 +1,7 @@ dnl Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add dnl the result to a second limb vector. -dnl -dnl Pentium4: 6 cycles/limb, unstable timing, at least on early Pentium4 -dnl silicon (stepping 10). -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 6 cycles/limb, unstable timing, at least on early Pentium4 silicon +C (stepping 10). + + C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm index c5f18cad0..ad53dad89 100644 --- a/mpn/x86/pentium4/sse2/dive_1.asm +++ b/mpn/x86/pentium4/sse2/dive_1.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl P4: 19.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 19.0 cycles/limb + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/pentium4/sse2/diveby3.asm b/mpn/x86/pentium4/sse2/diveby3.asm index fade75a1f..e258caa4d 100644 --- a/mpn/x86/pentium4/sse2/diveby3.asm +++ b/mpn/x86/pentium4/sse2/diveby3.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_divexact_by3 -- mpn exact division by 3. -dnl -dnl P4: 18.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 18.0 cycles/limb + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); C diff --git a/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/mpn/x86/pentium4/sse2/mod_34lsub1.asm index 7b5cd145b..50481f0d8 100644 --- a/mpn/x86/pentium4/sse2/mod_34lsub1.asm +++ b/mpn/x86/pentium4/sse2/mod_34lsub1.asm @@ -1,8 +1,6 @@ dnl Intel Pentium 4 mpn_mod_32lsub1 -- remainder modulo 2^24-1. -dnl -dnl Pentium4: 1.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C Pentium4: 1.0 cycles/limb + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C C Enhancements: diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm index 191d07127..14ccae86b 100644 --- a/mpn/x86/pentium4/sse2/mode1o.asm +++ b/mpn/x86/pentium4/sse2/mode1o.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder. -dnl -dnl P4: 19.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 19.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm index 9bf9dea1f..b3916966f 100644 --- a/mpn/x86/pentium4/sse2/mul_1.asm +++ b/mpn/x86/pentium4/sse2/mul_1.asm @@ -1,9 +1,7 @@ dnl Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store dnl the result in a second limb vector. -dnl -dnl Pentium4: 4 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4 cycles/limb + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/mul_basecase.asm b/mpn/x86/pentium4/sse2/mul_basecase.asm index 3e7672b2e..af7c18d8e 100644 --- a/mpn/x86/pentium4/sse2/mul_basecase.asm +++ b/mpn/x86/pentium4/sse2/mul_basecase.asm @@ -1,8 +1,6 @@ -dnl Intel Pentium 4 mpn_mul_basecase -- mpn by mpn multiplication. -dnl -dnl P4: 6.0 cycles/crossproduct (approx) +dnl Intel Pentium-4 mpn_mul_basecase -- mpn by mpn multiplication. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 6.0 cycles/crossproduct (approx) + + C void mpn_mul_basecase (mp_ptr wp, C mp_srcptr xp, mp_size_t xsize, C mp_srcptr yp, mp_size_t ysize); diff --git a/mpn/x86/pentium4/sse2/sqr_basecase.asm b/mpn/x86/pentium4/sse2/sqr_basecase.asm index 4bcd798dc..a32a708ad 100644 --- a/mpn/x86/pentium4/sse2/sqr_basecase.asm +++ b/mpn/x86/pentium4/sse2/sqr_basecase.asm @@ -1,9 +1,6 @@ dnl Intel Pentium-4 mpn_sqr_basecase -- square an mpn number. -dnl -dnl P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular -dnl product, at around 30x30 limbs. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular +C product, at around 30x30 limbs. + + C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm index ff4004d9a..5e6667b22 100644 --- a/mpn/x86/pentium4/sse2/sub_n.asm +++ b/mpn/x86/pentium4/sse2/sub_n.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction. -dnl -dnl P4: 4.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 + + C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size); C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm index 558cb1ca5..a43ea8afa 100644 --- a/mpn/x86/pentium4/sse2/submul_1.asm +++ b/mpn/x86/pentium4/sse2/submul_1.asm @@ -1,10 +1,7 @@ dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. -dnl -dnl Pentium4: 7 cycles/limb, unstable timing, at least on early Pentium4 -dnl silicon (stepping 10). -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon +C (stepping 10). + + C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t mult); C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/rshift.asm b/mpn/x86/rshift.asm index 8f9b54a87..837f1828c 100644 --- a/mpn/x86/rshift.asm +++ b/mpn/x86/rshift.asm @@ -1,15 +1,7 @@ dnl x86 mpn_rshift -- mpn right shift. -dnl -dnl cycles/limb -dnl P54: 7.5 -dnl P55: 7.0 -dnl P6: 2.5 -dnl K6: 4.5 -dnl K7: 5.0 -dnl P4: 16.5 - -dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001 Free Software Foundation, -dnl Inc. + +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -28,10 +20,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C cycles/limb +C P54: 7.5 +C P55: 7.0 +C P6: 2.5 +C K6: 4.5 +C K7: 5.0 +C P4: 16.5 + + C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, C unsigned shift); |