summary | refs | log | tree | commit | diff
path: root/mpn
diff options
context:
space:
mode:
author Kevin Ryde <user42@zip.com.au> 2002-03-30 23:20:30 +0100
committer Kevin Ryde <user42@zip.com.au> 2002-03-30 23:20:30 +0100
commit 64d8a4619f996d6d91423e28d1bc6eb8695508c5 (patch)
tree 1e1512cdf662ddeb0468cfe9905f93c0b76b0060 /mpn
parent 7adb3efb61b771d93fba849454e910333b0997a9 (diff)
download gmp-64d8a4619f996d6d91423e28d1bc6eb8695508c5.tar.gz
* mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put
speeds after the copyright notice, so as to keep that clear.
Diffstat (limited to 'mpn')
-rw-r--r-- mpn/x86/aors_n.asm 20
-rw-r--r-- mpn/x86/aorsmul_1.asm 22
-rw-r--r-- mpn/x86/copyd.asm 23
-rw-r--r-- mpn/x86/copyi.asm 25
-rw-r--r-- mpn/x86/dive_1.asm 21
-rw-r--r-- mpn/x86/diveby3.asm 22
-rw-r--r-- mpn/x86/divrem_1.asm 25
-rw-r--r-- mpn/x86/k6/aors_n.asm 9
-rw-r--r-- mpn/x86/k6/aorsmul_1.asm 12
-rw-r--r-- mpn/x86/k6/diveby3.asm 9
-rw-r--r-- mpn/x86/k6/gcd_1.asm 9
-rw-r--r-- mpn/x86/k6/gcd_finda.asm 9
-rw-r--r-- mpn/x86/k6/k62mmx/copyd.asm 7
-rw-r--r-- mpn/x86/k6/k62mmx/lshift.asm 9
-rw-r--r-- mpn/x86/k6/k62mmx/rshift.asm 9
-rw-r--r-- mpn/x86/k6/mmx/com_n.asm 13
-rw-r--r-- mpn/x86/k6/mmx/dive_1.asm 13
-rw-r--r-- mpn/x86/k6/mmx/logops_n.asm 27
-rw-r--r-- mpn/x86/k6/mmx/lshift.asm 9
-rw-r--r-- mpn/x86/k6/mmx/popham.asm 11
-rw-r--r-- mpn/x86/k6/mmx/rshift.asm 9
-rw-r--r-- mpn/x86/k6/mod_34lsub1.asm 7
-rw-r--r-- mpn/x86/k6/mode1o.asm 7
-rw-r--r-- mpn/x86/k6/mul_1.asm 9
-rw-r--r-- mpn/x86/k6/mul_basecase.asm 12
-rw-r--r-- mpn/x86/k6/pre_mod_1.asm 9
-rw-r--r-- mpn/x86/k6/sqr_basecase.asm 11
-rw-r--r-- mpn/x86/k7/aors_n.asm 10
-rw-r--r-- mpn/x86/k7/aorsmul_1.asm 16
-rw-r--r-- mpn/x86/k7/dive_1.asm 9
-rw-r--r-- mpn/x86/k7/diveby3.asm 9
-rw-r--r-- mpn/x86/k7/gcd_1.asm 9
-rw-r--r-- mpn/x86/k7/mmx/com_n.asm 7
-rw-r--r-- mpn/x86/k7/mmx/copyd.asm 13
-rw-r--r-- mpn/x86/k7/mmx/copyi.asm 13
-rw-r--r-- mpn/x86/k7/mmx/divrem_1.asm 9
-rw-r--r-- mpn/x86/k7/mmx/lshift.asm 10
-rw-r--r-- mpn/x86/k7/mmx/mod_1.asm 9
-rw-r--r-- mpn/x86/k7/mmx/popham.asm 9
-rw-r--r-- mpn/x86/k7/mmx/rshift.asm 10
-rw-r--r-- mpn/x86/k7/mod_34lsub1.asm 7
-rw-r--r-- mpn/x86/k7/mode1o.asm 7
-rw-r--r-- mpn/x86/k7/mul_1.asm 10
-rw-r--r-- mpn/x86/k7/mul_basecase.asm 12
-rw-r--r-- mpn/x86/k7/sqr_basecase.asm 11
-rw-r--r-- mpn/x86/lshift.asm 25
-rw-r--r-- mpn/x86/mod_1.asm 21
-rw-r--r-- mpn/x86/mod_34lsub1.asm 19
-rw-r--r-- mpn/x86/mul_1.asm 19
-rw-r--r-- mpn/x86/mul_basecase.asm 21
-rw-r--r-- mpn/x86/p6/aorsmul_1.asm 9
-rw-r--r-- mpn/x86/p6/copyd.asm 7
-rw-r--r-- mpn/x86/p6/dive_1.asm 9
-rw-r--r-- mpn/x86/p6/diveby3.asm 24
-rw-r--r-- mpn/x86/p6/mmx/divrem_1.asm 9
-rw-r--r-- mpn/x86/p6/mmx/popham.asm 11
-rw-r--r-- mpn/x86/p6/mod_1.asm 9
-rw-r--r-- mpn/x86/p6/mode1o.asm 7
-rw-r--r-- mpn/x86/p6/sqr_basecase.asm 11
-rw-r--r-- mpn/x86/pentium/aors_n.asm 9
-rw-r--r-- mpn/x86/pentium/aorsmul_1.asm 9
-rw-r--r-- mpn/x86/pentium/com_n.asm 5
-rw-r--r-- mpn/x86/pentium/copyd.asm 7
-rw-r--r-- mpn/x86/pentium/copyi.asm 7
-rw-r--r-- mpn/x86/pentium/dive_1.asm 13
-rw-r--r-- mpn/x86/pentium/diveby3.asm 9
-rw-r--r-- mpn/x86/pentium/hamdist.asm 7
-rw-r--r-- mpn/x86/pentium/logops_n.asm 7
-rw-r--r-- mpn/x86/pentium/lshift.asm 13
-rw-r--r-- mpn/x86/pentium/mmx/lshift.asm 9
-rw-r--r-- mpn/x86/pentium/mmx/mul_1.asm 11
-rw-r--r-- mpn/x86/pentium/mmx/popham.asm 10
-rw-r--r-- mpn/x86/pentium/mmx/rshift.asm 9
-rw-r--r-- mpn/x86/pentium/mod_1.asm 9
-rw-r--r-- mpn/x86/pentium/mod_34lsub1.asm 7
-rw-r--r-- mpn/x86/pentium/mode1o.asm 9
-rw-r--r-- mpn/x86/pentium/mul_1.asm 8
-rw-r--r-- mpn/x86/pentium/mul_2.asm 7
-rw-r--r-- mpn/x86/pentium/mul_basecase.asm 9
-rw-r--r-- mpn/x86/pentium/popcount.asm 7
-rw-r--r-- mpn/x86/pentium/rshift.asm 13
-rw-r--r-- mpn/x86/pentium/sqr_basecase.asm 11
-rw-r--r-- mpn/x86/pentium4/mmx/lshift.asm 10
-rw-r--r-- mpn/x86/pentium4/mmx/popham.asm 9
-rw-r--r-- mpn/x86/pentium4/mmx/rshift.asm 10
-rw-r--r-- mpn/x86/pentium4/sse2/add_n.asm 9
-rw-r--r-- mpn/x86/pentium4/sse2/addmul_1.asm 10
-rw-r--r-- mpn/x86/pentium4/sse2/dive_1.asm 7
-rw-r--r-- mpn/x86/pentium4/sse2/diveby3.asm 7
-rw-r--r-- mpn/x86/pentium4/sse2/mod_34lsub1.asm 8
-rw-r--r-- mpn/x86/pentium4/sse2/mode1o.asm 7
-rw-r--r-- mpn/x86/pentium4/sse2/mul_1.asm 8
-rw-r--r-- mpn/x86/pentium4/sse2/mul_basecase.asm 9
-rw-r--r-- mpn/x86/pentium4/sse2/sqr_basecase.asm 9
-rw-r--r-- mpn/x86/pentium4/sse2/sub_n.asm 9
-rw-r--r-- mpn/x86/pentium4/sse2/submul_1.asm 10
-rw-r--r-- mpn/x86/rshift.asm 24
97 files changed, 548 insertions, 552 deletions
diff --git a/mpn/x86/aors_n.asm b/mpn/x86/aors_n.asm
index a7a1ed8d1..cd6592b7d 100644
--- a/mpn/x86/aors_n.asm
+++ b/mpn/x86/aors_n.asm
@@ -1,13 +1,6 @@
dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl
-dnl cycles/limb
-dnl P5: 3.375
-dnl P6: 3.7
-dnl K6: 3.5
-dnl K7: 2.25
-dnl P4: 8.75
-
-dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software
+
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -27,10 +20,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P5: 3.375
+C P6: 3.7
+C K6: 3.5
+C K7: 2.25
+C P4: 8.75
+
+
ifdef(`OPERATION_add_n',`
define(M4_inst, adcl)
define(M4_function_n, mpn_add_n)
diff --git a/mpn/x86/aorsmul_1.asm b/mpn/x86/aorsmul_1.asm
index 8d90fe5c2..696956a7e 100644
--- a/mpn/x86/aorsmul_1.asm
+++ b/mpn/x86/aorsmul_1.asm
@@ -1,15 +1,8 @@
dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl limb and add the result to a second limb vector.
-dnl
-dnl cycles/limb
-dnl P5: 14.75
-dnl P6: 7.5
-dnl K6: 12.5
-dnl K7: 5.25
-dnl P4: 24
-
-dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001 Free Software Foundation,
-dnl Inc.
+
+dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -28,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P5: 14.75
+C P6: 7.5
+C K6: 12.5
+C K7: 5.25
+C P4: 24
+
+
ifdef(`OPERATION_addmul_1',`
define(M4_inst, addl)
define(M4_function_1, mpn_addmul_1)
diff --git a/mpn/x86/copyd.asm b/mpn/x86/copyd.asm
index b2eb922a5..dfac83f08 100644
--- a/mpn/x86/copyd.asm
+++ b/mpn/x86/copyd.asm
@@ -1,15 +1,6 @@
dnl x86 mpn_copyd -- copy limb vector, decrementing.
-dnl
-dnl cycles/limb startup (approx)
-dnl P5: 1.0 40
-dnl P6 2.4 70
-dnl K6 1.0 55
-dnl K7: 1.3 75
-dnl P4: 2.6 175
-dnl
-dnl (Startup time includes some function call overheads.)
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -31,6 +22,16 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C cycles/limb startup (approx)
+C P5: 1.0 40
+C P6 2.4 70
+C K6 1.0 55
+C K7: 1.3 75
+C P4: 2.6 175
+C
+C (Startup time includes some function call overheads.)
+
+
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy src,size to dst,size, working from high to low addresses.
diff --git a/mpn/x86/copyi.asm b/mpn/x86/copyi.asm
index 40ffcf2a2..d5e054151 100644
--- a/mpn/x86/copyi.asm
+++ b/mpn/x86/copyi.asm
@@ -1,16 +1,6 @@
dnl x86 mpn_copyi -- copy limb vector, incrementing.
-dnl
-dnl cycles/limb startup (approx)
-dnl P5: 1.0 35
-dnl P6 0.75 45
-dnl K6 1.0 30
-dnl K7: 1.3 65
-dnl P4: 1.0 120
-dnl
-dnl (Startup time includes some function call overheads.)
-
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -29,10 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb startup (approx)
+C P5: 1.0 35
+C P6 0.75 45
+C K6 1.0 30
+C K7: 1.3 65
+C P4: 1.0 120
+C
+C (Startup time includes some function call overheads.)
+
+
C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy src,size to dst,size, working from low to high addresses.
diff --git a/mpn/x86/dive_1.asm b/mpn/x86/dive_1.asm
index 4dfa131ce..7d7ce0f13 100644
--- a/mpn/x86/dive_1.asm
+++ b/mpn/x86/dive_1.asm
@@ -1,14 +1,6 @@
dnl x86 mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl cycles/limb
-dnl P54 30.0
-dnl P55 29.0
-dnl P6 13.0 odd divisor, 12.0 even (strangely)
-dnl K6 14.0
-dnl K7 12.0
-dnl P4 42.0
-
-dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -30,6 +22,15 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C cycles/limb
+C P54 30.0
+C P55 29.0
+C P6 13.0 odd divisor, 12.0 even (strangely)
+C K6 14.0
+C K7 12.0
+C P4 42.0
+
+
C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/diveby3.asm b/mpn/x86/diveby3.asm
index 3d8817633..8f7870643 100644
--- a/mpn/x86/diveby3.asm
+++ b/mpn/x86/diveby3.asm
@@ -1,14 +1,6 @@
dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
-dnl
-dnl cycles/limb
-dnl P54 18.0
-dnl P55 17.0
-dnl P6 14.0
-dnl K6 14.0
-dnl K7 10.0
-dnl P4 24.0
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -27,10 +19,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P54 18.0
+C P55 17.0
+C P6 14.0
+C K6 14.0
+C K7 10.0
+C P4 24.0
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
diff --git a/mpn/x86/divrem_1.asm b/mpn/x86/divrem_1.asm
index 2389da3de..1da474c48 100644
--- a/mpn/x86/divrem_1.asm
+++ b/mpn/x86/divrem_1.asm
@@ -1,16 +1,6 @@
dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
-dnl
-dnl cycles/limb
-dnl 486 approx 43 maybe
-dnl P5 44
-dnl P6 39
-dnl P6MMX 39
-dnl K6 20
-dnl K7 42
-dnl P4 58
-
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -29,10 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C 486 approx 43 maybe
+C P5 44
+C P6 39
+C P6MMX 39
+C K6 20
+C K7 42
+C P4 58
+
+
C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
C mp_srcptr src, mp_size_t size, mp_limb_t divisor);
C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
diff --git a/mpn/x86/k6/aors_n.asm b/mpn/x86/k6/aors_n.asm
index 579afbc45..2f718fafc 100644
--- a/mpn/x86/k6/aors_n.asm
+++ b/mpn/x86/k6/aors_n.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
-dnl
-dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
+
+
ifdef(`OPERATION_add_n', `
define(M4_inst, adcl)
define(M4_function_n, mpn_add_n)
diff --git a/mpn/x86/k6/aorsmul_1.asm b/mpn/x86/k6/aorsmul_1.asm
index c6e0f40a6..7a8896dd9 100644
--- a/mpn/x86/k6/aorsmul_1.asm
+++ b/mpn/x86/k6/aorsmul_1.asm
@@ -1,10 +1,6 @@
dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl
-dnl K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data),
-dnl PIC adds about 6 cycles at the start.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data),
+C PIC adds about 6 cycles at the start.
+
+
+
dnl K6: large multpliers small multpliers
dnl UNROLL_COUNT cycles/limb cycles/limb
dnl 4 9.5 7.78
diff --git a/mpn/x86/k6/diveby3.asm b/mpn/x86/k6/diveby3.asm
index 9d5f2cf3c..3c6099874 100644
--- a/mpn/x86/k6/diveby3.asm
+++ b/mpn/x86/k6/diveby3.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
-dnl
-dnl K6: 11.0 cycles/limb
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 11.0 cycles/limb
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
C
diff --git a/mpn/x86/k6/gcd_1.asm b/mpn/x86/k6/gcd_1.asm
index e5adeb7e4..74fb99633 100644
--- a/mpn/x86/k6/gcd_1.asm
+++ b/mpn/x86/k6/gcd_1.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_mod_1 -- mpn by 1 gcd.
-dnl
-dnl K6: 9.5 cycles/bit (approx) 1x1 gcd
-dnl 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K6: 9.5 cycles/bit (approx) 1x1 gcd
+C 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
+
+
C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t y);
C
C This code is nothing very special, but offers a speedup over what gcc 2.95
diff --git a/mpn/x86/k6/gcd_finda.asm b/mpn/x86/k6/gcd_finda.asm
index e69439812..544b49480 100644
--- a/mpn/x86/k6/gcd_finda.asm
+++ b/mpn/x86/k6/gcd_finda.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_gcd_finda.
-dnl
-dnl K6: 680 cycles (approx) on average
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 680 cycles (approx) on average
+
+
dnl How many trial subtractions to attempt before launching into a full
dnl division.
diff --git a/mpn/x86/k6/k62mmx/copyd.asm b/mpn/x86/k6/k62mmx/copyd.asm
index 1f9dba538..fff168c80 100644
--- a/mpn/x86/k6/k62mmx/copyd.asm
+++ b/mpn/x86/k6/k62mmx/copyd.asm
@@ -1,8 +1,6 @@
dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
-dnl
-dnl K6-2: 1.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K6-2: 1.0 cycles/limb
+
+
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The loop here is no faster than a rep movsl at 1.0 c/l, but it avoids a 30
diff --git a/mpn/x86/k6/k62mmx/lshift.asm b/mpn/x86/k6/k62mmx/lshift.asm
index 34fde7c99..a5449bf30 100644
--- a/mpn/x86/k6/k62mmx/lshift.asm
+++ b/mpn/x86/k6/k62mmx/lshift.asm
@@ -1,9 +1,6 @@
dnl AMD K6-2 mpn_lshift -- mpn left shift.
-dnl
-dnl K6-2: 1.75 cycles/limb
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6-2: 1.75 cycles/limb
+
+
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/k6/k62mmx/rshift.asm b/mpn/x86/k6/k62mmx/rshift.asm
index f47510bc6..daaff0fe7 100644
--- a/mpn/x86/k6/k62mmx/rshift.asm
+++ b/mpn/x86/k6/k62mmx/rshift.asm
@@ -1,9 +1,6 @@
dnl AMD K6-2 mpn_rshift -- mpn right shift.
-dnl
-dnl K6-2: 1.75 cycles/limb
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6-2: 1.75 cycles/limb
+
+
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/k6/mmx/com_n.asm b/mpn/x86/k6/mmx/com_n.asm
index 07ab394a0..e5ab55de2 100644
--- a/mpn/x86/k6/mmx/com_n.asm
+++ b/mpn/x86/k6/mmx/com_n.asm
@@ -1,10 +1,4 @@
dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement.
-dnl
-dnl alignment dst/src, A=0mod8 N=4mod8
-dnl A/A A/N N/A N/N
-dnl K6-2 1.0 1.18 1.18 1.18 cycles/limb
-dnl K6 1.5 1.85 1.75 1.85
-
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
@@ -25,10 +19,15 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C alignment dst/src, A=0mod8 N=4mod8
+C A/A A/N N/A N/N
+C K6-2 1.0 1.18 1.18 1.18 cycles/limb
+C K6 1.5 1.85 1.75 1.85
+
+
C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Take the bitwise ones-complement of src,size and write it to dst,size.
diff --git a/mpn/x86/k6/mmx/dive_1.asm b/mpn/x86/k6/mmx/dive_1.asm
index bc8cd750d..8a5f45c90 100644
--- a/mpn/x86/k6/mmx/dive_1.asm
+++ b/mpn/x86/k6/mmx/dive_1.asm
@@ -1,11 +1,6 @@
dnl AMD K6 mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl divisor
-dnl odd even
-dnl K6: 10.0 12.0 cycles/limb
-dnl K6-2: 10.0 11.5
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -27,6 +22,12 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C divisor
+C odd even
+C K6: 10.0 12.0 cycles/limb
+C K6-2: 10.0 11.5
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/k6/mmx/logops_n.asm b/mpn/x86/k6/mmx/logops_n.asm
index ef0c5c600..98dd0fd2c 100644
--- a/mpn/x86/k6/mmx/logops_n.asm
+++ b/mpn/x86/k6/mmx/logops_n.asm
@@ -1,19 +1,7 @@
dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
-dnl
-dnl alignment dst/src1/src2, A=0mod8, N=4mod8
-dnl A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
-dnl
-dnl K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor
-dnl K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor
-dnl K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior
-dnl
-dnl K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor
-dnl K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor
-dnl K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -32,10 +20,21 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C alignment dst/src1/src2, A=0mod8, N=4mod8
+C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+C
+C K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor
+C K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor
+C K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior
+C
+C K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor
+C K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor
+C K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior
+
+
dnl M4_p and M4_i are the MMX and integer instructions
dnl M4_*_neg_dst means whether to negate the final result before writing
dnl M4_*_neg_src2 means whether to negate the src2 values before using them
diff --git a/mpn/x86/k6/mmx/lshift.asm b/mpn/x86/k6/mmx/lshift.asm
index 8331849ad..2293e666e 100644
--- a/mpn/x86/k6/mmx/lshift.asm
+++ b/mpn/x86/k6/mmx/lshift.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_lshift -- mpn left shift.
-dnl
-dnl K6: 3.0 cycles/limb
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 3.0 cycles/limb
+
+
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/k6/mmx/popham.asm b/mpn/x86/k6/mmx/popham.asm
index 06c08df4e..733a7ac86 100644
--- a/mpn/x86/k6/mmx/popham.asm
+++ b/mpn/x86/k6/mmx/popham.asm
@@ -1,10 +1,5 @@
dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
dnl hamming distance.
-dnl
-dnl popcount hamdist
-dnl K6-2: 9.0 11.5 cycles/limb
-dnl K6: 12.5 13.0
-
dnl Copyright 2000, 2001 Free Software Foundation, Inc.
dnl
@@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C popcount hamdist
+C K6-2: 9.0 11.5 cycles/limb
+C K6: 12.5 13.0
+
+
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
C
diff --git a/mpn/x86/k6/mmx/rshift.asm b/mpn/x86/k6/mmx/rshift.asm
index cab88562f..e80f47506 100644
--- a/mpn/x86/k6/mmx/rshift.asm
+++ b/mpn/x86/k6/mmx/rshift.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_rshift -- mpn right shift.
-dnl
-dnl K6: 3.0 cycles/limb
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 3.0 cycles/limb
+
+
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/k6/mod_34lsub1.asm b/mpn/x86/k6/mod_34lsub1.asm
index 29f9daa4f..4d1577ca7 100644
--- a/mpn/x86/k6/mod_34lsub1.asm
+++ b/mpn/x86/k6/mod_34lsub1.asm
@@ -1,8 +1,6 @@
dnl AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
-dnl
-dnl K6: 2.66 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K6: 2.66 cycles/limb
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C An attempt was made to use a loop like
diff --git a/mpn/x86/k6/mode1o.asm b/mpn/x86/k6/mode1o.asm
index a0e4028ab..1d382a82d 100644
--- a/mpn/x86/k6/mode1o.asm
+++ b/mpn/x86/k6/mode1o.asm
@@ -1,8 +1,6 @@
dnl AMD K6 mpn_modexact_1_odd -- exact division style remainder.
-dnl
-dnl K6: 10.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K6: 10.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/k6/mul_1.asm b/mpn/x86/k6/mul_1.asm
index eefb7595c..9be89b62d 100644
--- a/mpn/x86/k6/mul_1.asm
+++ b/mpn/x86/k6/mul_1.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_mul_1 -- mpn by limb multiply.
-dnl
-dnl K6: 6.25 cycles/limb.
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 6.25 cycles/limb.
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/k6/mul_basecase.asm b/mpn/x86/k6/mul_basecase.asm
index 195b67f9f..3b38e085b 100644
--- a/mpn/x86/k6/mul_basecase.asm
+++ b/mpn/x86/k6/mul_basecase.asm
@@ -1,10 +1,6 @@
dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
-dnl
-dnl K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop
-dnl unrolling).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop
+C unrolling).
+
+
+
dnl K6: UNROLL_COUNT cycles/product (approx)
dnl 8 9.75
dnl 16 9.3
diff --git a/mpn/x86/k6/pre_mod_1.asm b/mpn/x86/k6/pre_mod_1.asm
index 396998970..13e49911a 100644
--- a/mpn/x86/k6/pre_mod_1.asm
+++ b/mpn/x86/k6/pre_mod_1.asm
@@ -1,9 +1,6 @@
dnl AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor.
-dnl
-dnl K6: 18.0 cycles/limb
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: 18.0 cycles/limb
+
+
C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C mp_limb_t inverse);
C
diff --git a/mpn/x86/k6/sqr_basecase.asm b/mpn/x86/k6/sqr_basecase.asm
index a86013acf..e13c60d6a 100644
--- a/mpn/x86/k6/sqr_basecase.asm
+++ b/mpn/x86/k6/sqr_basecase.asm
@@ -1,9 +1,4 @@
dnl AMD K6 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
-dnl product (measured on the speed difference between 17 and 33 limbs,
-dnl which is roughly the Karatsuba recursing range).
-
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
@@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
+C product (measured on the speed difference between 17 and 33 limbs,
+C which is roughly the Karatsuba recursing range).
+
+
dnl SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this
dnl code supports. This value is used only by the tune program to know
dnl what it can go up to. (An attempt to compile with a bigger value will
diff --git a/mpn/x86/k7/aors_n.asm b/mpn/x86/k7/aors_n.asm
index ad538e293..c048625cc 100644
--- a/mpn/x86/k7/aors_n.asm
+++ b/mpn/x86/k7/aors_n.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
-dnl
-dnl K7: 1.64 cycles/limb (at 16 limb/loop).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 1.64 cycles/limb (at 16 limbs/loop).
+
+
+
dnl K7: UNROLL_COUNT cycles/limb
dnl 8 1.9
dnl 16 1.64
diff --git a/mpn/x86/k7/aorsmul_1.asm b/mpn/x86/k7/aorsmul_1.asm
index 29d418579..9bf31d8db 100644
--- a/mpn/x86/k7/aorsmul_1.asm
+++ b/mpn/x86/k7/aorsmul_1.asm
@@ -1,12 +1,6 @@
dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl
-dnl K7: 3.9 cycles/limb.
-dnl
-dnl Future: It should be possible to avoid the separate mul after the
-dnl unrolled loop by moving the movl/adcl to the top.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,10 +19,16 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 3.9 cycles/limb.
+C
+C Future: It should be possible to avoid the separate mul after the
+C unrolled loop by moving the movl/adcl to the top.
+
+
+
dnl K7: UNROLL_COUNT cycles/limb
dnl 4 4.42
dnl 8 4.16
diff --git a/mpn/x86/k7/dive_1.asm b/mpn/x86/k7/dive_1.asm
index 3faf4cdc0..fe6c29a0d 100644
--- a/mpn/x86/k7/dive_1.asm
+++ b/mpn/x86/k7/dive_1.asm
@@ -1,8 +1,6 @@
-dnl AMD Athlon mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl K7: 11.0 cycles/limb
+dnl AMD K7 mpn_divexact_1 -- mpn by limb exact division.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K7: 11.0 cycles/limb
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/k7/diveby3.asm b/mpn/x86/k7/diveby3.asm
index 88a905f11..b612b613b 100644
--- a/mpn/x86/k7/diveby3.asm
+++ b/mpn/x86/k7/diveby3.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
-dnl
-dnl K7: 8.0 cycles/limb
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 8.0 cycles/limb
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
diff --git a/mpn/x86/k7/gcd_1.asm b/mpn/x86/k7/gcd_1.asm
index 6c683c373..8be32b067 100644
--- a/mpn/x86/k7/gcd_1.asm
+++ b/mpn/x86/k7/gcd_1.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
-dnl
-dnl K7: 6.75 cycles/bit (approx) 1x1 gcd
-dnl 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K7: 6.75 cycles/bit (approx) 1x1 gcd
+C 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
+
+
dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
dnl where x is the larger of the two. See tune/README for more.
dnl
diff --git a/mpn/x86/k7/mmx/com_n.asm b/mpn/x86/k7/mmx/com_n.asm
index 53c96b10f..78ca6ca66 100644
--- a/mpn/x86/k7/mmx/com_n.asm
+++ b/mpn/x86/k7/mmx/com_n.asm
@@ -1,7 +1,4 @@
dnl AMD Athlon mpn_com_n -- mpn bitwise one's complement.
-dnl
-dnl K7: 1.0 cycles/limb
-
dnl Copyright 2002 Free Software Foundation, Inc.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 1.0 cycles/limb
+
+
C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The loop form below is necessary for the claimed speed. It needs to be
diff --git a/mpn/x86/k7/mmx/copyd.asm b/mpn/x86/k7/mmx/copyd.asm
index 33332c607..8de034114 100644
--- a/mpn/x86/k7/mmx/copyd.asm
+++ b/mpn/x86/k7/mmx/copyd.asm
@@ -1,11 +1,6 @@
dnl AMD K7 mpn_copyd -- copy limb vector, decrementing.
-dnl
-dnl alignment dst/src, A=0mod8 N=4mod8
-dnl A/A A/N N/A N/N
-dnl K7 0.75 1.0 1.0 0.75
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C alignment dst/src, A=0mod8 N=4mod8
+C A/A A/N N/A N/N
+C K7 0.75 1.0 1.0 0.75
+
+
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The various comments in mpn/x86/k7/copyi.asm apply here too.
diff --git a/mpn/x86/k7/mmx/copyi.asm b/mpn/x86/k7/mmx/copyi.asm
index 5d774f9e2..5026bd5c1 100644
--- a/mpn/x86/k7/mmx/copyi.asm
+++ b/mpn/x86/k7/mmx/copyi.asm
@@ -1,11 +1,6 @@
dnl AMD K7 mpn_copyi -- copy limb vector, incrementing.
-dnl
-dnl alignment dst/src, A=0mod8 N=4mod8
-dnl A/A A/N N/A N/N
-dnl K7 0.75 1.0 1.0 0.75
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C alignment dst/src, A=0mod8 N=4mod8
+C A/A A/N N/A N/N
+C K7 0.75 1.0 1.0 0.75
+
+
C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy src,size to dst,size.
diff --git a/mpn/x86/k7/mmx/divrem_1.asm b/mpn/x86/k7/mmx/divrem_1.asm
index ef5b37b5f..161ae4643 100644
--- a/mpn/x86/k7/mmx/divrem_1.asm
+++ b/mpn/x86/k7/mmx/divrem_1.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_divrem_1 -- mpn by limb division.
-dnl
-dnl K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
+
+
C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
C mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
diff --git a/mpn/x86/k7/mmx/lshift.asm b/mpn/x86/k7/mmx/lshift.asm
index 9a9f8bd81..5316763b0 100644
--- a/mpn/x86/k7/mmx/lshift.asm
+++ b/mpn/x86/k7/mmx/lshift.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_lshift -- mpn left shift.
-dnl
-dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+
dnl K7: UNROLL_COUNT cycles/limb
dnl 4 1.51
dnl 8 1.26
diff --git a/mpn/x86/k7/mmx/mod_1.asm b/mpn/x86/k7/mmx/mod_1.asm
index 192a9f360..b27531cca 100644
--- a/mpn/x86/k7/mmx/mod_1.asm
+++ b/mpn/x86/k7/mmx/mod_1.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_mod_1 -- mpn by limb remainder.
-dnl
-dnl K7: 17.0 cycles/limb.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 17.0 cycles/limb.
+
+
C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C mp_limb_t carry);
diff --git a/mpn/x86/k7/mmx/popham.asm b/mpn/x86/k7/mmx/popham.asm
index 61e009fcc..f7f2daeef 100644
--- a/mpn/x86/k7/mmx/popham.asm
+++ b/mpn/x86/k7/mmx/popham.asm
@@ -1,10 +1,7 @@
dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
dnl distance.
-dnl
-dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb
+
+
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
C
diff --git a/mpn/x86/k7/mmx/rshift.asm b/mpn/x86/k7/mmx/rshift.asm
index 7013a1623..ba7ef81c0 100644
--- a/mpn/x86/k7/mmx/rshift.asm
+++ b/mpn/x86/k7/mmx/rshift.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_rshift -- mpn right shift.
-dnl
-dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+
dnl K7: UNROLL_COUNT cycles/limb
dnl 4 1.51
dnl 8 1.26
diff --git a/mpn/x86/k7/mod_34lsub1.asm b/mpn/x86/k7/mod_34lsub1.asm
index 22a335a7d..45307b9ec 100644
--- a/mpn/x86/k7/mod_34lsub1.asm
+++ b/mpn/x86/k7/mod_34lsub1.asm
@@ -1,8 +1,6 @@
dnl AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-dnl
-dnl K7: 1.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K7: 1.0 cycles/limb
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C The loop form below and the 64 byte code alignment seem necessary for the
diff --git a/mpn/x86/k7/mode1o.asm b/mpn/x86/k7/mode1o.asm
index 5888e42d0..ddb9e5bb1 100644
--- a/mpn/x86/k7/mode1o.asm
+++ b/mpn/x86/k7/mode1o.asm
@@ -1,8 +1,6 @@
dnl AMD K7 mpn_modexact_1_odd -- exact division style remainder.
-dnl
-dnl K7: 11.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C K7: 11.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/k7/mul_1.asm b/mpn/x86/k7/mul_1.asm
index e60a8fb3b..9fa9625ab 100644
--- a/mpn/x86/k7/mul_1.asm
+++ b/mpn/x86/k7/mul_1.asm
@@ -1,9 +1,6 @@
dnl AMD K7 mpn_mul_1 -- mpn by limb multiply.
-dnl
-dnl K7: 3.4 cycles/limb (at 16 limbs/loop).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: 3.4 cycles/limb (at 16 limbs/loop).
+
+
+
dnl K7: UNROLL_COUNT cycles/limb
dnl 8 3.9
dnl 16 3.4
diff --git a/mpn/x86/k7/mul_basecase.asm b/mpn/x86/k7/mul_basecase.asm
index 03b5c362b..5800ce0ec 100644
--- a/mpn/x86/k7/mul_basecase.asm
+++ b/mpn/x86/k7/mul_basecase.asm
@@ -1,10 +1,6 @@
dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
-dnl
-dnl K7: approx 4.42 cycles per cross product at around 20x20 limbs (16
-dnl limbs/loop unrolling).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: approx 4.42 cycles per cross product at around 20x20 limbs (16
+C limbs/loop unrolling).
+
+
+
dnl K7 UNROLL_COUNT cycles/product (at around 20x20)
dnl 8 4.67
dnl 16 4.59
diff --git a/mpn/x86/k7/sqr_basecase.asm b/mpn/x86/k7/sqr_basecase.asm
index d987df2ab..1d29c8241 100644
--- a/mpn/x86/k7/sqr_basecase.asm
+++ b/mpn/x86/k7/sqr_basecase.asm
@@ -1,9 +1,4 @@
dnl AMD K7 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
-dnl (measured on the speed difference between 25 and 50 limbs, which is
-dnl roughly the Karatsuba recursing range).
-
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
@@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
+C (measured on the speed difference between 25 and 50 limbs, which is
+C roughly the Karatsuba recursing range).
+
+
dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
dnl some comments.
diff --git a/mpn/x86/lshift.asm b/mpn/x86/lshift.asm
index 54cb0cf3b..93f144711 100644
--- a/mpn/x86/lshift.asm
+++ b/mpn/x86/lshift.asm
@@ -1,16 +1,7 @@
dnl x86 mpn_lshift -- mpn left shift.
-dnl
-dnl cycles/limb
-dnl P54: 7.5
-dnl P55: 7.0
-dnl P6: 2.5
-dnl K6: 4.5
-dnl K7: 5.0
-dnl P4: 14.5
-
-
-dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001 Free Software Foundation,
-dnl Inc.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -29,10 +20,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P54: 7.5
+C P55: 7.0
+C P6: 2.5
+C K6: 4.5
+C K7: 5.0
+C P4: 14.5
+
+
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
diff --git a/mpn/x86/mod_1.asm b/mpn/x86/mod_1.asm
index 97aa308b7..fd251332a 100644
--- a/mpn/x86/mod_1.asm
+++ b/mpn/x86/mod_1.asm
@@ -1,14 +1,6 @@
dnl x86 mpn_mod_1 -- mpn by limb remainder.
-dnl
-dnl cycles/limb
-dnl 486 42 approx, maybe
-dnl P5 44
-dnl P6 39
-dnl K6 20
-dnl K7 41
-dnl P4 58
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -30,6 +22,15 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C cycles/limb
+C 486 42 approx, maybe
+C P5 44
+C P6 39
+C K6 20
+C K7 41
+C P4 58
+
+
C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C mp_limb_t carry);
diff --git a/mpn/x86/mod_34lsub1.asm b/mpn/x86/mod_34lsub1.asm
index 928e937a2..d89a2c209 100644
--- a/mpn/x86/mod_34lsub1.asm
+++ b/mpn/x86/mod_34lsub1.asm
@@ -1,13 +1,6 @@
dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
-dnl
-dnl cycles/limb
-dnl P5: 3.0
-dnl P6: 3.66
-dnl K6: 3.0
-dnl K7: 1.3
-dnl P4: 9
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -29,6 +22,14 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C cycles/limb
+C P5: 3.0
+C P6: 3.66
+C K6: 3.0
+C K7: 1.3
+C P4: 9
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
diff --git a/mpn/x86/mul_1.asm b/mpn/x86/mul_1.asm
index fecefc855..c989e40da 100644
--- a/mpn/x86/mul_1.asm
+++ b/mpn/x86/mul_1.asm
@@ -1,15 +1,7 @@
dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
dnl with a limb and store the result in a second limb vector.
-dnl
-dnl cycles/limb
-dnl P5: 12.5
-dnl P6: 5.5
-dnl K6: 10.5
-dnl K7: 4.5
-dnl P4: 19
-
-dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001 Free Software
+dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -29,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P5: 12.5
+C P6: 5.5
+C K6: 10.5
+C K7: 4.5
+C P4: 19
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
diff --git a/mpn/x86/mul_basecase.asm b/mpn/x86/mul_basecase.asm
index 1b1778797..fb0c46b66 100644
--- a/mpn/x86/mul_basecase.asm
+++ b/mpn/x86/mul_basecase.asm
@@ -1,16 +1,8 @@
dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
dnl in a third limb vector.
-dnl
-dnl cycles/crossproduct
-dnl P5: 15
-dnl P6: 7.5
-dnl K6: 12.5
-dnl K7: 5.5
-dnl P4: 24
-
-dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001 Free Software Foundation,
-dnl Inc.
+dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -29,10 +21,17 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/crossproduct
+C P5: 15
+C P6: 7.5
+C K6: 12.5
+C K7: 5.5
+C P4: 24
+
+
C void mpn_mul_basecase (mp_ptr wp,
C mp_srcptr xp, mp_size_t xsize,
C mp_srcptr yp, mp_size_t ysize);
diff --git a/mpn/x86/p6/aorsmul_1.asm b/mpn/x86/p6/aorsmul_1.asm
index ba3e5146f..7aab1afa4 100644
--- a/mpn/x86/p6/aorsmul_1.asm
+++ b/mpn/x86/p6/aorsmul_1.asm
@@ -1,9 +1,6 @@
dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl
-dnl P6: 6.35 cycles/limb (at 16 limbs/loop).
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P6: 6.35 cycles/limb (at 16 limbs/loop).
+
+
dnl P6 UNROLL_COUNT cycles/limb
dnl 8 6.7
dnl 16 6.35
diff --git a/mpn/x86/p6/copyd.asm b/mpn/x86/p6/copyd.asm
index b4ed341e4..ddcaafaff 100644
--- a/mpn/x86/p6/copyd.asm
+++ b/mpn/x86/p6/copyd.asm
@@ -1,8 +1,6 @@
dnl Intel P6 mpn_copyd -- copy limb vector backwards.
-dnl
-dnl P6: 1.75 cycles/limb, or 0.75 if no overlap
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P6: 1.75 cycles/limb, or 0.75 if no overlap
+
+
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C An explicit loop is used because a decrementing rep movsl is a bit slow at
diff --git a/mpn/x86/p6/dive_1.asm b/mpn/x86/p6/dive_1.asm
index 478781d28..d512762d7 100644
--- a/mpn/x86/p6/dive_1.asm
+++ b/mpn/x86/p6/dive_1.asm
@@ -1,9 +1,6 @@
dnl Intel P6 mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl odd even divisor
-dnl P6: 10.0 12.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C odd even divisor
+C P6: 10.0 12.0 cycles/limb
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/p6/diveby3.asm b/mpn/x86/p6/diveby3.asm
index e5abbe905..b9c0bbd7d 100644
--- a/mpn/x86/p6/diveby3.asm
+++ b/mpn/x86/p6/diveby3.asm
@@ -1,9 +1,6 @@
dnl Intel P6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
-dnl
-dnl P6: 8.5 cycles/limb
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,16 +19,19 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
+include(`../config.m4')
-dnl The P5 code runs well on P6, in fact better than anything else found so
-dnl far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the
-dnl dependent path are taking 4.5 cycles.
-dnl
-dnl The destination cache line prefetching is unnecessary on P6, but
-dnl removing it is a 2 cycle slowdown (approx), so it must be inducing
-dnl something good in the out of order execution.
-include(`../config.m4')
+C P6: 8.5 cycles/limb
+
+
+C The P5 code runs well on P6, in fact better than anything else found so
+C far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the dependent
+C path are taking 4.5 cycles.
+C
+C The destination cache line prefetching is unnecessary on P6, but removing
+C it is a 2 cycle slowdown (approx), so it must be inducing something good
+C in the out of order execution.
MULFUNC_PROLOGUE(mpn_divexact_by3c)
include_mpn(`x86/pentium/diveby3.asm')
diff --git a/mpn/x86/p6/mmx/divrem_1.asm b/mpn/x86/p6/mmx/divrem_1.asm
index 36a0d9837..da85aca10 100644
--- a/mpn/x86/p6/mmx/divrem_1.asm
+++ b/mpn/x86/p6/mmx/divrem_1.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
-dnl
-dnl P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part.
+
+
C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
C mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
diff --git a/mpn/x86/p6/mmx/popham.asm b/mpn/x86/p6/mmx/popham.asm
index e00a8fada..9188af743 100644
--- a/mpn/x86/p6/mmx/popham.asm
+++ b/mpn/x86/p6/mmx/popham.asm
@@ -1,11 +1,7 @@
dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
dnl hamming distance.
-dnl
-dnl P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb
-dnl (approx)
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,8 +20,11 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+
+C P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb (approx)
+
+
MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/mpn/x86/p6/mod_1.asm b/mpn/x86/p6/mod_1.asm
index 5ffbb568c..84bb1034c 100644
--- a/mpn/x86/p6/mod_1.asm
+++ b/mpn/x86/p6/mod_1.asm
@@ -1,9 +1,6 @@
dnl Intel P6 mpn_mod_1 -- mpn by limb remainder.
-dnl
-dnl P6: 21.5 cycles/limb
-
-dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P6: 21.5 cycles/limb
+
+
C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C mp_limb_t carry);
diff --git a/mpn/x86/p6/mode1o.asm b/mpn/x86/p6/mode1o.asm
index 2f01e6646..7e468fdd9 100644
--- a/mpn/x86/p6/mode1o.asm
+++ b/mpn/x86/p6/mode1o.asm
@@ -1,8 +1,6 @@
dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder.
-dnl
-dnl P6: 10.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P6: 10.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/p6/sqr_basecase.asm b/mpn/x86/p6/sqr_basecase.asm
index 9a2aa2b60..be306ccda 100644
--- a/mpn/x86/p6/sqr_basecase.asm
+++ b/mpn/x86/p6/sqr_basecase.asm
@@ -1,9 +1,4 @@
dnl Intel P6 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular
-dnl product (measured on the speed difference between 20 and 40 limbs,
-dnl which is the Karatsuba recursing range).
-
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
@@ -24,10 +19,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular
+C product (measured on the speed difference between 20 and 40 limbs,
+C which is the Karatsuba recursing range).
+
+
dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
dnl a description. The only difference here is that UNROLL_COUNT can go up
dnl to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67.
diff --git a/mpn/x86/pentium/aors_n.asm b/mpn/x86/pentium/aors_n.asm
index bc190776c..1a9ab6e84 100644
--- a/mpn/x86/pentium/aors_n.asm
+++ b/mpn/x86/pentium/aors_n.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl
-dnl P5: 2.375 cycles/limb
-
-dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 2.375 cycles/limb
+
+
ifdef(`OPERATION_add_n',`
define(M4_inst, adcl)
define(M4_function_n, mpn_add_n)
diff --git a/mpn/x86/pentium/aorsmul_1.asm b/mpn/x86/pentium/aorsmul_1.asm
index ae68fe08f..428ba8ddc 100644
--- a/mpn/x86/pentium/aorsmul_1.asm
+++ b/mpn/x86/pentium/aorsmul_1.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium mpn_addmul_1/mpn_submul_1 -- mpn by limb multiplication.
-dnl
-dnl P5: 14.0 cycles/limb
-
-dnl Copyright 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
dnl Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -23,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 14.0 cycles/limb
+
+
ifdef(`OPERATION_addmul_1', `
define(M4_inst, addl)
define(M4_function_1, mpn_addmul_1)
diff --git a/mpn/x86/pentium/com_n.asm b/mpn/x86/pentium/com_n.asm
index 4e7c651a1..0d479b0bc 100644
--- a/mpn/x86/pentium/com_n.asm
+++ b/mpn/x86/pentium/com_n.asm
@@ -1,6 +1,4 @@
dnl Intel Pentium mpn_com_n -- mpn ones complement.
-dnl
-dnl P5: 1.75 cycles/limb
dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 1.75 cycles/limb
+
+
C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C This code is similar to mpn_copyi, basically there's just some "xorl
diff --git a/mpn/x86/pentium/copyd.asm b/mpn/x86/pentium/copyd.asm
index fda5e6a2a..3487d0e6e 100644
--- a/mpn/x86/pentium/copyd.asm
+++ b/mpn/x86/pentium/copyd.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing.
-dnl
-dnl P5: 1.25 cycles/limb
-dnl Copyright 1996, 2001 Free Software Foundation, Inc.
+dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 1.25 cycles/limb
+
+
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C See comments in copyi.asm.
diff --git a/mpn/x86/pentium/copyi.asm b/mpn/x86/pentium/copyi.asm
index 48321b965..a246f5d99 100644
--- a/mpn/x86/pentium/copyi.asm
+++ b/mpn/x86/pentium/copyi.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium mpn_copyi -- copy limb vector, incrementing.
-dnl
-dnl P5: 1.25 cycles/limb
-dnl Copyright 1996, 2001 Free Software Foundation, Inc.
+dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 1.25 cycles/limb
+
+
C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Destination prefetching is done to avoid repeated write-throughs on lines
diff --git a/mpn/x86/pentium/dive_1.asm b/mpn/x86/pentium/dive_1.asm
index 01b41d982..b311d4c0c 100644
--- a/mpn/x86/pentium/dive_1.asm
+++ b/mpn/x86/pentium/dive_1.asm
@@ -1,11 +1,6 @@
dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl divisor
-dnl odd even
-dnl P54: 24.5 30.5 cycles/limb
-dnl P55: 23.0 28.0
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -27,6 +22,12 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C divisor
+C odd even
+C P54: 24.5 30.5 cycles/limb
+C P55: 23.0 28.0
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/pentium/diveby3.asm b/mpn/x86/pentium/diveby3.asm
index 8ab098602..1497a1a1f 100644
--- a/mpn/x86/pentium/diveby3.asm
+++ b/mpn/x86/pentium/diveby3.asm
@@ -1,9 +1,6 @@
dnl Intel P5 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
-dnl
-dnl P5: 15.0 cycles/limb
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 15.0 cycles/limb
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
diff --git a/mpn/x86/pentium/hamdist.asm b/mpn/x86/pentium/hamdist.asm
index 04694a68f..cc5abc6fc 100644
--- a/mpn/x86/pentium/hamdist.asm
+++ b/mpn/x86/pentium/hamdist.asm
@@ -1,8 +1,6 @@
dnl Intel P5 mpn_hamdist -- mpn hamming distance.
-dnl
-dnl P5: 14.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 14.0 cycles/limb
+
+
C unsigned long mpn_hamdist (mp_srcptr src1, mp_srcptr src2, mp_size_t size);
C
C It might be possible to shave 1 cycle from the loop, and hence 2
diff --git a/mpn/x86/pentium/logops_n.asm b/mpn/x86/pentium/logops_n.asm
index 47f649e61..feea75b81 100644
--- a/mpn/x86/pentium/logops_n.asm
+++ b/mpn/x86/pentium/logops_n.asm
@@ -1,7 +1,4 @@
dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
-dnl
-dnl P5: 3.0 c/l and, ior, xor
-dnl 3.5 c/l andn, iorn, nand, nior, xnor
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 3.0 c/l and, ior, xor
+C 3.5 c/l andn, iorn, nand, nior, xnor
+
+
define(M4_choose_op,
`ifdef(`OPERATION_$1',`
define(`M4_function', `mpn_$1')
diff --git a/mpn/x86/pentium/lshift.asm b/mpn/x86/pentium/lshift.asm
index a576526d7..721d7a0ed 100644
--- a/mpn/x86/pentium/lshift.asm
+++ b/mpn/x86/pentium/lshift.asm
@@ -1,11 +1,6 @@
dnl Intel Pentium mpn_lshift -- mpn left shift.
-dnl
-dnl cycles/limb
-dnl P5,P54: 6.0
-dnl P55: 5.375
-
-dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P5,P54: 6.0
+C P55: 5.375
+
+
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/pentium/mmx/lshift.asm b/mpn/x86/pentium/mmx/lshift.asm
index 95202760c..21baff1b9 100644
--- a/mpn/x86/pentium/mmx/lshift.asm
+++ b/mpn/x86/pentium/mmx/lshift.asm
@@ -1,9 +1,6 @@
dnl Intel P5 mpn_lshift -- mpn left shift.
-dnl
-dnl P5: 1.75 cycles/limb.
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 1.75 cycles/limb.
+
+
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/pentium/mmx/mul_1.asm b/mpn/x86/pentium/mmx/mul_1.asm
index 18c214490..3acffef78 100644
--- a/mpn/x86/pentium/mmx/mul_1.asm
+++ b/mpn/x86/pentium/mmx/mul_1.asm
@@ -1,10 +1,6 @@
dnl Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication.
-dnl
-dnl cycles/limb
-dnl P5: 12.0 for 32-bit multiplier
-dnl 7.0 for 16-bit multiplier
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -26,6 +22,11 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C cycles/limb
+C P5: 12.0 for 32-bit multiplier
+C 7.0 for 16-bit multiplier
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C
diff --git a/mpn/x86/pentium/mmx/popham.asm b/mpn/x86/pentium/mmx/popham.asm
index 7e15f49b5..3552a9331 100644
--- a/mpn/x86/pentium/mmx/popham.asm
+++ b/mpn/x86/pentium/mmx/popham.asm
@@ -1,10 +1,7 @@
dnl Intel P55 mpn_popcount, mpn_hamdist -- population count and hamming
dnl distance.
-dnl
-dnl P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,8 +20,11 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+
+C P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb
+
+
MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/mpn/x86/pentium/mmx/rshift.asm b/mpn/x86/pentium/mmx/rshift.asm
index 8b03d5811..26afeff59 100644
--- a/mpn/x86/pentium/mmx/rshift.asm
+++ b/mpn/x86/pentium/mmx/rshift.asm
@@ -1,9 +1,6 @@
dnl Intel P5 mpn_rshift -- mpn right shift.
-dnl
-dnl P5: 1.75 cycles/limb.
-
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 1.75 cycles/limb.
+
+
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/pentium/mod_1.asm b/mpn/x86/pentium/mod_1.asm
index 065efef9d..b42b3dc91 100644
--- a/mpn/x86/pentium/mod_1.asm
+++ b/mpn/x86/pentium/mod_1.asm
@@ -1,9 +1,6 @@
dnl Intel P5 mpn_mod_1 -- mpn by limb remainder.
-dnl
-dnl P5: 28.0 cycles/limb
-
-dnl Copyright 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 28.0 cycles/limb
+
+
C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C mp_limb_t carry);
diff --git a/mpn/x86/pentium/mod_34lsub1.asm b/mpn/x86/pentium/mod_34lsub1.asm
index 84ac6ec9b..881af5e9b 100644
--- a/mpn/x86/pentium/mod_34lsub1.asm
+++ b/mpn/x86/pentium/mod_34lsub1.asm
@@ -1,8 +1,6 @@
dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
-dnl
-dnl P5: 1.66 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 1.66 cycles/limb
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
diff --git a/mpn/x86/pentium/mode1o.asm b/mpn/x86/pentium/mode1o.asm
index 2f74463c7..b99d3f686 100644
--- a/mpn/x86/pentium/mode1o.asm
+++ b/mpn/x86/pentium/mode1o.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium mpn_modexact_1_odd -- exact division style remainder.
-dnl
-dnl P5: 23.0 cycles/limb
-
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 23.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium/mul_1.asm b/mpn/x86/pentium/mul_1.asm
index 9e727a08f..66309f4c8 100644
--- a/mpn/x86/pentium/mul_1.asm
+++ b/mpn/x86/pentium/mul_1.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
-dnl
-dnl P5: 12.0 cycles/limb
-dnl Copyright 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
dnl Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 12.0 cycles/limb
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium/mul_2.asm b/mpn/x86/pentium/mul_2.asm
index 5d9f4023e..39c9e6989 100644
--- a/mpn/x86/pentium/mul_2.asm
+++ b/mpn/x86/pentium/mul_2.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
-dnl
-dnl P5: 24.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 24.0 cycles/limb
+
+
C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_srcptr mult);
C
diff --git a/mpn/x86/pentium/mul_basecase.asm b/mpn/x86/pentium/mul_basecase.asm
index 38a4bcadc..615051248 100644
--- a/mpn/x86/pentium/mul_basecase.asm
+++ b/mpn/x86/pentium/mul_basecase.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
-dnl
-dnl P5: 14.2 cycles/crossproduct (approx)
-
-dnl Copyright 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+dnl Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: 14.2 cycles/crossproduct (approx)
+
+
C void mpn_mul_basecase (mp_ptr wp,
C mp_srcptr xp, mp_size_t xsize,
C mp_srcptr yp, mp_size_t ysize);
diff --git a/mpn/x86/pentium/popcount.asm b/mpn/x86/pentium/popcount.asm
index e1f21b171..deb078175 100644
--- a/mpn/x86/pentium/popcount.asm
+++ b/mpn/x86/pentium/popcount.asm
@@ -1,8 +1,6 @@
dnl Intel P5 mpn_popcount -- mpn bit population count.
-dnl
-dnl P5: 8.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P5: 8.0 cycles/limb
+
+
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C
C An arithmetic approach has been found to be slower than the table lookup,
diff --git a/mpn/x86/pentium/rshift.asm b/mpn/x86/pentium/rshift.asm
index c50f2f924..6d8f14d71 100644
--- a/mpn/x86/pentium/rshift.asm
+++ b/mpn/x86/pentium/rshift.asm
@@ -1,11 +1,6 @@
dnl Intel Pentium mpn_rshift -- mpn right shift.
-dnl
-dnl cycles/limb
-dnl P5,P54: 6.0
-dnl P55: 5.375
-
-dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -25,10 +20,14 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P5,P54: 6.0
+C P55: 5.375
+
+
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);
C
diff --git a/mpn/x86/pentium/sqr_basecase.asm b/mpn/x86/pentium/sqr_basecase.asm
index ba0786d97..06aab2eb4 100644
--- a/mpn/x86/pentium/sqr_basecase.asm
+++ b/mpn/x86/pentium/sqr_basecase.asm
@@ -1,10 +1,6 @@
dnl Intel P5 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular
-dnl product at around 20x20 limbs.
-
-dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular
+C product at around 20x20 limbs.
+
+
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Calculate src,size squared, storing the result in dst,2*size.
diff --git a/mpn/x86/pentium4/mmx/lshift.asm b/mpn/x86/pentium4/mmx/lshift.asm
index f59891358..a0bd867fa 100644
--- a/mpn/x86/pentium4/mmx/lshift.asm
+++ b/mpn/x86/pentium4/mmx/lshift.asm
@@ -1,8 +1,6 @@
-dnl Intel Pentium 4 mpn_lshift -- left shift.
-dnl
-dnl Pentium 4: 1.75 cycles/limb.
+dnl Intel Pentium-4 mpn_lshift -- left shift.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,5 +21,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+
+C P4: 1.75 cycles/limb.
+
+
MULFUNC_PROLOGUE(mpn_lshift)
include_mpn(`x86/pentium/mmx/lshift.asm')
diff --git a/mpn/x86/pentium4/mmx/popham.asm b/mpn/x86/pentium4/mmx/popham.asm
index ebcdb9197..516de5bd0 100644
--- a/mpn/x86/pentium4/mmx/popham.asm
+++ b/mpn/x86/pentium4/mmx/popham.asm
@@ -1,10 +1,7 @@
dnl Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and
dnl hamming distance.
-dnl
-dnl P4: popcount 8.5 cycles/limb
-dnl hamdist 9.5 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -26,6 +23,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: popcount 8.5 cycles/limb
+C hamdist 9.5 cycles/limb
+
+
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
C
diff --git a/mpn/x86/pentium4/mmx/rshift.asm b/mpn/x86/pentium4/mmx/rshift.asm
index 2f4bf937e..b1db05950 100644
--- a/mpn/x86/pentium4/mmx/rshift.asm
+++ b/mpn/x86/pentium4/mmx/rshift.asm
@@ -1,8 +1,6 @@
-dnl Intel Pentium 4 mpn_rshift -- right shift.
-dnl
-dnl Pentium 4: 1.75 cycles/limb.
+dnl Intel Pentium-4 mpn_rshift -- right shift.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,5 +21,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+
+C P4: 1.75 cycles/limb.
+
+
MULFUNC_PROLOGUE(mpn_rshift)
include_mpn(`x86/pentium/mmx/rshift.asm')
diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm
index c67d4b9d9..f138bb40b 100644
--- a/mpn/x86/pentium4/sse2/add_n.asm
+++ b/mpn/x86/pentium4/sse2/add_n.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_add_n -- mpn addition.
-dnl
-dnl P4: 4.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+
+
C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size);
C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm
index d0e0bf10c..7c52cd3f2 100644
--- a/mpn/x86/pentium4/sse2/addmul_1.asm
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -1,10 +1,7 @@
dnl Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add
dnl the result to a second limb vector.
-dnl
-dnl Pentium4: 6 cycles/limb, unstable timing, at least on early Pentium4
-dnl silicon (stepping 10).
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 6 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C (stepping 10).
+
+
C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm
index c5f18cad0..ad53dad89 100644
--- a/mpn/x86/pentium4/sse2/dive_1.asm
+++ b/mpn/x86/pentium4/sse2/dive_1.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl P4: 19.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 19.0 cycles/limb
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/pentium4/sse2/diveby3.asm b/mpn/x86/pentium4/sse2/diveby3.asm
index fade75a1f..e258caa4d 100644
--- a/mpn/x86/pentium4/sse2/diveby3.asm
+++ b/mpn/x86/pentium4/sse2/diveby3.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_divexact_by3 -- mpn exact division by 3.
-dnl
-dnl P4: 18.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 18.0 cycles/limb
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
C
diff --git a/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/mpn/x86/pentium4/sse2/mod_34lsub1.asm
index 7b5cd145b..50481f0d8 100644
--- a/mpn/x86/pentium4/sse2/mod_34lsub1.asm
+++ b/mpn/x86/pentium4/sse2/mod_34lsub1.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-dnl
-dnl Pentium4: 1.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C Pentium4: 1.0 cycles/limb
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Enhancements:
diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm
index 191d07127..14ccae86b 100644
--- a/mpn/x86/pentium4/sse2/mode1o.asm
+++ b/mpn/x86/pentium4/sse2/mode1o.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder.
-dnl
-dnl P4: 19.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 19.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm
index 9bf9dea1f..b3916966f 100644
--- a/mpn/x86/pentium4/sse2/mul_1.asm
+++ b/mpn/x86/pentium4/sse2/mul_1.asm
@@ -1,9 +1,7 @@
dnl Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store
dnl the result in a second limb vector.
-dnl
-dnl Pentium4: 4 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4 cycles/limb
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/mul_basecase.asm b/mpn/x86/pentium4/sse2/mul_basecase.asm
index 3e7672b2e..af7c18d8e 100644
--- a/mpn/x86/pentium4/sse2/mul_basecase.asm
+++ b/mpn/x86/pentium4/sse2/mul_basecase.asm
@@ -1,8 +1,6 @@
-dnl Intel Pentium 4 mpn_mul_basecase -- mpn by mpn multiplication.
-dnl
-dnl P4: 6.0 cycles/crossproduct (approx)
+dnl Intel Pentium-4 mpn_mul_basecase -- mpn by mpn multiplication.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 6.0 cycles/crossproduct (approx)
+
+
C void mpn_mul_basecase (mp_ptr wp,
C mp_srcptr xp, mp_size_t xsize,
C mp_srcptr yp, mp_size_t ysize);
diff --git a/mpn/x86/pentium4/sse2/sqr_basecase.asm b/mpn/x86/pentium4/sse2/sqr_basecase.asm
index 4bcd798dc..a32a708ad 100644
--- a/mpn/x86/pentium4/sse2/sqr_basecase.asm
+++ b/mpn/x86/pentium4/sse2/sqr_basecase.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium-4 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular
-dnl product, at around 30x30 limbs.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular
+C product, at around 30x30 limbs.
+
+
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm
index ff4004d9a..5e6667b22 100644
--- a/mpn/x86/pentium4/sse2/sub_n.asm
+++ b/mpn/x86/pentium4/sse2/sub_n.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction.
-dnl
-dnl P4: 4.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+
+
C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size);
C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm
index 558cb1ca5..a43ea8afa 100644
--- a/mpn/x86/pentium4/sse2/submul_1.asm
+++ b/mpn/x86/pentium4/sse2/submul_1.asm
@@ -1,10 +1,7 @@
dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.
-dnl
-dnl Pentium4: 7 cycles/limb, unstable timing, at least on early Pentium4
-dnl silicon (stepping 10).
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C (stepping 10).
+
+
C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t mult);
C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/rshift.asm b/mpn/x86/rshift.asm
index 8f9b54a87..837f1828c 100644
--- a/mpn/x86/rshift.asm
+++ b/mpn/x86/rshift.asm
@@ -1,15 +1,7 @@
dnl x86 mpn_rshift -- mpn right shift.
-dnl
-dnl cycles/limb
-dnl P54: 7.5
-dnl P55: 7.0
-dnl P6: 2.5
-dnl K6: 4.5
-dnl K7: 5.0
-dnl P4: 16.5
-
-dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001 Free Software Foundation,
-dnl Inc.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -28,10 +20,18 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C cycles/limb
+C P54: 7.5
+C P55: 7.0
+C P6: 2.5
+C K6: 4.5
+C K7: 5.0
+C P4: 16.5
+
+
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C unsigned shift);