summary | refs | log | tree | commit | diff
path: root/mpn/x86/pentium4/sse2
diff options
context:
space:
mode:
author: Kevin Ryde <user42@zip.com.au> 2002-03-30 23:20:30 +0100
committer: Kevin Ryde <user42@zip.com.au> 2002-03-30 23:20:30 +0100
commit: 64d8a4619f996d6d91423e28d1bc6eb8695508c5 (patch)
tree: 1e1512cdf662ddeb0468cfe9905f93c0b76b0060 /mpn/x86/pentium4/sse2
parent: 7adb3efb61b771d93fba849454e910333b0997a9 (diff)
download: gmp-64d8a4619f996d6d91423e28d1bc6eb8695508c5.tar.gz
* mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put
speeds after the copyright notice, so as to keep that clear.
Diffstat (limited to 'mpn/x86/pentium4/sse2')
-rw-r--r-- mpn/x86/pentium4/sse2/add_n.asm | 9
-rw-r--r-- mpn/x86/pentium4/sse2/addmul_1.asm | 10
-rw-r--r-- mpn/x86/pentium4/sse2/dive_1.asm | 7
-rw-r--r-- mpn/x86/pentium4/sse2/diveby3.asm | 7
-rw-r--r-- mpn/x86/pentium4/sse2/mod_34lsub1.asm | 8
-rw-r--r-- mpn/x86/pentium4/sse2/mode1o.asm | 7
-rw-r--r-- mpn/x86/pentium4/sse2/mul_1.asm | 8
-rw-r--r-- mpn/x86/pentium4/sse2/mul_basecase.asm | 9
-rw-r--r-- mpn/x86/pentium4/sse2/sqr_basecase.asm | 9
-rw-r--r-- mpn/x86/pentium4/sse2/sub_n.asm | 9
-rw-r--r-- mpn/x86/pentium4/sse2/submul_1.asm | 10
11 files changed, 50 insertions, 43 deletions
diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm
index c67d4b9d9..f138bb40b 100644
--- a/mpn/x86/pentium4/sse2/add_n.asm
+++ b/mpn/x86/pentium4/sse2/add_n.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_add_n -- mpn addition.
-dnl
-dnl P4: 4.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+
+
C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size);
C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm
index d0e0bf10c..7c52cd3f2 100644
--- a/mpn/x86/pentium4/sse2/addmul_1.asm
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -1,10 +1,7 @@
dnl Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add
dnl the result to a second limb vector.
-dnl
-dnl Pentium4: 6 cycles/limb, unstable timing, at least on early Pentium4
-dnl silicon (stepping 10).
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 6 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C (stepping 10).
+
+
C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm
index c5f18cad0..ad53dad89 100644
--- a/mpn/x86/pentium4/sse2/dive_1.asm
+++ b/mpn/x86/pentium4/sse2/dive_1.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
-dnl
-dnl P4: 19.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 19.0 cycles/limb
+
+
C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C
diff --git a/mpn/x86/pentium4/sse2/diveby3.asm b/mpn/x86/pentium4/sse2/diveby3.asm
index fade75a1f..e258caa4d 100644
--- a/mpn/x86/pentium4/sse2/diveby3.asm
+++ b/mpn/x86/pentium4/sse2/diveby3.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_divexact_by3 -- mpn exact division by 3.
-dnl
-dnl P4: 18.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 18.0 cycles/limb
+
+
C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
C
diff --git a/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/mpn/x86/pentium4/sse2/mod_34lsub1.asm
index 7b5cd145b..50481f0d8 100644
--- a/mpn/x86/pentium4/sse2/mod_34lsub1.asm
+++ b/mpn/x86/pentium4/sse2/mod_34lsub1.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium 4 mpn_mod_32lsub1 -- remainder modulo 2^24-1.
-dnl
-dnl Pentium4: 1.0 cycles/limb
-dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C Pentium4: 1.0 cycles/limb
+
+
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Enhancements:
diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm
index 191d07127..14ccae86b 100644
--- a/mpn/x86/pentium4/sse2/mode1o.asm
+++ b/mpn/x86/pentium4/sse2/mode1o.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder.
-dnl
-dnl P4: 19.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 19.0 cycles/limb
+
+
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
C mp_limb_t divisor);
C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm
index 9bf9dea1f..b3916966f 100644
--- a/mpn/x86/pentium4/sse2/mul_1.asm
+++ b/mpn/x86/pentium4/sse2/mul_1.asm
@@ -1,9 +1,7 @@
dnl Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store
dnl the result in a second limb vector.
-dnl
-dnl Pentium4: 4 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4 cycles/limb
+
+
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t multiplier);
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/pentium4/sse2/mul_basecase.asm b/mpn/x86/pentium4/sse2/mul_basecase.asm
index 3e7672b2e..af7c18d8e 100644
--- a/mpn/x86/pentium4/sse2/mul_basecase.asm
+++ b/mpn/x86/pentium4/sse2/mul_basecase.asm
@@ -1,8 +1,6 @@
-dnl Intel Pentium 4 mpn_mul_basecase -- mpn by mpn multiplication.
-dnl
-dnl P4: 6.0 cycles/crossproduct (approx)
+dnl Intel Pentium-4 mpn_mul_basecase -- mpn by mpn multiplication.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: 6.0 cycles/crossproduct (approx)
+
+
C void mpn_mul_basecase (mp_ptr wp,
C mp_srcptr xp, mp_size_t xsize,
C mp_srcptr yp, mp_size_t ysize);
diff --git a/mpn/x86/pentium4/sse2/sqr_basecase.asm b/mpn/x86/pentium4/sse2/sqr_basecase.asm
index 4bcd798dc..a32a708ad 100644
--- a/mpn/x86/pentium4/sse2/sqr_basecase.asm
+++ b/mpn/x86/pentium4/sse2/sqr_basecase.asm
@@ -1,9 +1,6 @@
dnl Intel Pentium-4 mpn_sqr_basecase -- square an mpn number.
-dnl
-dnl P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular
-dnl product, at around 30x30 limbs.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA.
include(`../config.m4')
+C P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular
+C product, at around 30x30 limbs.
+
+
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm
index ff4004d9a..5e6667b22 100644
--- a/mpn/x86/pentium4/sse2/sub_n.asm
+++ b/mpn/x86/pentium4/sse2/sub_n.asm
@@ -1,8 +1,6 @@
dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction.
-dnl
-dnl P4: 4.0 cycles/limb
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+
+
C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size);
C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm
index 558cb1ca5..a43ea8afa 100644
--- a/mpn/x86/pentium4/sse2/submul_1.asm
+++ b/mpn/x86/pentium4/sse2/submul_1.asm
@@ -1,10 +1,7 @@
dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.
-dnl
-dnl Pentium4: 7 cycles/limb, unstable timing, at least on early Pentium4
-dnl silicon (stepping 10).
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl Suite 330, Boston, MA 02111-1307, USA.
-
include(`../config.m4')
+C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C (stepping 10).
+
+
C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t mult);
C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,