From 64d8a4619f996d6d91423e28d1bc6eb8695508c5 Mon Sep 17 00:00:00 2001 From: Kevin Ryde Date: Sat, 30 Mar 2002 23:20:30 +0100 Subject: * mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put speeds after the copyright notice, so as to keep that clear. --- mpn/x86/pentium4/sse2/add_n.asm | 9 +++++---- mpn/x86/pentium4/sse2/addmul_1.asm | 10 +++++----- mpn/x86/pentium4/sse2/dive_1.asm | 7 ++++--- mpn/x86/pentium4/sse2/diveby3.asm | 7 ++++--- mpn/x86/pentium4/sse2/mod_34lsub1.asm | 8 ++++---- mpn/x86/pentium4/sse2/mode1o.asm | 7 ++++--- mpn/x86/pentium4/sse2/mul_1.asm | 8 ++++---- mpn/x86/pentium4/sse2/mul_basecase.asm | 9 +++++---- mpn/x86/pentium4/sse2/sqr_basecase.asm | 9 +++++---- mpn/x86/pentium4/sse2/sub_n.asm | 9 +++++---- mpn/x86/pentium4/sse2/submul_1.asm | 10 +++++----- 11 files changed, 50 insertions(+), 43 deletions(-) (limited to 'mpn/x86/pentium4/sse2') diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm index c67d4b9d9..f138bb40b 100644 --- a/mpn/x86/pentium4/sse2/add_n.asm +++ b/mpn/x86/pentium4/sse2/add_n.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_add_n -- mpn addition. -dnl -dnl P4: 4.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 + + C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size); C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm index d0e0bf10c..7c52cd3f2 100644 --- a/mpn/x86/pentium4/sse2/addmul_1.asm +++ b/mpn/x86/pentium4/sse2/addmul_1.asm @@ -1,10 +1,7 @@ dnl Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add dnl the result to a second limb vector. -dnl -dnl Pentium4: 6 cycles/limb, unstable timing, at least on early Pentium4 -dnl silicon (stepping 10). -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 6 cycles/limb, unstable timing, at least on early Pentium4 silicon +C (stepping 10). + + C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm index c5f18cad0..ad53dad89 100644 --- a/mpn/x86/pentium4/sse2/dive_1.asm +++ b/mpn/x86/pentium4/sse2/dive_1.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division. -dnl -dnl P4: 19.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 19.0 cycles/limb + + C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C diff --git a/mpn/x86/pentium4/sse2/diveby3.asm b/mpn/x86/pentium4/sse2/diveby3.asm index fade75a1f..e258caa4d 100644 --- a/mpn/x86/pentium4/sse2/diveby3.asm +++ b/mpn/x86/pentium4/sse2/diveby3.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_divexact_by3 -- mpn exact division by 3. -dnl -dnl P4: 18.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 18.0 cycles/limb + + C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t carry); C diff --git a/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/mpn/x86/pentium4/sse2/mod_34lsub1.asm index 7b5cd145b..50481f0d8 100644 --- a/mpn/x86/pentium4/sse2/mod_34lsub1.asm +++ b/mpn/x86/pentium4/sse2/mod_34lsub1.asm @@ -1,8 +1,6 @@ dnl Intel Pentium 4 mpn_mod_32lsub1 -- remainder modulo 2^24-1. -dnl -dnl Pentium4: 1.0 cycles/limb -dnl Copyright 2000, 2001 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C Pentium4: 1.0 cycles/limb + + C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C C Enhancements: diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm index 191d07127..14ccae86b 100644 --- a/mpn/x86/pentium4/sse2/mode1o.asm +++ b/mpn/x86/pentium4/sse2/mode1o.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder. -dnl -dnl P4: 19.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 19.0 cycles/limb + + C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size, C mp_limb_t divisor); C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm index 9bf9dea1f..b3916966f 100644 --- a/mpn/x86/pentium4/sse2/mul_1.asm +++ b/mpn/x86/pentium4/sse2/mul_1.asm @@ -1,9 +1,7 @@ dnl Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store dnl the result in a second limb vector. -dnl -dnl Pentium4: 4 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -22,10 +20,12 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4 cycles/limb + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/mpn/x86/pentium4/sse2/mul_basecase.asm b/mpn/x86/pentium4/sse2/mul_basecase.asm index 3e7672b2e..af7c18d8e 100644 --- a/mpn/x86/pentium4/sse2/mul_basecase.asm +++ b/mpn/x86/pentium4/sse2/mul_basecase.asm @@ -1,8 +1,6 @@ -dnl Intel Pentium 4 mpn_mul_basecase -- mpn by mpn multiplication. -dnl -dnl P4: 6.0 cycles/crossproduct (approx) +dnl Intel Pentium-4 mpn_mul_basecase -- mpn by mpn multiplication. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,6 +22,9 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: 6.0 cycles/crossproduct (approx) + + C void mpn_mul_basecase (mp_ptr wp, C mp_srcptr xp, mp_size_t xsize, C mp_srcptr yp, mp_size_t ysize); diff --git a/mpn/x86/pentium4/sse2/sqr_basecase.asm b/mpn/x86/pentium4/sse2/sqr_basecase.asm index 4bcd798dc..a32a708ad 100644 --- a/mpn/x86/pentium4/sse2/sqr_basecase.asm +++ b/mpn/x86/pentium4/sse2/sqr_basecase.asm @@ -1,9 +1,6 @@ dnl Intel Pentium-4 mpn_sqr_basecase -- square an mpn number. -dnl -dnl P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular -dnl product, at around 30x30 limbs. -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -25,6 +22,10 @@ dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') +C P4: approx 3.5 cycles per crossproduct, or 7 cycles per triangular +C product, at around 30x30 limbs. + + C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm index ff4004d9a..5e6667b22 100644 --- a/mpn/x86/pentium4/sse2/sub_n.asm +++ b/mpn/x86/pentium4/sse2/sub_n.asm @@ -1,8 +1,6 @@ dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction. -dnl -dnl P4: 4.0 cycles/limb -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -21,10 +19,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 + + C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size); C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm index 558cb1ca5..a43ea8afa 100644 --- a/mpn/x86/pentium4/sse2/submul_1.asm +++ b/mpn/x86/pentium4/sse2/submul_1.asm @@ -1,10 +1,7 @@ dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. -dnl -dnl Pentium4: 7 cycles/limb, unstable timing, at least on early Pentium4 -dnl silicon (stepping 10). -dnl Copyright 2001 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,10 +20,13 @@ dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') +C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon +C (stepping 10). + + C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t mult); C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, -- cgit v1.2.1