summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author tege <tege@gmplib.org> 2003-09-28 05:51:23 +0200
committer tege <tege@gmplib.org> 2003-09-28 05:51:23 +0200
commit 6bf9e2b32f1491a69317708474c201727ead2b6d (patch)
tree 5bfeed80385ed0931d01bcc736095674802497a2
parent f936e2f1df485efb30f61c0badbfedec1b16702a (diff)
download gmp-6bf9e2b32f1491a69317708474c201727ead2b6d.tar.gz
Table cycle counts.
-rw-r--r-- mpn/alpha/ev5/lshift.asm 20
-rw-r--r-- mpn/alpha/ev5/rshift.asm 18
-rw-r--r-- mpn/alpha/ev6/add_n.asm 78
-rw-r--r-- mpn/alpha/ev6/sub_n.asm 78
-rw-r--r-- mpn/alpha/lshift.asm 7
-rw-r--r-- mpn/alpha/rshift.asm 5
6 files changed, 110 insertions, 96 deletions
diff --git a/mpn/alpha/ev5/lshift.asm b/mpn/alpha/ev5/lshift.asm
index bf885588a..8a6db1fda 100644
--- a/mpn/alpha/ev5/lshift.asm
+++ b/mpn/alpha/ev5/lshift.asm
@@ -1,6 +1,6 @@
-dnl Alpha EV5 __gmpn_lshift -- Shift a number left.
+dnl Alpha EV5 mpn_lshift -- Shift a number left.
-dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc.
+dnl Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -21,13 +21,17 @@ dnl MA 02111-1307, USA.
include(`../config.m4')
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl size r18
-dnl cnt r19
+C cycles/limb
+C EV4: ?
+C EV5: 3.25
+C EV6: 1.75
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+C cnt r19
-dnl This code runs at 3.25 cycles/limb on the EV5.
ASM_START()
PROLOGUE(mpn_lshift)
diff --git a/mpn/alpha/ev5/rshift.asm b/mpn/alpha/ev5/rshift.asm
index 49d5424de..36d13d02a 100644
--- a/mpn/alpha/ev5/rshift.asm
+++ b/mpn/alpha/ev5/rshift.asm
@@ -1,4 +1,4 @@
-dnl Alpha EV5 __gmpn_rshift -- Shift a number right.
+dnl Alpha EV5 mpn_rshift -- Shift a number right.
dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc.
@@ -21,13 +21,17 @@ dnl MA 02111-1307, USA.
include(`../config.m4')
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl size r18
-dnl cnt r19
+C cycles/limb
+C EV4: ?
+C EV5: 3.25
+C EV6: 1.75
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+C cnt r19
-dnl This code runs at 3.25 cycles/limb on the EV5.
ASM_START()
PROLOGUE(mpn_rshift)
diff --git a/mpn/alpha/ev6/add_n.asm b/mpn/alpha/ev6/add_n.asm
index b5954ca17..f11d6a3ba 100644
--- a/mpn/alpha/ev6/add_n.asm
+++ b/mpn/alpha/ev6/add_n.asm
@@ -1,7 +1,7 @@
dnl Alpha ev6 mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl store sum in a third limb vector.
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -22,42 +22,46 @@ dnl MA 02111-1307, USA.
include(`../config.m4')
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl s2_ptr r18
-dnl size r19
-
-dnl This code runs at 5.4 cycles/limb on EV5, and 2.1 cycles/limb on EV6.
-
-dnl This code was written in close cooperation with ev6 pipeline expert
-dnl Steve Root. Any errors are tege's fault, though.
-
-dnl work triplet 0-2
-dnl work triplet 3-5
-dnl work triplet 6-8
-dnl work triplet 9-11
-dnl carry's 20-23
-
-dnl sustains 8 adds in 17 cycles !
-dnl (from the d_cache)
-
-dnl pair loads and stores where possible
-dnl store pairs oct-aligned where possible
-dnl (didn't need it here)
-dnl stores are delayed every third cycle
-dnl loads and stores are delayed by fills
-dnl U stays still, put code there where possible
-dnl (note alternation of U1 and U0)
-dnl L moves because of loads and stores
-dnl note dampers in L to limit damage
-dnl note, load ahead of time where possible
-
-dnl this odd-looking optimization expects
-dnl that were having random bits in our data, so
-dnl that a pure zero result is unlikely. so we
-dnl penalize the unlikely case to help the
-dnl common case.
+C cycles/limb
+C EV4: ?
+C EV5: 5.4
+C EV6: 2.125
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C vp r18
+C n r19
+
+
+C This code was written in close cooperation with ev6 pipeline expert
+C Steve Root. Any errors are tege's fault, though.
+
+C work triplet 0-2
+C work triplet 3-5
+C work triplet 6-8
+C work triplet 9-11
+C carry's 20-23
+
+C sustains 8 adds in 17 cycles !
+C (from the d_cache)
+
+C pair loads and stores where possible
+C store pairs oct-aligned where possible
+C (didn't need it here)
+C stores are delayed every third cycle
+C loads and stores are delayed by fills
+C U stays still, put code there where possible
+C (note alternation of U1 and U0)
+C L moves because of loads and stores
+C note dampers in L to limit damage
+C note, load ahead of time where possible
+
+C this odd-looking optimization expects
+C that were having random bits in our data, so
+C that a pure zero result is unlikely. so we
+C penalize the unlikely case to help the
+C common case.
ASM_START()
PROLOGUE(mpn_add_n)
diff --git a/mpn/alpha/ev6/sub_n.asm b/mpn/alpha/ev6/sub_n.asm
index dfb05f890..6d0620aaf 100644
--- a/mpn/alpha/ev6/sub_n.asm
+++ b/mpn/alpha/ev6/sub_n.asm
@@ -1,7 +1,7 @@
dnl Alpha ev6 mpn_sub_n -- Subtract two limb vectors of the same length > 0
dnl and store difference in a third limb vector.
-dnl Copyright 2000 Free Software Foundation, Inc.
+dnl Copyright 2000, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -22,42 +22,46 @@ dnl MA 02111-1307, USA.
include(`../config.m4')
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl s2_ptr r18
-dnl size r19
-
-dnl This code runs at 5.4 cycles/limb on EV5, and 2.1 cycles/limb on EV6.
-
-dnl This code was written in close cooperation with ev6 pipeline expert
-dnl Steve Root. Any errors are tege's fault, though.
-
-dnl work triplet 0-2
-dnl work triplet 3-5
-dnl work triplet 6-8
-dnl work triplet 9-11
-dnl carry's 20-23
-
-dnl sustains 8 subtracts in 17 cycles !
-dnl (from the d_cache)
-
-dnl pair loads and stores where possible
-dnl store pairs oct-aligned where possible
-dnl (didn't need it here)
-dnl stores are delayed every third cycle
-dnl loads and stores are delayed by fills
-dnl U stays still, put code there where possible
-dnl (note alternation of U1 and U0)
-dnl L moves because of loads and stores
-dnl note dampers in L to limit damage
-dnl note, load ahead of time where possible
-
-dnl this odd-looking optimization expects
-dnl that were having random bits in our data, so
-dnl that a pure zero result is unlikely. so we
-dnl penalize the unlikely case to help the
-dnl common case.
+C cycles/limb
+C EV4: ?
+C EV5: 5.4
+C EV6: 2.125
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C vp r18
+C n r19
+
+
+C This code was written in close cooperation with ev6 pipeline expert
+C Steve Root. Any errors are tege's fault, though.
+
+C work triplet 0-2
+C work triplet 3-5
+C work triplet 6-8
+C work triplet 9-11
+C carry's 20-23
+
+C sustains 8 subtracts in 17 cycles !
+C (from the d_cache)
+
+C pair loads and stores where possible
+C store pairs oct-aligned where possible
+C (didn't need it here)
+C stores are delayed every third cycle
+C loads and stores are delayed by fills
+C U stays still, put code there where possible
+C (note alternation of U1 and U0)
+C L moves because of loads and stores
+C note dampers in L to limit damage
+C note, load ahead of time where possible
+
+C this odd-looking optimization expects
+C that were having random bits in our data, so
+C that a pure zero result is unlikely. so we
+C penalize the unlikely case to help the
+C common case.
ASM_START()
PROLOGUE(mpn_sub_n)
diff --git a/mpn/alpha/lshift.asm b/mpn/alpha/lshift.asm
index 21c76c46c..78f75ebc3 100644
--- a/mpn/alpha/lshift.asm
+++ b/mpn/alpha/lshift.asm
@@ -1,6 +1,6 @@
dnl Alpha mpn_lshift -- Shift a number left.
-dnl Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -23,7 +23,7 @@ include(`../config.m4')
C cycles/limb
C EV4: 4.75
-C EV5: 3.25
+C EV5: 4
C EV6: 2
C INPUT PARAMETERS
@@ -48,8 +48,7 @@ PROLOGUE(mpn_lshift)
subq r18,r20,r18
ALIGN(8)
-$Loop0:
- ldq r3,-8(r17)
+$Loop0: ldq r3,-8(r17)
subq r16,8,r16
subq r17,8,r17
subq r20,1,r20
diff --git a/mpn/alpha/rshift.asm b/mpn/alpha/rshift.asm
index 7cd4d06d8..07ec6f80a 100644
--- a/mpn/alpha/rshift.asm
+++ b/mpn/alpha/rshift.asm
@@ -23,7 +23,7 @@ include(`../config.m4')
C cycles/limb
C EV4: 4.75
-C EV5: 3.25
+C EV5: 3.75
C EV6: 2
C INPUT PARAMETERS
@@ -46,8 +46,7 @@ PROLOGUE(mpn_rshift)
subq r18,r20,r18
ALIGN(8)
-$Loop0:
- ldq r3,0(r17)
+$Loop0: ldq r3,0(r17)
addq r16,8,r16
addq r17,8,r17
subq r20,1,r20