diff options
author | tege <tege@gmplib.org> | 2003-09-28 05:51:23 +0200 |
---|---|---|
committer | tege <tege@gmplib.org> | 2003-09-28 05:51:23 +0200 |
commit | 6bf9e2b32f1491a69317708474c201727ead2b6d (patch) | |
tree | 5bfeed80385ed0931d01bcc736095674802497a2 | |
parent | f936e2f1df485efb30f61c0badbfedec1b16702a (diff) | |
download | gmp-6bf9e2b32f1491a69317708474c201727ead2b6d.tar.gz |
Tabulate cycle counts: list cycles/limb for EV4, EV5, and EV6 in each file's header comments.
-rw-r--r-- | mpn/alpha/ev5/lshift.asm | 20 | ||||
-rw-r--r-- | mpn/alpha/ev5/rshift.asm | 18 | ||||
-rw-r--r-- | mpn/alpha/ev6/add_n.asm | 78 | ||||
-rw-r--r-- | mpn/alpha/ev6/sub_n.asm | 78 | ||||
-rw-r--r-- | mpn/alpha/lshift.asm | 7 | ||||
-rw-r--r-- | mpn/alpha/rshift.asm | 5 |
6 files changed, 110 insertions, 96 deletions
diff --git a/mpn/alpha/ev5/lshift.asm b/mpn/alpha/ev5/lshift.asm index bf885588a..8a6db1fda 100644 --- a/mpn/alpha/ev5/lshift.asm +++ b/mpn/alpha/ev5/lshift.asm @@ -1,6 +1,6 @@ -dnl Alpha EV5 __gmpn_lshift -- Shift a number left. +dnl Alpha EV5 mpn_lshift -- Shift a number left. -dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc. +dnl Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -21,13 +21,17 @@ dnl MA 02111-1307, USA. include(`../config.m4') -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl size r18 -dnl cnt r19 +C cycles/limb +C EV4: ? +C EV5: 3.25 +C EV6: 1.75 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 +C cnt r19 -dnl This code runs at 3.25 cycles/limb on the EV5. ASM_START() PROLOGUE(mpn_lshift) diff --git a/mpn/alpha/ev5/rshift.asm b/mpn/alpha/ev5/rshift.asm index 49d5424de..36d13d02a 100644 --- a/mpn/alpha/ev5/rshift.asm +++ b/mpn/alpha/ev5/rshift.asm @@ -1,4 +1,4 @@ -dnl Alpha EV5 __gmpn_rshift -- Shift a number right. +dnl Alpha EV5 mpn_rshift -- Shift a number right. dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc. @@ -21,13 +21,17 @@ dnl MA 02111-1307, USA. include(`../config.m4') -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl size r18 -dnl cnt r19 +C cycles/limb +C EV4: ? +C EV5: 3.25 +C EV6: 1.75 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 +C cnt r19 -dnl This code runs at 3.25 cycles/limb on the EV5. ASM_START() PROLOGUE(mpn_rshift) diff --git a/mpn/alpha/ev6/add_n.asm b/mpn/alpha/ev6/add_n.asm index b5954ca17..f11d6a3ba 100644 --- a/mpn/alpha/ev6/add_n.asm +++ b/mpn/alpha/ev6/add_n.asm @@ -1,7 +1,7 @@ dnl Alpha ev6 mpn_add_n -- Add two limb vectors of the same length > 0 and dnl store sum in a third limb vector. -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -22,42 +22,46 @@ dnl MA 02111-1307, USA. 
include(`../config.m4') -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl s2_ptr r18 -dnl size r19 - -dnl This code runs at 5.4 cycles/limb on EV5, and 2.1 cycles/limb on EV6. - -dnl This code was written in close cooperation with ev6 pipeline expert -dnl Steve Root. Any errors are tege's fault, though. - -dnl work triplet 0-2 -dnl work triplet 3-5 -dnl work triplet 6-8 -dnl work triplet 9-11 -dnl carry's 20-23 - -dnl sustains 8 adds in 17 cycles ! -dnl (from the d_cache) - -dnl pair loads and stores where possible -dnl store pairs oct-aligned where possible -dnl (didn't need it here) -dnl stores are delayed every third cycle -dnl loads and stores are delayed by fills -dnl U stays still, put code there where possible -dnl (note alternation of U1 and U0) -dnl L moves because of loads and stores -dnl note dampers in L to limit damage -dnl note, load ahead of time where possible - -dnl this odd-looking optimization expects -dnl that were having random bits in our data, so -dnl that a pure zero result is unlikely. so we -dnl penalize the unlikely case to help the -dnl common case. +C cycles/limb +C EV4: ? +C EV5: 5.4 +C EV6: 2.125 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C vp r18 +C n r19 + + +C This code was written in close cooperation with ev6 pipeline expert +C Steve Root. Any errors are tege's fault, though. + +C work triplet 0-2 +C work triplet 3-5 +C work triplet 6-8 +C work triplet 9-11 +C carry's 20-23 + +C sustains 8 adds in 17 cycles ! 
+C (from the d_cache) + +C pair loads and stores where possible +C store pairs oct-aligned where possible +C (didn't need it here) +C stores are delayed every third cycle +C loads and stores are delayed by fills +C U stays still, put code there where possible +C (note alternation of U1 and U0) +C L moves because of loads and stores +C note dampers in L to limit damage +C note, load ahead of time where possible + +C this odd-looking optimization expects +C that were having random bits in our data, so +C that a pure zero result is unlikely. so we +C penalize the unlikely case to help the +C common case. ASM_START() PROLOGUE(mpn_add_n) diff --git a/mpn/alpha/ev6/sub_n.asm b/mpn/alpha/ev6/sub_n.asm index dfb05f890..6d0620aaf 100644 --- a/mpn/alpha/ev6/sub_n.asm +++ b/mpn/alpha/ev6/sub_n.asm @@ -1,7 +1,7 @@ dnl Alpha ev6 mpn_sub_n -- Subtract two limb vectors of the same length > 0 dnl and store difference in a third limb vector. -dnl Copyright 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -22,42 +22,46 @@ dnl MA 02111-1307, USA. include(`../config.m4') -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl s2_ptr r18 -dnl size r19 - -dnl This code runs at 5.4 cycles/limb on EV5, and 2.1 cycles/limb on EV6. - -dnl This code was written in close cooperation with ev6 pipeline expert -dnl Steve Root. Any errors are tege's fault, though. - -dnl work triplet 0-2 -dnl work triplet 3-5 -dnl work triplet 6-8 -dnl work triplet 9-11 -dnl carry's 20-23 - -dnl sustains 8 subtracts in 17 cycles ! 
-dnl (from the d_cache) - -dnl pair loads and stores where possible -dnl store pairs oct-aligned where possible -dnl (didn't need it here) -dnl stores are delayed every third cycle -dnl loads and stores are delayed by fills -dnl U stays still, put code there where possible -dnl (note alternation of U1 and U0) -dnl L moves because of loads and stores -dnl note dampers in L to limit damage -dnl note, load ahead of time where possible - -dnl this odd-looking optimization expects -dnl that were having random bits in our data, so -dnl that a pure zero result is unlikely. so we -dnl penalize the unlikely case to help the -dnl common case. +C cycles/limb +C EV4: ? +C EV5: 5.4 +C EV6: 2.125 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C vp r18 +C n r19 + + +C This code was written in close cooperation with ev6 pipeline expert +C Steve Root. Any errors are tege's fault, though. + +C work triplet 0-2 +C work triplet 3-5 +C work triplet 6-8 +C work triplet 9-11 +C carry's 20-23 + +C sustains 8 subtracts in 17 cycles ! +C (from the d_cache) + +C pair loads and stores where possible +C store pairs oct-aligned where possible +C (didn't need it here) +C stores are delayed every third cycle +C loads and stores are delayed by fills +C U stays still, put code there where possible +C (note alternation of U1 and U0) +C L moves because of loads and stores +C note dampers in L to limit damage +C note, load ahead of time where possible + +C this odd-looking optimization expects +C that were having random bits in our data, so +C that a pure zero result is unlikely. so we +C penalize the unlikely case to help the +C common case. ASM_START() PROLOGUE(mpn_sub_n) diff --git a/mpn/alpha/lshift.asm b/mpn/alpha/lshift.asm index 21c76c46c..78f75ebc3 100644 --- a/mpn/alpha/lshift.asm +++ b/mpn/alpha/lshift.asm @@ -1,6 +1,6 @@ dnl Alpha mpn_lshift -- Shift a number left. -dnl Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc. 
+dnl Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -23,7 +23,7 @@ include(`../config.m4') C cycles/limb C EV4: 4.75 -C EV5: 3.25 +C EV5: 4 C EV6: 2 C INPUT PARAMETERS @@ -48,8 +48,7 @@ PROLOGUE(mpn_lshift) subq r18,r20,r18 ALIGN(8) -$Loop0: - ldq r3,-8(r17) +$Loop0: ldq r3,-8(r17) subq r16,8,r16 subq r17,8,r17 subq r20,1,r20 diff --git a/mpn/alpha/rshift.asm b/mpn/alpha/rshift.asm index 7cd4d06d8..07ec6f80a 100644 --- a/mpn/alpha/rshift.asm +++ b/mpn/alpha/rshift.asm @@ -23,7 +23,7 @@ include(`../config.m4') C cycles/limb C EV4: 4.75 -C EV5: 3.25 +C EV5: 3.75 C EV6: 2 C INPUT PARAMETERS @@ -46,8 +46,7 @@ PROLOGUE(mpn_rshift) subq r18,r20,r18 ALIGN(8) -$Loop0: - ldq r3,0(r17) +$Loop0: ldq r3,0(r17) addq r16,8,r16 addq r17,8,r17 subq r20,1,r20 |