summary | refs | log | tree | commit | diff
path: root/powerpc64
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2022-01-10 20:22:30 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2022-01-10 20:22:30 +0100
commit: 0ec184d82c728f0708858cc4a37570aad55f2592 (patch)
tree: f15dd330cb7259445d45d7b8d453ef30f7026554 /powerpc64
parent: c7cf19390dcb0e9792fc23b0c935c1fe5e4f91e1 (diff)
download: nettle-0ec184d82c728f0708858cc4a37570aad55f2592.tar.gz
ppc: Reduce number of registers used for ecc_secp256r1_redc.
Diffstat (limited to 'powerpc64')
-rw-r--r-- powerpc64/ecc-secp256r1-redc.asm | 68
1 files changed, 26 insertions, 42 deletions
diff --git a/powerpc64/ecc-secp256r1-redc.asm b/powerpc64/ecc-secp256r1-redc.asm
index 8012a61e..e61a20cb 100644
--- a/powerpc64/ecc-secp256r1-redc.asm
+++ b/powerpc64/ecc-secp256r1-redc.asm
@@ -34,42 +34,36 @@ ifelse(`
C Register usage:
-define(`SP', `r1')
-
define(`RP', `r4')
define(`XP', `r5')
define(`F0', `r3')
define(`F1', `r6')
define(`F2', `r7')
-define(`F3', `r8')
+define(`T', `r8')
define(`U0', `r9')
define(`U1', `r10')
define(`U2', `r11')
define(`U3', `r12')
-define(`U4', `r14')
-define(`U5', `r15')
-define(`U6', `r16')
-define(`U7', `r17')
.file "ecc-secp256r1-redc.asm"
-C FOLD(x), sets (F3,F2,F1,F0) <-- [(x << 192) - (x << 160) + (x << 128) + (x <<32)]
+C FOLD(x), sets (x,F2,F1,F0) <-- [(x << 192) - (x << 160) + (x << 128) + (x <<32)]
define(`FOLD', `
sldi F0, $1, 32
srdi F1, $1, 32
subfc F2, F0, $1
- subfe F3, F1, $1
+ subfe $1, F1, $1
')
-C FOLDC(x), sets (F3,F2,F1,F0) <-- [((x+c) << 192) - (x << 160) + (x << 128) + (x <<32)]
+C FOLDC(x), sets (x,F2,F1,F0) <-- [((x+c) << 192) - (x << 160) + (x << 128) + (x <<32)]
define(`FOLDC', `
sldi F0, $1, 32
srdi F1, $1, 32
- addze F3, $1
+ addze T, $1
subfc F2, F0, $1
- subfe F3, F1, F3
+ subfe $1, F1, T
')
C void ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
@@ -77,43 +71,38 @@ define(`FOLDC', `
define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_ecc_secp256r1_redc)
- std U4,-32(SP)
- std U5,-24(SP)
- std U6,-16(SP)
- std U7,-8(SP)
-
ld U0, 0(XP)
ld U1, 8(XP)
ld U2, 16(XP)
ld U3, 24(XP)
- ld U4, 32(XP)
- ld U5, 40(XP)
- ld U6, 48(XP)
- ld U7, 56(XP)
FOLD(U0)
+ ld T, 32(XP)
addc U1, F0, U1
adde U2, F1, U2
adde U3, F2, U3
- adde U4, F3, U4
+ adde U0, U0, T
FOLDC(U1)
+ ld T, 40(XP)
addc U2, F0, U2
adde U3, F1, U3
- adde U4, F2, U4
- adde U5, F3, U5
+ adde U0, F2, U0
+ adde U1, U1, T
FOLDC(U2)
+ ld T, 48(XP)
addc U3, F0, U3
- adde U4, F1, U4
- adde U5, F2, U5
- adde U6, F3, U6
+ adde U0, F1, U0
+ adde U1, F2, U1
+ adde U2, U2, T
FOLDC(U3)
- addc U4, F0, U4
- adde U5, F1, U5
- adde U6, F2, U6
- adde U7, F3, U7
+ ld T, 56(XP)
+ addc U0, F0, U0
+ adde U1, F1, U1
+ adde U2, F2, U2
+ adde U3, U3, T
C If carry, we need to add in
C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1>
@@ -121,24 +110,19 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
addze F0, F0
neg F2, F0
sldi F1, F2, 32
- srdi F3, F2, 32
+ srdi T, F2, 32
li XP, -2
- and F3, F3, XP
+ and T, T, XP
- addc U0, F0, U4
- adde U1, F1, U5
- adde U2, F2, U6
- adde U3, F3, U7
+ addc U0, F0, U0
+ adde U1, F1, U1
+ adde U2, F2, U2
+ adde U3, T, U3
std U0, 0(RP)
std U1, 8(RP)
std U2, 16(RP)
std U3, 24(RP)
- ld U4,-32(SP)
- ld U5,-24(SP)
- ld U6,-16(SP)
- ld U7,-8(SP)
-
blr
EPILOGUE(_nettle_ecc_secp256r1_redc)