diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-01-10 20:22:30 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-01-10 20:22:30 +0100 |
commit | 0ec184d82c728f0708858cc4a37570aad55f2592 (patch) | |
tree | f15dd330cb7259445d45d7b8d453ef30f7026554 /powerpc64 | |
parent | c7cf19390dcb0e9792fc23b0c935c1fe5e4f91e1 (diff) | |
download | nettle-0ec184d82c728f0708858cc4a37570aad55f2592.tar.gz |
ppc: Reduce number of registers used for ecc_secp256r1_redc.
Diffstat (limited to 'powerpc64')
-rw-r--r-- | powerpc64/ecc-secp256r1-redc.asm | 68 |
1 files changed, 26 insertions, 42 deletions
```diff
diff --git a/powerpc64/ecc-secp256r1-redc.asm b/powerpc64/ecc-secp256r1-redc.asm
index 8012a61e..e61a20cb 100644
--- a/powerpc64/ecc-secp256r1-redc.asm
+++ b/powerpc64/ecc-secp256r1-redc.asm
@@ -34,42 +34,36 @@ ifelse(`
 
 C Register usage:
 
-define(`SP', `r1')
-
 define(`RP', `r4')
 define(`XP', `r5')
 
 define(`F0', `r3')
 define(`F1', `r6')
 define(`F2', `r7')
-define(`F3', `r8')
+define(`T', `r8')
 
 define(`U0', `r9')
 define(`U1', `r10')
 define(`U2', `r11')
 define(`U3', `r12')
-define(`U4', `r14')
-define(`U5', `r15')
-define(`U6', `r16')
-define(`U7', `r17')
 
 	.file "ecc-secp256r1-redc.asm"
 
-C FOLD(x), sets (F3,F2,F1,F0) <-- [(x << 192) - (x << 160) + (x << 128) + (x <<32)]
+C FOLD(x), sets (x,F2,F1,F0) <-- [(x << 192) - (x << 160) + (x << 128) + (x <<32)]
 define(`FOLD', `
 	sldi	F0, $1, 32
 	srdi	F1, $1, 32
 	subfc	F2, F0, $1
-	subfe	F3, F1, $1
+	subfe	$1, F1, $1
 ')
 
-C FOLDC(x), sets (F3,F2,F1,F0) <-- [((x+c) << 192) - (x << 160) + (x << 128) + (x <<32)]
+C FOLDC(x), sets (x,F2,F1,F0) <-- [((x+c) << 192) - (x << 160) + (x << 128) + (x <<32)]
 define(`FOLDC', `
 	sldi	F0, $1, 32
 	srdi	F1, $1, 32
-	addze	F3, $1
+	addze	T, $1
 	subfc	F2, F0, $1
-	subfe	F3, F1, F3
+	subfe	$1, F1, T
 ')
 
 	C void ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
@@ -77,43 +71,38 @@ define(`FOLDC', `
 define(`FUNC_ALIGN', `5')
 PROLOGUE(_nettle_ecc_secp256r1_redc)
 
-	std	U4,-32(SP)
-	std	U5,-24(SP)
-	std	U6,-16(SP)
-	std	U7,-8(SP)
-
 	ld	U0, 0(XP)
 	ld	U1, 8(XP)
 	ld	U2, 16(XP)
 	ld	U3, 24(XP)
-	ld	U4, 32(XP)
-	ld	U5, 40(XP)
-	ld	U6, 48(XP)
-	ld	U7, 56(XP)
 
 	FOLD(U0)
+	ld	T, 32(XP)
 	addc	U1, F0, U1
 	adde	U2, F1, U2
 	adde	U3, F2, U3
-	adde	U4, F3, U4
+	adde	U0, U0, T
 
 	FOLDC(U1)
+	ld	T, 40(XP)
 	addc	U2, F0, U2
 	adde	U3, F1, U3
-	adde	U4, F2, U4
-	adde	U5, F3, U5
+	adde	U0, F2, U0
+	adde	U1, U1, T
 
 	FOLDC(U2)
+	ld	T, 48(XP)
 	addc	U3, F0, U3
-	adde	U4, F1, U4
-	adde	U5, F2, U5
-	adde	U6, F3, U6
+	adde	U0, F1, U0
+	adde	U1, F2, U1
+	adde	U2, U2, T
 
 	FOLDC(U3)
-	addc	U4, F0, U4
-	adde	U5, F1, U5
-	adde	U6, F2, U6
-	adde	U7, F3, U7
+	ld	T, 56(XP)
+	addc	U0, F0, U0
+	adde	U1, F1, U1
+	adde	U2, F2, U2
+	adde	U3, U3, T
 
 	C If carry, we need to add in
 	C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1>
@@ -121,24 +110,19 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
 	addze	F0, F0
 	neg	F2, F0
 	sldi	F1, F2, 32
-	srdi	F3, F2, 32
+	srdi	T, F2, 32
 	li	XP, -2
-	and	F3, F3, XP
+	and	T, T, XP
 
-	addc	U0, F0, U4
-	adde	U1, F1, U5
-	adde	U2, F2, U6
-	adde	U3, F3, U7
+	addc	U0, F0, U0
+	adde	U1, F1, U1
+	adde	U2, F2, U2
+	adde	U3, T, U3
 
 	std	U0, 0(RP)
 	std	U1, 8(RP)
 	std	U2, 16(RP)
 	std	U3, 24(RP)
 
-	ld	U4,-32(SP)
-	ld	U5,-24(SP)
-	ld	U6,-16(SP)
-	ld	U7,-8(SP)
-
 	blr
 EPILOGUE(_nettle_ecc_secp256r1_redc)
```