diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-10-31 10:14:40 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-10-31 10:14:40 +0100 |
commit | 4f3523d81ab6cd6667ae78d8792a64e987386b16 (patch) | |
tree | 7ad9ac80e86f268ac43b272d9f7091339eaccaf1 /x86_64 | |
parent | 82dc13c952067e1d6d0af818bf5cc76f558da73b (diff) | |
download | nettle-4f3523d81ab6cd6667ae78d8792a64e987386b16.tar.gz |
Update x86_64 ecc_secp256r1_redc for separate result argument.
Diffstat (limited to 'x86_64')
-rw-r--r-- | x86_64/ecc-secp256r1-redc.asm | 36 |
1 files changed, 20 insertions, 16 deletions
diff --git a/x86_64/ecc-secp256r1-redc.asm b/x86_64/ecc-secp256r1-redc.asm
index 38f12b22..a03059ae 100644
--- a/x86_64/ecc-secp256r1-redc.asm
+++ b/x86_64/ecc-secp256r1-redc.asm
@@ -33,15 +33,17 @@ ifelse(`
 	.file "ecc-secp256r1-redc.asm"
 
 define(`RP', `%rsi')
+define(`XP', `%rdx')
+
 define(`U0', `%rdi') C Overlaps unused modulo input
 define(`U1', `%rcx')
 define(`U2', `%rax')
-define(`U3', `%rdx')
-define(`U4', `%r8')
-define(`U5', `%r9')
-define(`U6', `%r10')
-define(`F0', `%r11')
-define(`F1', `%r12')
+define(`U3', `%r8')
+define(`U4', `%r9')
+define(`U5', `%r10')
+define(`U6', `%r11')
+define(`F0', `%r12')
+define(`F1', `%r13')
 define(`F2', `%rbx')
 define(`F3', `%rbp')
 
@@ -59,38 +61,39 @@ define(`FOLD', `
 	sbb `$'0, F3
 ')
 PROLOGUE(_nettle_ecc_secp256r1_redc)
-	W64_ENTRY(2, 0)
+	W64_ENTRY(3, 0)
 	C save all registers that need to be saved
 	push %rbx
 	push %rbp
 	push %r12
+	push %r13
 
-	mov (RP), U0
+	mov (XP), U0
 	FOLD(U0)
-	mov 8(RP), U1
-	mov 16(RP), U2
-	mov 24(RP), U3
+	mov 8(XP), U1
+	mov 16(XP), U2
+	mov 24(XP), U3
 	sub F0, U1
 	sbb F1, U2
 	sbb F2, U3
 	sbb F3, U0 C Add in later
 
 	FOLD(U1)
-	mov 32(RP), U4
+	mov 32(XP), U4
 	sub F0, U2
 	sbb F1, U3
 	sbb F2, U4
 	sbb F3, U1
 
 	FOLD(U2)
-	mov 40(RP), U5
+	mov 40(XP), U5
 	sub F0, U3
 	sbb F1, U4
 	sbb F2, U5
 	sbb F3, U2
 
 	FOLD(U3)
-	mov 48(RP), U6
+	mov 48(XP), U6
 	sub F0, U4
 	sbb F1, U5
 	sbb F2, U6
@@ -99,7 +102,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
 	add U4, U0
 	adc U5, U1
 	adc U6, U2
-	adc 56(RP), U3
+	adc 56(XP), U3
 
 	C If carry, we need to add in
 	C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1>
@@ -121,9 +124,10 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
 
 	mov U3, 24(RP)
 
+	pop %r13
 	pop %r12
 	pop %rbp
 	pop %rbx
-	W64_EXIT(2, 0)
+	W64_EXIT(3, 0)
 	ret
 EPILOGUE(_nettle_ecc_secp256r1_redc)