diff options
author | Niels Möller <nisse@lysator.liu.se> | 2013-02-28 11:45:44 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2013-02-28 11:45:44 +0100 |
commit | 10e0615f30f6997675985b655f1cfd6823aa8615 (patch) | |
tree | c71d897538d35ebf8edaba319689c25ee622d1b8 /x86_64/ecc-192-modp.asm | |
parent | 190c1584063e59f4d6475bc55243a2fb531bb16c (diff) | |
download | nettle-10e0615f30f6997675985b655f1cfd6823aa8615.tar.gz |
Reduce number of additions for x86_64 ecc_192_modp.
Diffstat (limited to 'x86_64/ecc-192-modp.asm')
-rw-r--r-- | x86_64/ecc-192-modp.asm | 69 |
1 file changed, 36 insertions, 33 deletions
diff --git a/x86_64/ecc-192-modp.asm b/x86_64/ecc-192-modp.asm index 288340f9..5812070b 100644 --- a/x86_64/ecc-192-modp.asm +++ b/x86_64/ecc-192-modp.asm @@ -20,50 +20,53 @@ C MA 02111-1301, USA. .file "ecc-192-modp.asm" define(<RP>, <%rsi>) -define(<T1>, <%rdi>) C Overlaps unused ecc input -define(<T2>, <%rcx>) -define(<T3>, <%rdx>) -define(<T4>, <%r8>) -define(<T5>, <%r9>) -define(<T6>, <%r10>) +define(<T0>, <%rdi>) C Overlaps unused ecc input +define(<T1>, <%rcx>) +define(<T2>, <%rdx>) +define(<T3>, <%r8>) +define(<H>, <%r9>) +define(<C1>, <%r10>) +define(<C2>, <%r11>) C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp) .text ALIGN(4) PROLOGUE(nettle_ecc_192_modp) W64_ENTRY(2, 0) - C First: (B+1)*{r5, r4} < B^3 + B^2 - B - mov 32(RP), T1 - mov 40(RP), T2 - mov T2, T3 - xor T4, T4 - add T1, T2 - adc $0, T3 - adc $0, T4 + mov 16(RP), T2 + mov 24(RP), T3 + mov 40(RP), H + xor C1, C1 + xor C2, C2 - add 8(RP), T1 - adc 16(RP), T2 - adc 24(RP), T3 - adc $0, T4 - C Sum is < 2B^4 + B^3 - B - 1, so {T4, T3} < 3B + add H, T2 + adc H, T3 + C Carry to be added in at T1 and T2 + setc LREG(C2) + + mov 8(RP), T1 + mov 32(RP), H + adc H, T1 + adc H, T2 + C Carry to be added in at T0 and T1 + setc LREG(C1) + + mov (RP), T0 + adc T3, T0 + adc T3, T1 + adc $0, C2 - C Next: (B+1) * {T4, T3} < 3B^2 + 2B - mov T4, T5 - add T3, T4 - adc $0, T5 + C Add in C1 and C2 + add C1, T1 + adc C2, T2 + setc LREG(C1) - xor T6, T6 - add (RP), T3 - adc T4, T1 - adc T5, T2 - adc $0, T6 - - C Fold in final carry. - add T6, T3 - adc T6, T1 + C Fold final carry. + adc $0, T0 + adc C1, T1 adc $0, T2 - mov T3, (RP) + mov T0, (RP) mov T1, 8(RP) mov T2, 16(RP) |