diff options
author | Niels Möller <nisse@lysator.liu.se> | 2013-03-07 15:43:55 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2013-03-07 15:43:55 +0100 |
commit | 32f3ba18f7c9e8715a759380afdc7bbe93d2542e (patch) | |
tree | 7a36c1a7dd3a56fd84d931624d8bd025f6bcdd47 /armv7 | |
parent | 11609bf3647cc027f1d0c369c1a22b832acc0305 (diff) | |
parent | 33304507db5e8f1cb80b313b9d4128692b8ee385 (diff) | |
download | nettle-32f3ba18f7c9e8715a759380afdc7bbe93d2542e.tar.gz |
Merge branch 'ecc-support'.
Diffstat (limited to 'armv7')
-rw-r--r-- | armv7/README | 4 | ||||
-rw-r--r-- | armv7/ecc-192-modp.asm | 93 | ||||
-rw-r--r-- | armv7/ecc-224-modp.asm | 111 | ||||
-rw-r--r-- | armv7/ecc-256-redc.asm | 160 | ||||
-rw-r--r-- | armv7/ecc-384-modp.asm | 257 | ||||
-rw-r--r-- | armv7/ecc-521-modp.asm | 114 |
6 files changed, 738 insertions, 1 deletions
diff --git a/armv7/README b/armv7/README index 4d01f30b..9bacd97b 100644 --- a/armv7/README +++ b/armv7/README @@ -4,6 +4,8 @@ For efficient loads and stores, use ldmia, stmia and friends. Can do two loads or stores per cycle with 8-byte aligned addresses, or three loads or stores in two cycles, regardless of alignment. +12 usable registers (if we exclude r9). + ABI gnueabi(hf) (not depending on the floating point conventions) Registers May be Argument @@ -23,7 +25,7 @@ r10 N r11 N r12 (ip) Y r13 (sp) -r14 (lr) +r14 (lr) N r15 (pc) q0 (d0, d1) Y 1 (for "hf" abi) diff --git a/armv7/ecc-192-modp.asm b/armv7/ecc-192-modp.asm new file mode 100644 index 00000000..1b226e30 --- /dev/null +++ b/armv7/ecc-192-modp.asm @@ -0,0 +1,93 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. 
+ + .file "ecc-192-modp.asm" + .arm + +define(<HP>, <r0>) C Overlaps unused ecc argument +define(<RP>, <r1>) + +define(<T0>, <r2>) +define(<T1>, <r3>) +define(<T2>, <r4>) +define(<T3>, <r5>) +define(<T4>, <r6>) +define(<T5>, <r7>) +define(<T6>, <r8>) +define(<T7>, <r10>) +define(<H0>, <T0>) C Overlaps T0 and T1 +define(<H1>, <T1>) +define(<C2>, <HP>) +define(<C4>, <r12>) + + C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp) + C Reads the 12 words rp[0..11] and stores the 6 result words to rp[0..5]. + C r0 is overwritten before it is read (it is reused as HP, then as C2). + C r4-r8 and r10 are pushed on entry and popped before the bx lr. + .text + .align 2 + +PROLOGUE(nettle_ecc_192_modp) + push {r4,r5,r6,r7,r8,r10} + C Reduce two words at a time + add HP, RP, #48 + add RP, RP, #8 + ldmdb HP!, {H0,H1} + ldm RP, {T2,T3,T4,T5,T6,T7} + mov C4, #0 + adds T4, T4, H0 + adcs T5, T5, H1 + adcs T6, T6, H0 + adcs T7, T7, H1 + C Need to add carry to T2 and T4, do T4 later. + C (adc without the s suffix does not update the flags, so the + C same carry is still consumed by the adcs after the next load.) + adc C4, C4, #0 + + ldmdb HP!, {H0,H1} + mov C2, #0 + adcs T2, T2, H0 + adcs T3, T3, H1 + adcs T4, T4, H0 + adcs T5, T5, H1 + C Need to add carry to T0 and T2, do T2 later + adc C2, C2, #0 + + ldmdb RP!, {T0, T1} + adcs T0, T0, T6 + adcs T1, T1, T7 + adcs T2, T2, T6 + adcs T3, T3, T7 + adc C4, C4, #0 + + adds T2, T2, C2 + adcs T3, T3, #0 + adcs T4, T4, C4 + adcs T5, T5, #0 + mov C2, #0 + adc C2, C2, #0 + + C Add in final carry + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, C2 + adcs T3, T3, #0 + adcs T4, T4, #0 + adc T5, T5, #0 + + stm RP, {T0,T1,T2,T3,T4,T5} + + pop {r4,r5,r6,r7,r8,r10} + bx lr +EPILOGUE(nettle_ecc_192_modp) diff --git a/armv7/ecc-224-modp.asm b/armv7/ecc-224-modp.asm new file mode 100644 index 00000000..ef7a703a --- /dev/null +++ b/armv7/ecc-224-modp.asm @@ -0,0 +1,111 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. 
+C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. + + .file "ecc-224-modp.asm" + .arm + +define(<RP>, <r1>) +define(<H>, <r0>) C Overlaps unused ecc argument + +define(<T0>, <r2>) +define(<T1>, <r3>) +define(<T2>, <r4>) +define(<T3>, <r5>) +define(<T4>, <r6>) +define(<T5>, <r7>) +define(<T6>, <r8>) +define(<N3>, <r10>) +define(<L0>, <r11>) +define(<L1>, <r12>) +define(<L2>, <lr>) + + C ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp) + C Reads the 14 words rp[0..13] and stores the 7 result words to rp[0..6]. + C r0 is overwritten before it is read (it is reused as H). + C lr is used as scratch (L2); its saved value is popped straight into pc. + .text + .align 2 + +PROLOGUE(nettle_ecc_224_modp) + push {r4,r5,r6,r7,r8,r10,r11,lr} + + add L2, RP, #28 + ldm L2, {T0,T1,T2,T3,T4,T5,T6} + mov H, #0 + + adds T0, T0, T4 + adcs T1, T1, T5 + adcs T2, T2, T6 + adc H, H, #0 + + C This switch from adcs to sbcs takes carry into account with + C correct sign, but it always subtracts 1 too much. We arrange + C to also add B^7 + 1 below, so the effect is adding p. This + C addition of p also ensures that the result never is + C negative. + + sbcs N3, T3, T0 + sbcs T4, T4, T1 + sbcs T5, T5, T2 + sbcs T6, T6, H + mov H, #1 C This is the B^7 + sbc H, #0 + subs T6, T6, T3 + sbc H, #0 + + C Now subtract from low half + ldm RP!, {L0,L1,L2} + + C Clear carry, with the sbcs, this is the 1. 
+ adds RP, #0 + + sbcs T0, L0, T0 + sbcs T1, L1, T1 + sbcs T2, L2, T2 + ldm RP!, {T3,L0,L1,L2} + sbcs T3, T3, N3 + sbcs T4, L0, T4 + sbcs T5, L1, T5 + sbcs T6, L2, T6 + rsc H, H, #0 + + C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H| + C Use (B^3 - 1) H = <H, H, H> if -1 <= H <= 0, and + C (B^3 - 1) H = <1,B-1, B-1, B-2> if H = -2 + subs T0, T0, H + asr L1, H, #1 + sbcs T1, T1, L1 + eor H, H, L1 + sbcs T2, T2, L1 + sbcs T3, T3, H + sbcs T4, T4, #0 + sbcs T5, T5, #0 + sbcs T6, T6, #0 + sbcs H, H, H + + C Final borrow, subtract (B^3 - 1) |H| + subs T0, T0, H + sbcs T1, T1, H + sbcs T2, T2, H + sbcs T3, T3, #0 + sbcs T4, T4, #0 + sbcs T5, T5, #0 + sbcs T6, T6, #0 + + C RP was advanced by 7 words by the two ldm RP! loads above, + C so stmdb writes back to rp[0..6]. + stmdb RP, {T0,T1,T2,T3,T4,T5,T6} + + pop {r4,r5,r6,r7,r8,r10,r11,pc} +EPILOGUE(nettle_ecc_224_modp) diff --git a/armv7/ecc-256-redc.asm b/armv7/ecc-256-redc.asm new file mode 100644 index 00000000..cbf10a89 --- /dev/null +++ b/armv7/ecc-256-redc.asm @@ -0,0 +1,160 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. 
+ + .file "ecc-256-redc.asm" + .arm + +define(<RP>, <r1>) + +define(<T0>, <r0>) C Overlaps unused ecc argument +define(<T1>, <r2>) +define(<T2>, <r3>) +define(<T3>, <r4>) +define(<T4>, <r5>) +define(<T5>, <r6>) +define(<T6>, <r7>) +define(<T7>, <r8>) +define(<F0>, <r10>) +define(<F1>, <r11>) +define(<F2>, <r12>) +define(<F3>, <lr>) + + C ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp) + C Reads the 16 words rp[0..15] (loaded as 8, 3, 3 and 2 words) and + C stores the 8 result words to rp[0..7]. + C r0 is overwritten by the first load before it is read (reused as T0). + C lr is used as scratch (F3); its saved value is popped straight into pc. + .text + .align 2 + +PROLOGUE(nettle_ecc_256_redc) + push {r4,r5,r6,r7,r8,r10,r11,lr} + + ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7} + + C Set <F3,F2,F1,F0> to the high 4 limbs of (B^2-B+1)<T2,T1,T0> + C T2 T1 + C T2 T1 T0 + C - T2 T1 T0 + C ------------- + C F3 F2 F1 F0 + + + adds F1, T0, T2 + adcs F2, T1, #0 + adc F3, T2, #0 + + subs F0, T1, T0 + sbcs F1, F1, T1 C Could also be rsc ? + sbcs F2, F2, T2 + sbc F3, F3, #0 + + C Add: + C T10 T9 T8 T7 T6 T5 T4 T3 + C + F3 F2 F1 F0 T0 T2 T1 T0 + C -------------------------- + C T7 T6 T5 T4 T3 T2 T1 T0 + + adds T3, T3, T0 + adcs T1, T4, T1 + adcs T2, T5, T2 + adcs T6, T6, T0 + mov T0, T3 C FIXME: Be more clever? + mov T3, T6 + adcs T4, T7, F0 + + ldm RP!, {T5,T6,T7} + adcs T5, T5, F1 + adcs T6, T6, F2 + adcs T7, T7, F3 + + C New F3, F2, F1, F0, also adding in carry + adcs F1, T0, T2 + adcs F2, T1, #0 + adc F3, T2, #0 + + subs F0, T1, T0 + sbcs F1, F1, T1 C Could also be rsc ? + sbcs F2, F2, T2 + sbc F3, F3, #0 + + C Start adding + adds T3, T3, T0 + adcs T1, T4, T1 + adcs T2, T5, T2 + adcs T6, T6, T0 + mov T0, T3 C FIXME: Be more clever? 
+ mov T3, T6 + adcs T4, T7, F0 + + ldm RP!, {T5,T6,T7} + adcs T5, T5, F1 + adcs T6, T6, F2 + adcs T7, T7, F3 + + C Final iteration, eliminate only T0, T1 + C Set <F2, F1, F0> to the high 3 limbs of (B^2-B+1)<T1,T0> + + C T1 T0 T1 + C - T1 T0 + C ------------- + C F2 F1 F0 + + C First add in carry + adcs F1, T0, #0 + adcs F2, T1, #0 + subs F0, T1, T0 + sbcs F1, F1, T1 + sbc F2, F2, #0 + + C Add: + C T9 T8 T7 T6 T5 T4 T3 T2 + C + F2 F1 F0 T0 0 T1 T0 0 + C -------------------------- + C F2 F1 T7 T6 T5 T4 T3 T2 + + adds T3, T3, T0 + adcs T4, T4, T1 + adcs T5, T5, #0 + adcs T6, T6, T0 + adcs T7, T7, F0 + ldm RP!, {T0, T1} + mov F3, #0 + adcs F1, F1, T0 + adcs F2, F2, T1 + + C Sum is < B^8 + p, so it's enough to fold carry once, + C If carry, add in + C B^7 - B^6 - B^3 + 1 = <0, B-2, B-1, B-1, B-1, 0, 0, 1> + + C Mask from carry flag, leaving carry intact + adc F3, F3, #0 + rsb F3, F3, #0 + + adcs T0, T2, #0 + adcs T1, T3, #0 + adcs T2, T4, #0 + adcs T3, T5, F3 + adcs T4, T6, F3 + adcs T5, T7, F3 + and F3, F3, #-2 + adcs T6, F1, F3 + adcs T7, F2, #0 + + C The four ldm RP! loads advanced RP by 8+3+3+2 words = 64 bytes; + C rewind so the result is stored at rp[0..7]. + sub RP, RP, #64 + stm RP, {T0,T1,T2,T3,T4,T5,T6,T7} + + pop {r4,r5,r6,r7,r8,r10,r11,pc} +EPILOGUE(nettle_ecc_256_redc) diff --git a/armv7/ecc-384-modp.asm b/armv7/ecc-384-modp.asm new file mode 100644 index 00000000..fb5a6e12 --- /dev/null +++ b/armv7/ecc-384-modp.asm @@ -0,0 +1,257 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. 
+C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. + + .file "ecc-384-modp.asm" + .arm + +define(<RP>, <r1>) +define(<T0>, <r0>) +define(<T1>, <r2>) +define(<T2>, <r3>) +define(<T3>, <r4>) +define(<F0>, <r5>) +define(<F1>, <r6>) +define(<F2>, <r7>) +define(<F3>, <r8>) +define(<F4>, <r10>) +define(<N>, <r12>) +define(<H>, <lr>) + + C ecc_384_modp (const struct ecc_curve *ecc, mp_limb_t *rp) + C Reads the 24 words rp[0..23]; all stores go to rp[0..11]. + C r0 is overwritten by the first load before it is read (reused as T0). + C lr is used as scratch (H); its saved value is popped straight into pc. + .text + .align 2 + +PROLOGUE(nettle_ecc_384_modp) + push {r4,r5,r6,r7,r8,r10,lr} + + add RP, RP, #80 + ldm RP, {T0, T1, T2, T3} C 20-23 + + C First get top 4 limbs, which need folding twice, as + C + C T3 T2 T1 T0 + C T3 T2 T1 + C -T3 + C ---------------- + C F4 F3 F2 F1 F0 + C + C Start with + C + C T3 T1 T0 + C T1 + C -T3 + C ----------- + C F2 F1 F0 Always fits + + adds F0, T0, T1 + adcs F1, T1, #0 + adcs F2, T3, #0 + subs F0, F0, T3 + sbcs F1, F1, #0 + sbcs F2, F2, #0 + + C T3 T2 T2 0 + C F2 F1 F0 + C ---------------- + C F4 F3 F2 F1 F0 + + mov F4, #0 + adds F1, F1, T2 + adcs F2, F2, T2 + adcs F3, T3, #0 + adcs F4, F4, #0 + + C Add in to high part + sub RP, RP, #32 + ldm RP, {T0, T1, T2, T3} C 12-15 + C NOTE(review): H is cleared again below before it is first read, + C so this mov looks redundant. + mov H, #0 + adds F0, T0, F0 + adcs F1, T1, F1 + adcs F2, T2, F2 + adcs F3, T3, F3 + adcs F4, F4, #0 C Do F4 later + + C Add to low part, keeping carry (positive or negative) in H + sub RP, RP, #48 + ldm RP, {T0, T1, T2, T3} C 0-3 + mov H, #0 + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + subs T1, T1, F0 + sbcs T2, T2, F1 + sbcs T3, T3, F2 + sbc H, H, #0 + adds T3, T3, F0 + adc H, H, #0 + + stm RP!, {T0,T1,T2,T3} C 0-3 + mov N, #2 + C Two passes; RP advances by 4 words per pass, so the limb numbers + C in the comments below are those of the first pass. +.Loop: + ldm RP, {T0,T1,T2,T3} C 4-7 + + C First, propagate carry + adds T0, T0, H + asr H, #31 C Sign extend + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + adc H, H, #0 + + C +B^4 term + adds T0, 
T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + + C +B^3 terms + ldr F0, [RP, #+48] C 16 + adds T0, T0, F1 + adcs T1, T1, F2 + adcs T2, T2, F3 + adcs T3, T3, F0 + adc H, H, #0 + + C -B + ldr F1, [RP, #+52] C 17-18 + ldr F2, [RP, #+56] + subs T0, T0, F3 + sbcs T1, T1, F0 + sbcs T2, T2, F1 + sbcs T3, T3, F2 + sbcs H, H, #0 + + C +1 + ldr F3, [RP, #+60] C 19 + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + subs N, N, #1 + stm RP!, {T0,T1,T2,T3} + bne .Loop + + C Fold high limbs, we need to add in + C + C F4 F4 0 -F4 F4 H H 0 -H H + C + C We always have F4 >= 0, but we can have H < 0. + C Sign extension gets tricky when F4 = 0 and H < 0. + sub RP, RP, #48 + + ldm RP, {T0,T1,T2,T3} C 0-3 + + C H H 0 -H H + C ---------------- + C S H F3 F2 F1 F0 + C + C Define S = H >> 31 (asr), we then have + C + C F0 = H + C F1 = S - H + C F2 = - [H > 0] + C F3 = H - [H > 0] + C H = H + S + C + C And we get underflow in S - H iff H > 0 + + C H = 0 H > 0 H = -1 + mov F0, H C 0 H -1 + asr H, #31 + subs F1, H, F0 C 0,C=1 -H,C=0 0,C=1 + sbc F2, F2, F2 C 0 -1 0 + sbc F3, F0, #0 C 0 H-1 -1 + + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, F0 C 0+cy H+cy -2+cy + + stm RP!, {T0,T1,T2,T3} C 0-3 + ldm RP, {T0,T1,T2,T3} C 4-7 + + C F4 0 -F4 + C --------- + C F3 F2 F1 + + rsbs F1, F4, #0 + sbc F2, F2, F2 + sbc F3, F4, #0 + + C Sign extend H + adds F0, F4, H + asr H, H, #31 + adcs F1, F1, H + adcs F2, F2, H + adcs F3, F3, H + adcs F4, F4, H + adc H, H, #0 + + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + + stm RP!, {T0,T1,T2,T3} C 4-7 + ldm RP, {T0,T1,T2,T3} C 8-11 + + adcs T0, T0, F4 + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + adc H, H, #0 + + stm RP, {T0,T1,T2,T3} C 8-11 + + C Final (unlikely) carry + sub RP, RP, #32 + ldm RP, {T0,T1,T2,T3} C 0-3 + C Fold H into F0-F4 + mov F0, H + asr H, #31 + subs F1, H, F0 + sbc F2, F2, F2 + sbc F3, F0, #0 + add F4, F0, H 
+ + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + + stm RP!, {T0,T1,T2,T3} C 0-3 + ldm RP, {T0,T1,T2,T3} C 4-7 + adcs T0, T0, F4 + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + stm RP!, {T0,T1,T2,T3} C 4-7 + ldm RP, {T0,T1,T2,T3} C 8-11 + adcs T0, T0, H + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + stm RP!, {T0,T1,T2,T3} C 8-11 + pop {r4,r5,r6,r7,r8,r10,pc} +EPILOGUE(nettle_ecc_384_modp) diff --git a/armv7/ecc-521-modp.asm b/armv7/ecc-521-modp.asm new file mode 100644 index 00000000..fe305805 --- /dev/null +++ b/armv7/ecc-521-modp.asm @@ -0,0 +1,114 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. 
+ + .file "ecc-521-modp.asm" + .arm + +define(<HP>, <r0>) +define(<RP>, <r1>) +define(<T0>, <r2>) +define(<T1>, <r3>) +define(<T2>, <r4>) +define(<F0>, <r5>) +define(<F1>, <r6>) +define(<F2>, <r7>) +define(<F3>, <r8>) +define(<H>, <r12>) +define(<N>, <lr>) + + C ecc_521_modp (const struct ecc_curve *ecc, mp_limb_t *rp) + C Reads the 34 words rp[0..33] and stores results to rp[0..16]. + C r0 is overwritten before it is read (it is reused as HP). + C lr is used as the loop counter N; its saved value is popped straight into pc. + .text +.Lc511: + .int 511 + + .align 2 + +PROLOGUE(nettle_ecc_521_modp) + push {r4,r5,r6,r7,r8,lr} + + C Use that B^17 = 2^23 (mod p) + ldr F3, [RP, #+68] C 17 + add HP, RP, #72 C 18 + ldr T0, [RP] C 0 + adds T0, T0, F3, lsl #23 + str T0, [RP], #+4 + mov N, #5 + + C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32 + C and adding to limbs 1-3, 4-6, 7-9, 10-12, 13-15 + C NOTE(review): the counter is presumably updated with sub + teq + C rather than subs so that the carry consumed by the adcs chain + C survives across iterations -- confirm against the ARM ARM. +.Loop: + ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k + lsr F0, F3, #9 + ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k + orr F0, F0, F1, lsl #23 + lsr F1, F1, #9 + orr F1, F1, F2, lsl #23 + lsr F2, F2, #9 + orr F2, F2, F3, lsl #23 + adcs T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + sub N, N, #1 + stm RP!,{T0,T1,T2} + teq N, #0 + bne .Loop + + ldr F0, [RP], #-64 C 16 + ldr F1, [HP] C 33 + ldr T0, .Lc511 + + C Handling of high limbs + C F0 = rp[16] + carry in + F3 >> 9 + adcs F0, F0, F3, lsr #9 + C Copy low 9 bits to H, then shift right including carry + and H, F0, T0 + rrx F0, F0 + lsr F0, F0, #8 + C Add in F1 = rp[33], with weight 2^1056 = 2^14 + adds F0, F0, F1, lsl #14 + lsr F1, F1, #18 + adc F1, F1, #0 + + ldm RP, {T0, T1} C 0-1 + adds T0, T0, F0 + adcs T1, T1, F1 + stm RP!, {T0, T1} + + ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8 + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, #0 + adcs F0, F0, #0 + adcs F1, F1, #0 + adcs F2, F2, #0 + adcs F3, F3, #0 + stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8 + ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15 + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, #0 + adcs F0, F0, #0 + adcs F1, F1, #0 + adcs F2, F2, #0 + adcs F3, F3, #0 + adcs H, H, #0 + stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16 + + pop {r4,r5,r6,r7,r8,pc} +EPILOGUE(nettle_ecc_521_modp) |