summaryrefslogtreecommitdiff
path: root/armv7
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2013-03-04 15:18:10 +0100
committerNiels Möller <nisse@lysator.liu.se>2013-03-04 15:18:10 +0100
commit16768e1f7f3ce6744c46ebf61008e0a68dd2c677 (patch)
tree5b34385d632839b16db742df62f7916e6ed4aa3e /armv7
parent3ac426dfe3a7bd9ab242098305ef7c4b775bac5c (diff)
downloadnettle-16768e1f7f3ce6744c46ebf61008e0a68dd2c677.tar.gz
ARM assembly for ecc_384_modp.
Diffstat (limited to 'armv7')
-rw-r--r--armv7/ecc-384-modp.asm257
1 files changed, 257 insertions, 0 deletions
diff --git a/armv7/ecc-384-modp.asm b/armv7/ecc-384-modp.asm
new file mode 100644
index 00000000..e34d95f8
--- /dev/null
+++ b/armv7/ecc-384-modp.asm
@@ -0,0 +1,257 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2013, Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+ .file "ecc-384-modp.asm"
+ C Assemble as ARM (not Thumb) instructions.
+ .arm
+
+ C Register roles used by nettle_ecc_384_modp:
+ C
+ C RP     pointer into the limb array rp; it is moved back and forth
+ C        over the array as limbs are loaded and stored.
+ C T0-T3  four limbs of rp at a time, loaded with ldm and written
+ C        back with stm. T0 aliases r0, the register of the first
+ C        argument (ecc).
+ C F0-F4  limbs of the value being folded into the low part.
+ C N      loop counter.
+ C H      signed carry (positive or negative) between groups of
+ C        limbs. It aliases lr, which the function pushes on entry.
+ C
+ C NOTE(review): r9 and r11 are not used; presumably they are avoided
+ C because some ARM ABIs reserve them - confirm.
+define(<RP>, <r1>)
+define(<T0>, <r0>)
+define(<T1>, <r2>)
+define(<T2>, <r3>)
+define(<T3>, <r4>)
+define(<F0>, <r5>)
+define(<F1>, <r6>)
+define(<F2>, <r7>)
+define(<F3>, <r8>)
+define(<F4>, <r10>)
+define(<N>, <r12>)
+define(<H>, <lr>)
+
+ C ecc_384_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
+ .text
+ C On ARM, .align takes a power of two: align to 4 bytes.
+ .align 2
+
+ C nettle_ecc_384_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
+ C
+ C In:    RP (r1) = rp. The 24 32-bit limbs rp[0..23] are read.
+ C        r0 (ecc) is never read; it is overwritten as T0.
+ C Out:   rp[0..11] hold the folded value. rp[12..23] are not written.
+ C Saves: r4-r8, r10 and lr are pushed on entry and popped on return
+ C        (lr is popped straight into pc).
+ C
+ C With B = 2^32, every fold replaces B^12 by B^4 + B^3 - B + 1: a
+ C limb at position 12+k is added at positions k+4, k+3 and k, and
+ C subtracted at position k+1.
+PROLOGUE(nettle_ecc_384_modp)
+ push {r4,r5,r6,r7,r8,r10,lr}
+
+ add RP, RP, #80
+ ldm RP, {T0, T1, T2, T3} C 20-23
+
+ C First get top 4 limbs, which need folding twice, as
+ C
+ C     T3 T2 T1 T0
+ C        T3 T2 T1
+ C             -T3
+ C ----------------
+ C  F4 F3 F2 F1 F0
+ C
+ C Start with
+ C
+ C   T3 T1 T0
+ C         T1
+ C        -T3
+ C -----------
+ C   F2 F1 F0   Always fits
+
+ adds F0, T0, T1
+ adcs F1, T1, #0
+ adcs F2, T3, #0
+ subs F0, F0, T3
+ sbcs F1, F1, #0
+ sbcs F2, F2, #0
+
+ C      T3 T2 T2  0
+ C         F2 F1 F0
+ C  ----------------
+ C   F4 F3 F2 F1 F0
+
+ mov F4, #0
+ adds F1, F1, T2
+ adcs F2, F2, T2
+ adcs F3, T3, #0
+ adcs F4, F4, #0
+
+ C Add in to high part: F0-F3 += limbs 12-15, carry into F4.
+ C H is not needed yet; it is cleared just before its first use below.
+ sub RP, RP, #32
+ ldm RP, {T0, T1, T2, T3} C 12-15
+ adds F0, T0, F0
+ adcs F1, T1, F1
+ adcs F2, T2, F2
+ adcs F3, T3, F3
+ adcs F4, F4, #0 C Do F4 later
+
+ C Add to low part, keeping carry (positive or negative) in H
+ C
+ C Limbs 0-3 receive F3 F2 F1 F0 (the +1 term), minus F2 F1 F0 one
+ C limb up (the -B term), plus F0 at limb 3 (the +B^3 term).
+ sub RP, RP, #48
+ ldm RP, {T0, T1, T2, T3} C 0-3
+ mov H, #0
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+ adc H, H, #0
+ subs T1, T1, F0
+ sbcs T2, T2, F1
+ sbcs T3, T3, F2
+ sbc H, H, #0
+ adds T3, T3, F0
+ adc H, H, #0
+
+ stm RP!, {T0,T1,T2,T3} C 0-3
+ C Two iterations: limbs 4-7, then limbs 8-11. On entry to the first
+ C one F0-F3 hold the folded limbs 12-15; each iteration leaves the
+ C next four high limbs (16-19, then 20-23) in F0-F3.
+ mov N, #2
+.Loop:
+ ldm RP, {T0,T1,T2,T3} C 4-7, then 8-11
+
+ C First, propagate carry
+ adds T0, T0, H
+ asr H, #31 C Sign extend (asr leaves the carry flag alone)
+ adcs T1, T1, H
+ adcs T2, T2, H
+ adcs T3, T3, H
+ adc H, H, #0
+
+ C +B^4 term
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+ adc H, H, #0
+
+ C +B^3 terms
+ ldr F0, [RP, #+48] C 16, then 20
+ adds T0, T0, F1
+ adcs T1, T1, F2
+ adcs T2, T2, F3
+ adcs T3, T3, F0
+ adc H, H, #0
+
+ C -B
+ ldr F1, [RP, #+52] C 17-18, then 21-22
+ ldr F2, [RP, #+56]
+ subs T0, T0, F3
+ sbcs T1, T1, F0
+ sbcs T2, T2, F1
+ sbcs T3, T3, F2
+ sbcs H, H, #0
+
+ C +1
+ ldr F3, [RP, #+60] C 19, then 23
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+ adc H, H, #0
+ subs N, N, #1
+ stm RP!, {T0,T1,T2,T3} C stm does not touch the flags set by subs
+ bne .Loop
+
+ C Fold high limbs, we need to add in
+ C
+ C limb:  8   7   6   5   4   3   2   1   0
+ C                        H   H   0  -H   H
+ C       F4  F4   0 -F4  F4
+ C
+ C We always have F4 >= 0, but we can have H < 0.
+ C Sign extension gets tricky when F4 = 0 and H < 0.
+ sub RP, RP, #48
+
+ ldm RP, {T0,T1,T2,T3} C 0-3
+
+ C     H  H  0 -H  H
+ C  ----------------
+ C  S  H F3 F2 F1 F0
+ C
+ C Define S = H >> 31 (asr), we then have
+ C
+ C  F0 = H
+ C  F1 = S - H
+ C  F2 = - [H > 0]
+ C  F3 = H - [H > 0]
+ C   H = H + S   (new H, to be added at limb 4; F4 still holds
+ C                the deferred top limb)
+ C
+ C And we get underflow in S - H iff H > 0
+
+ C                    H = 0   H > 0   H = -1
+ mov F0, H          C 0       H       -1
+ asr H, #31
+ subs F1, H, F0     C 0,C=1   -H,C=0  0,C=1
+ C F2 - F2 - borrow: the old contents of F2 cancel out.
+ sbc F2, F2, F2     C 0       -1      0
+ sbc F3, F0, #0     C 0       H-1     -1
+
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+ adc H, H, F0       C 0+cy    H+cy    -2+cy
+
+ stm RP!, {T0,T1,T2,T3} C 0-3
+ ldm RP, {T0,T1,T2,T3} C 4-7
+
+ C   F4  0 -F4
+ C   ---------
+ C   F3 F2  F1
+
+ rsbs F1, F4, #0
+ sbc F2, F2, F2
+ sbc F3, F4, #0
+
+ C Sign extend H, and add it to F4 F3 F2 F1 F4 (F0 takes the low
+ C limb). Afterwards H F4 F3 F2 F1 F0 is the value to add at limb 4.
+ adds F0, F4, H
+ asr H, H, #31
+ adcs F1, F1, H
+ adcs F2, F2, H
+ adcs F3, F3, H
+ adcs F4, F4, H
+ adc H, H, #0
+
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+
+ C stm and ldm leave the flags alone, so the carry chain continues.
+ stm RP!, {T0,T1,T2,T3} C 4-7
+ ldm RP, {T0,T1,T2,T3} C 8-11
+
+ adcs T0, T0, F4
+ adcs T1, T1, H
+ adcs T2, T2, H
+ adcs T3, T3, H
+ adc H, H, #0
+
+ stm RP, {T0,T1,T2,T3} C 8-11
+
+ C Final (unlikely) carry
+ sub RP, RP, #32
+ ldm RP, {T0,T1,T2,T3} C 0-3
+ C Fold H into F0-F4, same pattern as the first fold of H above,
+ C except that the top limb H + S goes to F4 and H keeps S.
+ mov F0, H
+ asr H, #31
+ subs F1, H, F0
+ sbc F2, F2, F2
+ sbc F3, F0, #0
+ add F4, F0, H
+
+ adds T0, T0, F0
+ adcs T1, T1, F1
+ adcs T2, T2, F2
+ adcs T3, T3, F3
+
+ stm RP!, {T0,T1,T2,T3} C 0-3
+ ldm RP, {T0,T1,T2,T3} C 4-7
+ adcs T0, T0, F4
+ adcs T1, T1, H
+ adcs T2, T2, H
+ adcs T3, T3, H
+ stm RP!, {T0,T1,T2,T3} C 4-7
+ ldm RP, {T0,T1,T2,T3} C 8-11
+ adcs T0, T0, H
+ adcs T1, T1, H
+ adcs T2, T2, H
+ adcs T3, T3, H
+ C The carry out of limb 11 is not examined after this point.
+ stm RP!, {T0,T1,T2,T3} C 8-11
+ pop {r4,r5,r6,r7,r8,r10,pc}
+EPILOGUE(nettle_ecc_384_modp)