summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-10-31 10:26:16 +0100
committer Niels Möller <nisse@lysator.liu.se> 2020-10-31 10:26:16 +0100
commit 8c7869f66e44fe119bbdb36416c865ac84f570a7 (patch)
tree ee96d4928110a3485ec620fd58c846a8b8562dfc
parent 9b1594b4a0b1637dab9d6869d89cd7f1adc8f173 (diff)
download nettle-8c7869f66e44fe119bbdb36416c865ac84f570a7.tar.gz
Update ARM mod and redc functions
-rw-r--r-- arm/ecc-secp192r1-modp.asm 31
-rw-r--r-- arm/ecc-secp224r1-modp.asm 22
-rw-r--r-- arm/ecc-secp256r1-redc.asm 23
-rw-r--r-- arm/ecc-secp384r1-modp.asm 72
-rw-r--r-- arm/ecc-secp521r1-modp.asm 39
5 files changed, 99 insertions, 88 deletions
diff --git a/arm/ecc-secp192r1-modp.asm b/arm/ecc-secp192r1-modp.asm
index 72a81a54..e500bc85 100644
--- a/arm/ecc-secp192r1-modp.asm
+++ b/arm/ecc-secp192r1-modp.asm
@@ -35,15 +35,16 @@ ifelse(`
define(`HP', `r0') C Overlaps unused modulo argument
define(`RP', `r1')
-
-define(`T0', `r2')
-define(`T1', `r3')
-define(`T2', `r4')
-define(`T3', `r5')
-define(`T4', `r6')
-define(`T5', `r7')
-define(`T6', `r8')
-define(`T7', `r10')
+define(`XP', `r2')
+
+define(`T0', `r3')
+define(`T1', `r4')
+define(`T2', `r5')
+define(`T3', `r6')
+define(`T4', `r7')
+define(`T5', `r8')
+define(`T6', `r10')
+define(`T7', `r11')
define(`H0', `T0') C Overlaps T0 and T1
define(`H1', `T1')
define(`C2', `HP')
@@ -54,12 +55,12 @@ define(`C4', `r12')
.align 2
PROLOGUE(_nettle_ecc_secp192r1_modp)
- push {r4,r5,r6,r7,r8,r10}
+ push {r4,r5,r6,r7,r8,r10,r11}
C Reduce two words at a time
- add HP, RP, #48
- add RP, RP, #8
+ add HP, XP, #48
+ add XP, XP, #8
ldmdb HP!, {H0,H1}
- ldm RP, {T2,T3,T4,T5,T6,T7}
+ ldm XP, {T2,T3,T4,T5,T6,T7}
mov C4, #0
adds T4, T4, H0
adcs T5, T5, H1
@@ -77,7 +78,7 @@ PROLOGUE(_nettle_ecc_secp192r1_modp)
C Need to add carry to T0 and T2, do T2 later
adc C2, C2, #0
- ldmdb RP!, {T0, T1}
+ ldmdb XP!, {T0, T1}
adcs T0, T0, T6
adcs T1, T1, T7
adcs T2, T2, T6
@@ -101,6 +102,6 @@ PROLOGUE(_nettle_ecc_secp192r1_modp)
stm RP, {T0,T1,T2,T3,T4,T5}
- pop {r4,r5,r6,r7,r8,r10}
+ pop {r4,r5,r6,r7,r8,r10,r11}
bx lr
EPILOGUE(_nettle_ecc_secp192r1_modp)
diff --git a/arm/ecc-secp224r1-modp.asm b/arm/ecc-secp224r1-modp.asm
index 3256601c..4b3b24e5 100644
--- a/arm/ecc-secp224r1-modp.asm
+++ b/arm/ecc-secp224r1-modp.asm
@@ -33,10 +33,11 @@ ifelse(`
.file "ecc-secp224r1-modp.asm"
.arm
-define(`RP', `r1')
-define(`H', `r0') C Overlaps unused modulo argument
+define(`RP', `r1') C Overlaps T0
+define(`XP', `r2')
+define(`H', `r0') C Overlaps unused modulo argument
-define(`T0', `r2')
+define(`T0', `r1')
define(`T1', `r3')
define(`T2', `r4')
define(`T3', `r5')
@@ -53,9 +54,10 @@ define(`L2', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp224r1_modp)
- push {r4,r5,r6,r7,r8,r10,r11,lr}
+ C Pushes RP last
+ push {r1,r4,r5,r6,r7,r8,r10,r11,lr}
- add L2, RP, #28
+ add L2, XP, #28
ldm L2, {T0,T1,T2,T3,T4,T5,T6}
mov H, #0
@@ -80,15 +82,15 @@ PROLOGUE(_nettle_ecc_secp224r1_modp)
sbc H, #0
C Now subtract from low half
- ldm RP!, {L0,L1,L2}
+ ldm XP!, {L0,L1,L2}
C Clear carry; with the sbcs below, a clear carry subtracts an extra 1.
- adds RP, #0
+ adds XP, #0
sbcs T0, L0, T0
sbcs T1, L1, T1
sbcs T2, L2, T2
- ldm RP!, {T3,L0,L1,L2}
+ ldm XP!, {T3,L0,L1,L2}
sbcs T3, T3, N3
sbcs T4, L0, T4
sbcs T5, L1, T5
@@ -109,6 +111,8 @@ PROLOGUE(_nettle_ecc_secp224r1_modp)
sbcs T6, T6, #0
sbcs H, H, H
+ pop {XP} C Original RP
+
C Final borrow, subtract (B^3 - 1) |H|
subs T0, T0, H
sbcs T1, T1, H
@@ -118,7 +122,7 @@ PROLOGUE(_nettle_ecc_secp224r1_modp)
sbcs T5, T5, #0
sbcs T6, T6, #0
- stmdb RP, {T0,T1,T2,T3,T4,T5,T6}
+ stm XP, {T0,T1,T2,T3,T4,T5,T6}
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp224r1_modp)
diff --git a/arm/ecc-secp256r1-redc.asm b/arm/ecc-secp256r1-redc.asm
index e127a2f2..da574398 100644
--- a/arm/ecc-secp256r1-redc.asm
+++ b/arm/ecc-secp256r1-redc.asm
@@ -33,10 +33,11 @@ ifelse(`
.file "ecc-secp256r1-redc.asm"
.arm
-define(`RP', `r1')
+define(`RP', `r1') C Overlaps T1 below
+define(`XP', `r2')
-define(`T0', `r0') C Overlaps unused modulo argument
-define(`T1', `r2')
+define(`T0', `r0') C Overlaps unused modulo argument
+define(`T1', `r1')
define(`T2', `r3')
define(`T3', `r4')
define(`T4', `r5')
@@ -53,9 +54,10 @@ define(`F3', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp256r1_redc)
- push {r4,r5,r6,r7,r8,r10,r11,lr}
+ C Pushes RP last
+ push {r1, r4,r5,r6,r7,r8,r10,r11,lr}
- ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7}
+ ldm XP!, {T0,T1,T2,T3,T4,T5,T6,T7}
C Set <F3,F2,F1> to the high 4 limbs of (B^2-B+1)<T2,T1,T0>
C T2 T1
@@ -88,7 +90,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
mov T3, T6
adcs T4, T7, F0
- ldm RP!, {T5,T6,T7}
+ ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
@@ -112,7 +114,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
mov T3, T6
adcs T4, T7, F0
- ldm RP!, {T5,T6,T7}
+ ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
@@ -143,7 +145,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
adcs T5, T5, #0
adcs T6, T6, T0
adcs T7, T7, F0
- ldm RP!, {T0, T1}
+ ldm XP!, {T0, T1}
mov F3, #0
adcs F1, F1, T0
adcs F2, F2, T1
@@ -156,6 +158,8 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
adc F3, F3, #0
rsb F3, F3, #0
+ pop {XP} C Original RP
+
adcs T0, T2, #0
adcs T1, T3, #0
adcs T2, T4, #0
@@ -166,8 +170,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc)
adcs T6, F1, F3
adcs T7, F2, #0
- sub RP, RP, #64
- stm RP, {T0,T1,T2,T3,T4,T5,T6,T7}
+ stm XP, {T0,T1,T2,T3,T4,T5,T6,T7}
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp256r1_redc)
diff --git a/arm/ecc-secp384r1-modp.asm b/arm/ecc-secp384r1-modp.asm
index 96744ee9..840eead8 100644
--- a/arm/ecc-secp384r1-modp.asm
+++ b/arm/ecc-secp384r1-modp.asm
@@ -34,15 +34,17 @@ ifelse(`
.arm
define(`RP', `r1')
+define(`XP', `r2')
+
define(`T0', `r0')
-define(`T1', `r2')
-define(`T2', `r3')
-define(`T3', `r4')
-define(`F0', `r5')
-define(`F1', `r6')
-define(`F2', `r7')
-define(`F3', `r8')
-define(`F4', `r10')
+define(`T1', `r3')
+define(`T2', `r4')
+define(`T3', `r5')
+define(`F0', `r6')
+define(`F1', `r7')
+define(`F2', `r8')
+define(`F3', `r10')
+define(`F4', `r11')
define(`N', `r12')
define(`H', `lr')
@@ -51,10 +53,10 @@ define(`H', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp384r1_modp)
- push {r4,r5,r6,r7,r8,r10,lr}
+ push {r4,r5,r6,r7,r8,r10,r11,lr}
- add RP, RP, #80
- ldm RP, {T0, T1, T2, T3} C 20-23
+ add XP, XP, #80
+ ldm XP, {T0, T1, T2, T3} C 20-23
C First get top 4 limbs, which need folding twice, as
C
@@ -91,8 +93,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs F4, F4, #0
C Add in to high part
- sub RP, RP, #32
- ldm RP, {T0, T1, T2, T3} C 12-15
+ sub XP, XP, #32
+ ldm XP, {T0, T1, T2, T3} C 12-15
mov H, #0
adds F0, T0, F0
adcs F1, T1, F1
@@ -101,8 +103,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs F4, F4, #0 C Do F4 later
C Add to low part, keeping carry (positive or negative) in H
- sub RP, RP, #48
- ldm RP, {T0, T1, T2, T3} C 0-3
+ sub XP, XP, #48
+ ldm XP, {T0, T1, T2, T3} C 0-3
mov H, #0
adds T0, T0, F0
adcs T1, T1, F1
@@ -116,10 +118,10 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adds T3, T3, F0
adc H, H, #0
- stm RP!, {T0,T1,T2,T3} C 0-3
+ stm XP!, {T0,T1,T2,T3} C 0-3
mov N, #2
.Loop:
- ldm RP, {T0,T1,T2,T3} C 4-7
+ ldm XP, {T0,T1,T2,T3} C 4-7
C First, propagate carry
adds T0, T0, H
@@ -137,7 +139,7 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adc H, H, #0
C +B^3 terms
- ldr F0, [RP, #+48] C 16
+ ldr F0, [XP, #+48] C 16
adds T0, T0, F1
adcs T1, T1, F2
adcs T2, T2, F3
@@ -145,8 +147,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adc H, H, #0
C -B
- ldr F1, [RP, #+52] C 17-18
- ldr F2, [RP, #+56]
+ ldr F1, [XP, #+52] C 17-18
+ ldr F2, [XP, #+56]
subs T0, T0, F3
sbcs T1, T1, F0
sbcs T2, T2, F1
@@ -154,14 +156,14 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
sbcs H, H, #0
C +1
- ldr F3, [RP, #+60] C 19
+ ldr F3, [XP, #+60] C 19
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, #0
subs N, N, #1
- stm RP!, {T0,T1,T2,T3}
+ stm XP!, {T0,T1,T2,T3}
bne .Loop
C Fold high limbs, we need to add in
@@ -170,9 +172,9 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
C
C We always have F4 >= 0, but we can have H < 0.
C Sign extension gets tricky when F4 = 0 and H < 0.
- sub RP, RP, #48
+ sub XP, XP, #48
- ldm RP, {T0,T1,T2,T3} C 0-3
+ ldm XP, {T0,T1,T2,T3} C 0-3
C H H 0 -H H
C ----------------
@@ -201,8 +203,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs T3, T3, F3
adc H, H, F0 C 0+cy H+cy -2+cy
- stm RP!, {T0,T1,T2,T3} C 0-3
- ldm RP, {T0,T1,T2,T3} C 4-7
+ stm XP!, {T0,T1,T2,T3} C 0-3
+ ldm XP, {T0,T1,T2,T3} C 4-7
C F4 0 -F4
C ---------
@@ -226,8 +228,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs T2, T2, F2
adcs T3, T3, F3
- stm RP!, {T0,T1,T2,T3} C 4-7
- ldm RP, {T0,T1,T2,T3} C 8-11
+ stm XP!, {T0,T1,T2,T3} C 4-7
+ ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, F4
adcs T1, T1, H
@@ -235,11 +237,11 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs T3, T3, H
adc H, H, #0
- stm RP, {T0,T1,T2,T3} C 8-11
+ stm XP, {T0,T1,T2,T3} C 8-11
C Final (unlikely) carry
- sub RP, RP, #32
- ldm RP, {T0,T1,T2,T3} C 0-3
+ sub XP, XP, #32
+ ldm XP!, {T0,T1,T2,T3} C 0-3
C Fold H into F0-F4
mov F0, H
asr H, #31
@@ -254,17 +256,17 @@ PROLOGUE(_nettle_ecc_secp384r1_modp)
adcs T3, T3, F3
stm RP!, {T0,T1,T2,T3} C 0-3
- ldm RP, {T0,T1,T2,T3} C 4-7
+ ldm XP!, {T0,T1,T2,T3} C 4-7
adcs T0, T0, F4
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
stm RP!, {T0,T1,T2,T3} C 4-7
- ldm RP, {T0,T1,T2,T3} C 8-11
+ ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, H
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
- stm RP!, {T0,T1,T2,T3} C 8-11
- pop {r4,r5,r6,r7,r8,r10,pc}
+ stm RP, {T0,T1,T2,T3} C 8-11
+ pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp384r1_modp)
diff --git a/arm/ecc-secp521r1-modp.asm b/arm/ecc-secp521r1-modp.asm
index 22e8dd4e..d1952173 100644
--- a/arm/ecc-secp521r1-modp.asm
+++ b/arm/ecc-secp521r1-modp.asm
@@ -35,13 +35,14 @@ ifelse(`
define(`HP', `r0')
define(`RP', `r1')
-define(`T0', `r2')
-define(`T1', `r3')
-define(`T2', `r4')
-define(`F0', `r5')
-define(`F1', `r6')
-define(`F2', `r7')
-define(`F3', `r8')
+define(`XP', `r2')
+define(`T0', `r3')
+define(`T1', `r4')
+define(`T2', `r5')
+define(`F0', `r6')
+define(`F1', `r7')
+define(`F2', `r8')
+define(`F3', `r10')
define(`H', `r12')
define(`N', `lr')
@@ -53,20 +54,20 @@ define(`N', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp521r1_modp)
- push {r4,r5,r6,r7,r8,lr}
+ push {r4,r5,r6,r7,r8,r10,lr}
C Use that B^17 = 2^23 (mod p)
- ldr F3, [RP, #+68] C 17
- add HP, RP, #72 C 18
- ldr T0, [RP] C 0
+ ldr F3, [XP, #+68] C 17
+ add HP, XP, #72 C 18
+ ldr T0, [XP] C 0
adds T0, T0, F3, lsl #23
- str T0, [RP], #+4
+ str T0, [XP], #+4
mov N, #5
C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32
C and adding to limbs 1-3, 4-6, 7-9, 10-12, 13-15
.Loop:
- ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k
+ ldm XP, {T0,T1,T2} C 1+3*k -- 3+3*k
lsr F0, F3, #9
ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k
orr F0, F0, F1, lsl #23
@@ -78,11 +79,11 @@ PROLOGUE(_nettle_ecc_secp521r1_modp)
adcs T1, T1, F1
adcs T2, T2, F2
sub N, N, #1
- stm RP!,{T0,T1,T2}
+ stm XP!,{T0,T1,T2}
teq N, #0
bne .Loop
- ldr F0, [RP], #-64 C 16
+ ldr F0, [XP], #-64 C 16
ldr F1, [HP] C 33
ldr T0, .Lc511
@@ -98,12 +99,12 @@ PROLOGUE(_nettle_ecc_secp521r1_modp)
lsr F1, F1, #18
adc F1, F1, #0
- ldm RP, {T0, T1} C 0-1
+ ldm XP!, {T0, T1} C 0-1
adds T0, T0, F0
adcs T1, T1, F1
stm RP!, {T0, T1}
- ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8
+ ldm XP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
@@ -112,7 +113,7 @@ PROLOGUE(_nettle_ecc_secp521r1_modp)
adcs F2, F2, #0
adcs F3, F3, #0
stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
- ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15
+ ldm XP, {T0,T1,T2,F0,F1,F2,F3} C 9-15
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
@@ -123,5 +124,5 @@ PROLOGUE(_nettle_ecc_secp521r1_modp)
adcs H, H, #0
stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16
- pop {r4,r5,r6,r7,r8,pc}
+ pop {r4,r5,r6,r7,r8,r10,pc}
EPILOGUE(_nettle_ecc_secp521r1_modp)