diff options
-rw-r--r-- | arm/ecc-secp192r1-modp.asm | 31 | ||||
-rw-r--r-- | arm/ecc-secp224r1-modp.asm | 22 | ||||
-rw-r--r-- | arm/ecc-secp256r1-redc.asm | 23 | ||||
-rw-r--r-- | arm/ecc-secp384r1-modp.asm | 72 | ||||
-rw-r--r-- | arm/ecc-secp521r1-modp.asm | 39 |
5 files changed, 99 insertions, 88 deletions
diff --git a/arm/ecc-secp192r1-modp.asm b/arm/ecc-secp192r1-modp.asm index 72a81a54..e500bc85 100644 --- a/arm/ecc-secp192r1-modp.asm +++ b/arm/ecc-secp192r1-modp.asm @@ -35,15 +35,16 @@ ifelse(` define(`HP', `r0') C Overlaps unused modulo argument define(`RP', `r1') - -define(`T0', `r2') -define(`T1', `r3') -define(`T2', `r4') -define(`T3', `r5') -define(`T4', `r6') -define(`T5', `r7') -define(`T6', `r8') -define(`T7', `r10') +define(`XP', `r2') + +define(`T0', `r3') +define(`T1', `r4') +define(`T2', `r5') +define(`T3', `r6') +define(`T4', `r7') +define(`T5', `r8') +define(`T6', `r10') +define(`T7', `r11') define(`H0', `T0') C Overlaps T0 and T1 define(`H1', `T1') define(`C2', `HP') @@ -54,12 +55,12 @@ define(`C4', `r12') .align 2 PROLOGUE(_nettle_ecc_secp192r1_modp) - push {r4,r5,r6,r7,r8,r10} + push {r4,r5,r6,r7,r8,r10,r11} C Reduce two words at a time - add HP, RP, #48 - add RP, RP, #8 + add HP, XP, #48 + add XP, XP, #8 ldmdb HP!, {H0,H1} - ldm RP, {T2,T3,T4,T5,T6,T7} + ldm XP, {T2,T3,T4,T5,T6,T7} mov C4, #0 adds T4, T4, H0 adcs T5, T5, H1 @@ -77,7 +78,7 @@ PROLOGUE(_nettle_ecc_secp192r1_modp) C Need to add carry to T0 and T2, do T2 later adc C2, C2, #0 - ldmdb RP!, {T0, T1} + ldmdb XP!, {T0, T1} adcs T0, T0, T6 adcs T1, T1, T7 adcs T2, T2, T6 @@ -101,6 +102,6 @@ PROLOGUE(_nettle_ecc_secp192r1_modp) stm RP, {T0,T1,T2,T3,T4,T5} - pop {r4,r5,r6,r7,r8,r10} + pop {r4,r5,r6,r7,r8,r10,r11} bx lr EPILOGUE(_nettle_ecc_secp192r1_modp) diff --git a/arm/ecc-secp224r1-modp.asm b/arm/ecc-secp224r1-modp.asm index 3256601c..4b3b24e5 100644 --- a/arm/ecc-secp224r1-modp.asm +++ b/arm/ecc-secp224r1-modp.asm @@ -33,10 +33,11 @@ ifelse(` .file "ecc-secp224r1-modp.asm" .arm -define(`RP', `r1') -define(`H', `r0') C Overlaps unused modulo argument +define(`RP', `r1') C Overlaps T0 +define(`XP', `r2') +define(`H', `r0') C Overlaps unused modulo argument -define(`T0', `r2') +define(`T0', `r1') define(`T1', `r3') define(`T2', `r4') define(`T3', `r5') @@ -53,9 +54,10 @@ define(`L2', 
`lr') .align 2 PROLOGUE(_nettle_ecc_secp224r1_modp) - push {r4,r5,r6,r7,r8,r10,r11,lr} + C Pushes RP last + push {r1,r4,r5,r6,r7,r8,r10,r11,lr} - add L2, RP, #28 + add L2, XP, #28 ldm L2, {T0,T1,T2,T3,T4,T5,T6} mov H, #0 @@ -80,15 +82,15 @@ PROLOGUE(_nettle_ecc_secp224r1_modp) sbc H, #0 C Now subtract from low half - ldm RP!, {L0,L1,L2} + ldm XP!, {L0,L1,L2} C Clear carry, with the sbcs, this is the 1. - adds RP, #0 + adds XP, #0 sbcs T0, L0, T0 sbcs T1, L1, T1 sbcs T2, L2, T2 - ldm RP!, {T3,L0,L1,L2} + ldm XP!, {T3,L0,L1,L2} sbcs T3, T3, N3 sbcs T4, L0, T4 sbcs T5, L1, T5 @@ -109,6 +111,8 @@ PROLOGUE(_nettle_ecc_secp224r1_modp) sbcs T6, T6, #0 sbcs H, H, H + pop {XP} C Original RP + C Final borrow, subtract (B^3 - 1) |H| subs T0, T0, H sbcs T1, T1, H @@ -118,7 +122,7 @@ PROLOGUE(_nettle_ecc_secp224r1_modp) sbcs T5, T5, #0 sbcs T6, T6, #0 - stmdb RP, {T0,T1,T2,T3,T4,T5,T6} + stm XP, {T0,T1,T2,T3,T4,T5,T6} pop {r4,r5,r6,r7,r8,r10,r11,pc} EPILOGUE(_nettle_ecc_secp224r1_modp) diff --git a/arm/ecc-secp256r1-redc.asm b/arm/ecc-secp256r1-redc.asm index e127a2f2..da574398 100644 --- a/arm/ecc-secp256r1-redc.asm +++ b/arm/ecc-secp256r1-redc.asm @@ -33,10 +33,11 @@ ifelse(` .file "ecc-secp256r1-redc.asm" .arm -define(`RP', `r1') +define(`RP', `r1') C Overlaps T1 below +define(`XP', `r2') -define(`T0', `r0') C Overlaps unused modulo argument -define(`T1', `r2') +define(`T0', `r0') C Overlaps unused modulo argument +define(`T1', `r1') define(`T2', `r3') define(`T3', `r4') define(`T4', `r5') @@ -53,9 +54,10 @@ define(`F3', `lr') .align 2 PROLOGUE(_nettle_ecc_secp256r1_redc) - push {r4,r5,r6,r7,r8,r10,r11,lr} + C Pushes RP last + push {r1, r4,r5,r6,r7,r8,r10,r11,lr} - ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7} + ldm XP!, {T0,T1,T2,T3,T4,T5,T6,T7} C Set <F3,F2,F1> to the high 4 limbs of (B^2-B+1)<T2,T1,T0> C T2 T1 @@ -88,7 +90,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc) mov T3, T6 adcs T4, T7, F0 - ldm RP!, {T5,T6,T7} + ldm XP!, {T5,T6,T7} adcs T5, T5, F1 adcs T6, T6, F2 adcs T7, T7, F3 
@@ -112,7 +114,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc) mov T3, T6 adcs T4, T7, F0 - ldm RP!, {T5,T6,T7} + ldm XP!, {T5,T6,T7} adcs T5, T5, F1 adcs T6, T6, F2 adcs T7, T7, F3 @@ -143,7 +145,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc) adcs T5, T5, #0 adcs T6, T6, T0 adcs T7, T7, F0 - ldm RP!, {T0, T1} + ldm XP!, {T0, T1} mov F3, #0 adcs F1, F1, T0 adcs F2, F2, T1 @@ -156,6 +158,8 @@ PROLOGUE(_nettle_ecc_secp256r1_redc) adc F3, F3, #0 rsb F3, F3, #0 + pop {XP} C Original RP + adcs T0, T2, #0 adcs T1, T3, #0 adcs T2, T4, #0 @@ -166,8 +170,7 @@ PROLOGUE(_nettle_ecc_secp256r1_redc) adcs T6, F1, F3 adcs T7, F2, #0 - sub RP, RP, #64 - stm RP, {T0,T1,T2,T3,T4,T5,T6,T7} + stm XP, {T0,T1,T2,T3,T4,T5,T6,T7} pop {r4,r5,r6,r7,r8,r10,r11,pc} EPILOGUE(_nettle_ecc_secp256r1_redc) diff --git a/arm/ecc-secp384r1-modp.asm b/arm/ecc-secp384r1-modp.asm index 96744ee9..840eead8 100644 --- a/arm/ecc-secp384r1-modp.asm +++ b/arm/ecc-secp384r1-modp.asm @@ -34,15 +34,17 @@ ifelse(` .arm define(`RP', `r1') +define(`XP', `r2') + define(`T0', `r0') -define(`T1', `r2') -define(`T2', `r3') -define(`T3', `r4') -define(`F0', `r5') -define(`F1', `r6') -define(`F2', `r7') -define(`F3', `r8') -define(`F4', `r10') +define(`T1', `r3') +define(`T2', `r4') +define(`T3', `r5') +define(`F0', `r6') +define(`F1', `r7') +define(`F2', `r8') +define(`F3', `r10') +define(`F4', `r11') define(`N', `r12') define(`H', `lr') @@ -51,10 +53,10 @@ define(`H', `lr') .align 2 PROLOGUE(_nettle_ecc_secp384r1_modp) - push {r4,r5,r6,r7,r8,r10,lr} + push {r4,r5,r6,r7,r8,r10,r11,lr} - add RP, RP, #80 - ldm RP, {T0, T1, T2, T3} C 20-23 + add XP, XP, #80 + ldm XP, {T0, T1, T2, T3} C 20-23 C First get top 4 limbs, which need folding twice, as C @@ -91,8 +93,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adcs F4, F4, #0 C Add in to high part - sub RP, RP, #32 - ldm RP, {T0, T1, T2, T3} C 12-15 + sub XP, XP, #32 + ldm XP, {T0, T1, T2, T3} C 12-15 mov H, #0 adds F0, T0, F0 adcs F1, T1, F1 @@ -101,8 +103,8 @@ 
PROLOGUE(_nettle_ecc_secp384r1_modp) adcs F4, F4, #0 C Do F4 later C Add to low part, keeping carry (positive or negative) in H - sub RP, RP, #48 - ldm RP, {T0, T1, T2, T3} C 0-3 + sub XP, XP, #48 + ldm XP, {T0, T1, T2, T3} C 0-3 mov H, #0 adds T0, T0, F0 adcs T1, T1, F1 @@ -116,10 +118,10 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adds T3, T3, F0 adc H, H, #0 - stm RP!, {T0,T1,T2,T3} C 0-3 + stm XP!, {T0,T1,T2,T3} C 0-3 mov N, #2 .Loop: - ldm RP, {T0,T1,T2,T3} C 4-7 + ldm XP, {T0,T1,T2,T3} C 4-7 C First, propagate carry adds T0, T0, H @@ -137,7 +139,7 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adc H, H, #0 C +B^3 terms - ldr F0, [RP, #+48] C 16 + ldr F0, [XP, #+48] C 16 adds T0, T0, F1 adcs T1, T1, F2 adcs T2, T2, F3 @@ -145,8 +147,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adc H, H, #0 C -B - ldr F1, [RP, #+52] C 17-18 - ldr F2, [RP, #+56] + ldr F1, [XP, #+52] C 17-18 + ldr F2, [XP, #+56] subs T0, T0, F3 sbcs T1, T1, F0 sbcs T2, T2, F1 @@ -154,14 +156,14 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) sbcs H, H, #0 C +1 - ldr F3, [RP, #+60] C 19 + ldr F3, [XP, #+60] C 19 adds T0, T0, F0 adcs T1, T1, F1 adcs T2, T2, F2 adcs T3, T3, F3 adc H, H, #0 subs N, N, #1 - stm RP!, {T0,T1,T2,T3} + stm XP!, {T0,T1,T2,T3} bne .Loop C Fold high limbs, we need to add in @@ -170,9 +172,9 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) C C We always have F4 >= 0, but we can have H < 0. C Sign extension gets tricky when F4 = 0 and H < 0. 
- sub RP, RP, #48 + sub XP, XP, #48 - ldm RP, {T0,T1,T2,T3} C 0-3 + ldm XP, {T0,T1,T2,T3} C 0-3 C H H 0 -H H C ---------------- @@ -201,8 +203,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adcs T3, T3, F3 adc H, H, F0 C 0+cy H+cy -2+cy - stm RP!, {T0,T1,T2,T3} C 0-3 - ldm RP, {T0,T1,T2,T3} C 4-7 + stm XP!, {T0,T1,T2,T3} C 0-3 + ldm XP, {T0,T1,T2,T3} C 4-7 C F4 0 -F4 C --------- @@ -226,8 +228,8 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adcs T2, T2, F2 adcs T3, T3, F3 - stm RP!, {T0,T1,T2,T3} C 4-7 - ldm RP, {T0,T1,T2,T3} C 8-11 + stm XP!, {T0,T1,T2,T3} C 4-7 + ldm XP, {T0,T1,T2,T3} C 8-11 adcs T0, T0, F4 adcs T1, T1, H @@ -235,11 +237,11 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adcs T3, T3, H adc H, H, #0 - stm RP, {T0,T1,T2,T3} C 8-11 + stm XP, {T0,T1,T2,T3} C 8-11 C Final (unlikely) carry - sub RP, RP, #32 - ldm RP, {T0,T1,T2,T3} C 0-3 + sub XP, XP, #32 + ldm XP!, {T0,T1,T2,T3} C 0-3 C Fold H into F0-F4 mov F0, H asr H, #31 @@ -254,17 +256,17 @@ PROLOGUE(_nettle_ecc_secp384r1_modp) adcs T3, T3, F3 stm RP!, {T0,T1,T2,T3} C 0-3 - ldm RP, {T0,T1,T2,T3} C 4-7 + ldm XP!, {T0,T1,T2,T3} C 4-7 adcs T0, T0, F4 adcs T1, T1, H adcs T2, T2, H adcs T3, T3, H stm RP!, {T0,T1,T2,T3} C 4-7 - ldm RP, {T0,T1,T2,T3} C 8-11 + ldm XP, {T0,T1,T2,T3} C 8-11 adcs T0, T0, H adcs T1, T1, H adcs T2, T2, H adcs T3, T3, H - stm RP!, {T0,T1,T2,T3} C 8-11 - pop {r4,r5,r6,r7,r8,r10,pc} + stm RP, {T0,T1,T2,T3} C 8-11 + pop {r4,r5,r6,r7,r8,r10,r11,pc} EPILOGUE(_nettle_ecc_secp384r1_modp) diff --git a/arm/ecc-secp521r1-modp.asm b/arm/ecc-secp521r1-modp.asm index 22e8dd4e..d1952173 100644 --- a/arm/ecc-secp521r1-modp.asm +++ b/arm/ecc-secp521r1-modp.asm @@ -35,13 +35,14 @@ ifelse(` define(`HP', `r0') define(`RP', `r1') -define(`T0', `r2') -define(`T1', `r3') -define(`T2', `r4') -define(`F0', `r5') -define(`F1', `r6') -define(`F2', `r7') -define(`F3', `r8') +define(`XP', `r2') +define(`T0', `r3') +define(`T1', `r4') +define(`T2', `r5') +define(`F0', `r6') +define(`F1', `r7') +define(`F2', `r8') 
+define(`F3', `r10') define(`H', `r12') define(`N', `lr') @@ -53,20 +54,20 @@ define(`N', `lr') .align 2 PROLOGUE(_nettle_ecc_secp521r1_modp) - push {r4,r5,r6,r7,r8,lr} + push {r4,r5,r6,r7,r8,r10,lr} C Use that B^17 = 2^23 (mod p) - ldr F3, [RP, #+68] C 17 - add HP, RP, #72 C 18 - ldr T0, [RP] C 0 + ldr F3, [XP, #+68] C 17 + add HP, XP, #72 C 18 + ldr T0, [XP] C 0 adds T0, T0, F3, lsl #23 - str T0, [RP], #+4 + str T0, [XP], #+4 mov N, #5 C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32 C and adding to limbs 1-3, 4-6, 7-9, 10-12, 13-15 .Loop: - ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k + ldm XP, {T0,T1,T2} C 1+3*k -- 3+3*k lsr F0, F3, #9 ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k orr F0, F0, F1, lsl #23 @@ -78,11 +79,11 @@ PROLOGUE(_nettle_ecc_secp521r1_modp) adcs T1, T1, F1 adcs T2, T2, F2 sub N, N, #1 - stm RP!,{T0,T1,T2} + stm XP!,{T0,T1,T2} teq N, #0 bne .Loop - ldr F0, [RP], #-64 C 16 + ldr F0, [XP], #-64 C 16 ldr F1, [HP] C 33 ldr T0, .Lc511 @@ -98,12 +99,12 @@ PROLOGUE(_nettle_ecc_secp521r1_modp) lsr F1, F1, #18 adc F1, F1, #0 - ldm RP, {T0, T1} C 0-1 + ldm XP!, {T0, T1} C 0-1 adds T0, T0, F0 adcs T1, T1, F1 stm RP!, {T0, T1} - ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8 + ldm XP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8 adcs T0, T0, #0 adcs T1, T1, #0 adcs T2, T2, #0 @@ -112,7 +113,7 @@ PROLOGUE(_nettle_ecc_secp521r1_modp) adcs F2, F2, #0 adcs F3, F3, #0 stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8 - ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15 + ldm XP, {T0,T1,T2,F0,F1,F2,F3} C 9-15 adcs T0, T0, #0 adcs T1, T1, #0 adcs T2, T2, #0 @@ -123,5 +124,5 @@ PROLOGUE(_nettle_ecc_secp521r1_modp) adcs H, H, #0 stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16 - pop {r4,r5,r6,r7,r8,pc} + pop {r4,r5,r6,r7,r8,r10,pc} EPILOGUE(_nettle_ecc_secp521r1_modp) |