diff options
author | Niels Möller <nisse@lysator.liu.se> | 2013-03-01 14:17:36 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2013-03-01 14:17:36 +0100 |
commit | 344c6d288dc0e8b9b68346725a346c6c341750d9 (patch) | |
tree | 2876cf9bcfb73041b8a8c7a3acddf8a30e512b8e | |
parent | 42e40f786c89a758069135de6d9d147cfe7f0951 (diff) | |
download | nettle-344c6d288dc0e8b9b68346725a346c6c341750d9.tar.gz |
ARM assembly for ecc_256_redc.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | armv7/ecc-256-redc.asm | 160 | ||||
-rw-r--r-- | ecc-256.c | 14 |
3 files changed, 175 insertions, 4 deletions
@@ -1,10 +1,13 @@ 2013-03-01 Niels Möller <nisse@lysator.liu.se> + * ecc-256.c: Check HAVE_NATIVE_ecc_256_redc, and use native + version if available. + * armv7/ecc-256-redc.asm: New file, 4 time speedup over C version. + * testsuite/ecc-redc-test.c: Increased test count. * ecc-224.c: Check HAVE_NATIVE_ecc_224_modp, and use native version if available. - * armv7/ecc-224-modp.asm: New file, 4.5 time speedup over C version. diff --git a/armv7/ecc-256-redc.asm b/armv7/ecc-256-redc.asm new file mode 100644 index 00000000..cbf10a89 --- /dev/null +++ b/armv7/ecc-256-redc.asm @@ -0,0 +1,160 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. 
+
+	.file "ecc-256-redc.asm"
+	.arm	C Assemble as ARM (not Thumb) instructions
+
+define(<RP>, <r1>) C Second argument rp; advanced by every ldm with writeback below
+
+define(<T0>, <r0>) C r0 holds the ecc argument on entry; it is never read, so the register is reused
+define(<T1>, <r2>)
+define(<T2>, <r3>)
+define(<T3>, <r4>)
+define(<T4>, <r5>)
+define(<T5>, <r6>)
+define(<T6>, <r7>)
+define(<T7>, <r8>) C T0-T7: eight-limb working window, T0 least significant
+define(<F0>, <r10>) C F0-F3: high limbs of the multiple added in each folding step
+define(<F1>, <r11>)
+define(<F2>, <r12>) C r12 is used without being saved; the ARM procedure call standard treats it as scratch
+define(<F3>, <lr>) C lr is pushed in the prologue and popped into pc, so it is free as a temporary
+	C In-place Montgomery-style reduction: reads the 16 limbs rp[0..15], stores 8 limbs to rp[0..7].
+	C ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp)
+	.text
+	.align 2
+	C In the comments below, B = 2^32 and <X2,X1,X0> lists limbs most significant first.
+PROLOGUE(nettle_ecc_256_redc)
+	push	{r4,r5,r6,r7,r8,r10,r11,lr}	C Save r4-r8, r10, r11 and lr, all of which are overwritten below
+
+	ldm	RP!, {T0,T1,T2,T3,T4,T5,T6,T7}	C T0-T7 = rp[0..7]; ldm fills ascending register numbers from ascending addresses
+
+	C Set <F3,F2,F1,F0> to the high 4 limbs of (B^2-B+1)<T2,T1,T0>
+	C         T2 T1
+	C   T2 T1 T0
+	C   -  T2 T1 T0
+	C -------------
+	C   F3 F2 F1 F0
+
+	C The low limb of that product is T0 itself, which is why T0 also appears in the sum further down.
+	adds	F1, T0, T2
+	adcs	F2, T1, #0
+	adc	F3, T2, #0
+
+	subs	F0, T1, T0
+	sbcs	F1, F1, T1	C Could also be rsc ?
+	sbcs	F2, F2, T2
+	sbc	F3, F3, #0
+
+	C Add (T8, T9, T10 are the next three input limbs, loaded into T5-T7 below):
+	C   T10 T9 T8 T7 T6 T5 T4 T3
+	C +  F3 F2 F1 F0 T0 T2 T1 T0
+	C --------------------------
+	C    T7 T6 T5 T4 T3 T2 T1 T0
+
+	adds	T3, T3, T0
+	adcs	T1, T4, T1
+	adcs	T2, T5, T2
+	adcs	T6, T6, T0
+	mov	T0, T3		C FIXME: Be more clever?
+	mov	T3, T6		C mov without the s suffix keeps the carry flag, so the adcs chain continues
+	adcs	T4, T7, F0
+
+	ldm	RP!, {T5,T6,T7}	C rp[8..10]; ldm does not change the flags either
+	adcs	T5, T5, F1
+	adcs	T6, T6, F2
+	adcs	T7, T7, F3
+
+	C New F3, F2, F1, F0 for the second step; the first adcs also adds in the carry left by the addition above
+	adcs	F1, T0, T2
+	adcs	F2, T1, #0
+	adc	F3, T2, #0
+
+	subs	F0, T1, T0
+	sbcs	F1, F1, T1	C Could also be rsc ?
+	sbcs	F2, F2, T2
+	sbc	F3, F3, #0
+
+	C Start adding, using the same layout as the first addition above
+	adds	T3, T3, T0
+	adcs	T1, T4, T1
+	adcs	T2, T5, T2
+	adcs	T6, T6, T0
+	mov	T0, T3		C FIXME: Be more clever?
+	mov	T3, T6		C Flags untouched, as in the first step
+	adcs	T4, T7, F0
+
+	ldm	RP!, {T5,T6,T7}	C rp[11..13]
+	adcs	T5, T5, F1
+	adcs	T6, T6, F2
+	adcs	T7, T7, F3
+
+	C Final iteration, eliminate only T0, T1
+	C Set <F2, F1, F0> to the high 3 limbs of (B^2-B+1)<T1,T0>
+
+	C      T1 T0 T1
+	C      -  T1 T0
+	C -------------
+	C      F2 F1 F0
+
+	C First add in carry (the carry left by the second addition)
+	adcs	F1, T0, #0
+	adcs	F2, T1, #0
+	subs	F0, T1, T0
+	sbcs	F1, F1, T1
+	sbc	F2, F2, #0
+
+	C Add (T8 and T9 are the last two input limbs, loaded into T0 and T1 below):
+	C    T9 T8 T7 T6 T5 T4 T3 T2
+	C +  F2 F1 F0 T0  0 T1 T0  0
+	C --------------------------
+	C    F2 F1 T7 T6 T5 T4 T3 T2
+
+	adds	T3, T3, T0
+	adcs	T4, T4, T1
+	adcs	T5, T5, #0
+	adcs	T6, T6, T0
+	adcs	T7, T7, F0
+	ldm	RP!, {T0, T1}	C rp[14..15]; T0 and T1 are no longer needed as multipliers
+	mov	F3, #0		C Cleared here for the mask below; flags are not affected
+	adcs	F1, F1, T0
+	adcs	F2, F2, T1
+
+	C Sum is < B^8 + p, so it's enough to fold carry once,
+	C If carry, add in
+	C   B^7 - B^6 - B^3 + 1  = <0, B-2, B-1, B-1, B-1, 0, 0, 1>
+
+	C Mask from carry flag, leaving carry intact
+	adc	F3, F3, #0	C F3 = carry (0 or 1); no s suffix, so the flag survives
+	rsb	F3, F3, #0	C F3 = 0 - F3, that is 0 or B-1 (all ones)
+
+	adcs	T0, T2, #0	C The carry itself supplies the +1 in the lowest limb
+	adcs	T1, T3, #0
+	adcs	T2, T4, #0
+	adcs	T3, T5, F3
+	adcs	T4, T6, F3
+	adcs	T5, T7, F3
+	and	F3, F3, #-2	C Mask becomes 0 or B-2; and without s keeps the carry chain intact
+	adcs	T6, F1, F3
+	adcs	T7, F2, #0
+
+	sub	RP, RP, #64	C Rewind the 16 words (64 bytes) consumed by the four ldm instructions
+	stm	RP, {T0,T1,T2,T3,T4,T5,T6,T7}	C Store the result to rp[0..7]
+
+	pop	{r4,r5,r6,r7,r8,r10,r11,pc}	C Restore saved registers; the saved lr is loaded into pc to return
+EPILOGUE(nettle_ecc_256_redc)
@@ -32,10 +32,18 @@
 
 #include "ecc-internal.h"
 
-#define USE_REDC (ECC_REDC_SIZE != 0)
+#define USE_REDC (HAVE_NATIVE_ecc_256_redc || ECC_REDC_SIZE != 0)
 
 #include "ecc-256.h"
 
+#if HAVE_NATIVE_ecc_256_redc
+# define ecc_256_redc nettle_ecc_256_redc
+void
+ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp);
+#else /* !HAVE_NATIVE_ecc_256_redc */
+# define ecc_256_redc ecc_generic_redc
+#endif
+
 #if ECC_BMODP_SIZE < ECC_LIMB_SIZE
 #define ecc_256_modp ecc_generic_modp
 #define ecc_256_modq ecc_generic_modq
@@ -213,8 +221,8 @@ const struct ecc_curve nettle_secp_256r1 =
   ecc_g,
   ecc_redc_g,
   ecc_256_modp,
-  ecc_generic_redc,
-  USE_REDC ? ecc_generic_redc : ecc_generic_modp,
+  ecc_256_redc,
+  USE_REDC ? ecc_256_redc : ecc_256_modp,
   ecc_256_modq,
   ecc_Bmodp,
   ecc_Bmodp_shifted,
|