diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-10-22 17:07:53 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-10-22 19:57:27 +0300 |
commit | 98674fdaa30ab22a3ac86ca05d688b5b6112895d (patch) | |
tree | 937374c6701fa80161a727b200aaddf0933d37c5 /cipher/twofish.c | |
parent | e67c67321ce240c93dd0fa2b21c649c0a8e233f7 (diff) | |
download | libgcrypt-98674fdaa30ab22a3ac86ca05d688b5b6112895d.tar.gz |
twofish: add ARMv6 assembly implementation
* cipher/Makefile.am: Add 'twofish-armv6.S'.
* cipher/twofish-armv6.S: New.
* cipher/twofish.c (USE_ARMV6_ASM): New macro.
[USE_ARMV6_ASM] (_gcry_twofish_armv6_encrypt_block)
(_gcry_twofish_armv6_decrypt_block): New prototypes.
[USE_ARMV6_ASM] (twofish_encrypt, twofish_decrypt): Add.
[USE_AMD64_ASM] (do_twofish_encrypt, do_twofish_decrypt): Remove.
(_gcry_twofish_ctr_enc, _gcry_twofish_cfb_dec): Use 'twofish_encrypt'
instead of 'do_twofish_encrypt'.
(_gcry_twofish_cbc_dec): Use 'twofish_decrypt' instead of
'do_twofish_decrypt'.
* configure.ac [arm]: Add 'twofish-armv6.lo'.
--
Add an optimized ARMv6 assembly implementation for Twofish. The implementation is
tuned for Cortex-A8. Unaligned access handling is done in the assembly part.
For now, only enable this on little-endian systems, as big-endian correctness
has not been tested yet.
Old (gcc-4.8) vs new (twofish-asm), Cortex-A8 (on armhf):
ECB/Stream CBC CFB OFB CTR CCM
--------------- --------------- --------------- --------------- --------------- ---------------
TWOFISH 1.23x 1.25x 1.16x 1.26x 1.16x 1.30x 1.18x 1.17x 1.23x 1.23x 1.22x 1.22x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/twofish.c')
-rw-r--r-- | cipher/twofish.c | 88 |
1 files changed, 62 insertions, 26 deletions
diff --git a/cipher/twofish.c b/cipher/twofish.c index 993ad0f4..d2cabbe8 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -57,6 +57,14 @@ # define USE_AMD64_ASM 1 #endif +/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ +#undef USE_ARMV6_ASM +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARMV6_ASM 1 +# endif +#endif + /* Prototype for the self-test function. */ static const char *selftest(void); @@ -746,7 +754,16 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); -#else /*!USE_AMD64_ASM*/ +#elif defined(USE_ARMV6_ASM) + +/* Assembly implementations of Twofish. */ +extern void _gcry_twofish_armv6_encrypt_block(const TWOFISH_context *c, + byte *out, const byte *in); + +extern void _gcry_twofish_armv6_decrypt_block(const TWOFISH_context *c, + byte *out, const byte *in); + +#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ /* Macros to compute the g() function in the encryption and decryption * rounds. 
G1 is the straight g() function; G2 includes the 8-bit @@ -812,21 +829,25 @@ extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, #ifdef USE_AMD64_ASM -static void -do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) +static unsigned int +twofish_encrypt (void *context, byte *out, const byte *in) { + TWOFISH_context *ctx = context; _gcry_twofish_amd64_encrypt_block(ctx, out, in); + return /*burn_stack*/ (4*sizeof (void*)); } +#elif defined(USE_ARMV6_ASM) + static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_amd64_encrypt_block(ctx, out, in); + _gcry_twofish_armv6_encrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } -#else /*!USE_AMD64_ASM*/ +#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ static void do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) @@ -868,28 +889,32 @@ twofish_encrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (24+3*sizeof (void*)); } -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ /* Decrypt one block. in and out may be the same. 
*/ #ifdef USE_AMD64_ASM -static void -do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in) +static unsigned int +twofish_decrypt (void *context, byte *out, const byte *in) { + TWOFISH_context *ctx = context; _gcry_twofish_amd64_decrypt_block(ctx, out, in); + return /*burn_stack*/ (4*sizeof (void*)); } +#elif defined(USE_ARMV6_ASM) + static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_amd64_decrypt_block(ctx, out, in); + _gcry_twofish_armv6_decrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } -#else /*!USE_AMD64_ASM*/ +#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ static void do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in) @@ -932,7 +957,7 @@ twofish_decrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (24+3*sizeof (void*)); } -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ @@ -947,14 +972,11 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[TWOFISH_BLOCKSIZE]; - int burn_stack_depth = 24 + 3 * sizeof (void*); + unsigned int burn, burn_stack_depth = 0; int i; #ifdef USE_AMD64_ASM { - if (nblocks >= 3 && burn_stack_depth < 8 * sizeof(void*)) - burn_stack_depth = 8 * sizeof(void*); - /* Process data in 3 block chunks. */ while (nblocks >= 3) { @@ -963,6 +985,10 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ @@ -973,7 +999,10 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. 
*/ - do_twofish_encrypt(ctx, tmpbuf, ctr); + burn = twofish_encrypt(ctx, tmpbuf, ctr); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; @@ -1002,13 +1031,10 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[TWOFISH_BLOCKSIZE]; - int burn_stack_depth = 24 + 3 * sizeof (void*); + unsigned int burn, burn_stack_depth = 0; #ifdef USE_AMD64_ASM { - if (nblocks >= 3 && burn_stack_depth < 9 * sizeof(void*)) - burn_stack_depth = 9 * sizeof(void*); - /* Process data in 3 block chunks. */ while (nblocks >= 3) { @@ -1017,6 +1043,10 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 9 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ @@ -1029,7 +1059,9 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, OUTBUF. */ memcpy(savebuf, inbuf, TWOFISH_BLOCKSIZE); - do_twofish_decrypt (ctx, outbuf, inbuf); + burn = twofish_decrypt (ctx, outbuf, inbuf); + if (burn > burn_stack_depth) + burn_stack_depth = burn; buf_xor(outbuf, outbuf, iv, TWOFISH_BLOCKSIZE); memcpy(iv, savebuf, TWOFISH_BLOCKSIZE); @@ -1051,13 +1083,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, TWOFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; - int burn_stack_depth = 24 + 3 * sizeof (void*); + unsigned int burn, burn_stack_depth = 0; #ifdef USE_AMD64_ASM { - if (nblocks >= 3 && burn_stack_depth < 8 * sizeof(void*)) - burn_stack_depth = 8 * sizeof(void*); - /* Process data in 3 block chunks. 
*/ while (nblocks >= 3) { @@ -1066,6 +1095,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ @@ -1074,7 +1107,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { - do_twofish_encrypt(ctx, iv, iv); + burn = twofish_encrypt(ctx, iv, iv); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; inbuf += TWOFISH_BLOCKSIZE; |