diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2016-02-08 20:13:38 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2016-02-08 20:13:38 +0200 |
commit | 8353884bc65c820d5bcacaf1ac23cdee72091a09 (patch) | |
tree | e203452ffb6274d0b9d7515268fb0e490dd371c2 /cipher/sha512.c | |
parent | b8b3361504950689ef1e779fb3357cecf8a9f739 (diff) | |
download | libgcrypt-8353884bc65c820d5bcacaf1ac23cdee72091a09.tar.gz |
Add ARM assembly implementation of SHA-512
* cipher/Makefile.am: Add 'sha512-arm.S'.
* cipher/sha512-arm.S: New.
* cipher/sha512.c (USE_ARM_ASM): New.
(_gcry_sha512_transform_arm): New.
(transform) [USE_ARM_ASM]: Use ARM assembly implementation instead of
generic.
* configure.ac: Add 'sha512-arm.lo'.
--
Benchmark on Cortex-A8 (armv6, 1008 MHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
SHA512 | 112.0 ns/B 8.52 MiB/s 112.9 c/B
After (3.3x faster):
| nanosecs/byte mebibytes/sec cycles/byte
SHA512 | 34.01 ns/B 28.04 MiB/s 34.28 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512.c')
-rw-r--r-- | cipher/sha512.c | 82 |
1 file changed, 50 insertions, 32 deletions
```diff
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 1196db93..5b259650 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -66,6 +66,13 @@
 #endif /*ENABLE_NEON_SUPPORT*/
 
 
+/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+#endif
+
+
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
@@ -204,36 +211,6 @@ sha384_init (void *context, unsigned int flags)
 }
 
 
-static inline u64
-ROTR (u64 x, u64 n)
-{
-  return ((x >> n) | (x << (64 - n)));
-}
-
-static inline u64
-Ch (u64 x, u64 y, u64 z)
-{
-  return ((x & y) ^ ( ~x & z));
-}
-
-static inline u64
-Maj (u64 x, u64 y, u64 z)
-{
-  return ((x & y) ^ (x & z) ^ (y & z));
-}
-
-static inline u64
-Sum0 (u64 x)
-{
-  return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
-}
-
-static inline u64
-Sum1 (u64 x)
-{
-  return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
-}
-
 static const u64 k[] =
   {
     U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
@@ -278,6 +255,38 @@ static const u64 k[] =
     U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
   };
 
+#ifndef USE_ARM_ASM
+
+static inline u64
+ROTR (u64 x, u64 n)
+{
+  return ((x >> n) | (x << (64 - n)));
+}
+
+static inline u64
+Ch (u64 x, u64 y, u64 z)
+{
+  return ((x & y) ^ ( ~x & z));
+}
+
+static inline u64
+Maj (u64 x, u64 y, u64 z)
+{
+  return ((x & y) ^ (x & z) ^ (y & z));
+}
+
+static inline u64
+Sum0 (u64 x)
+{
+  return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
+}
+
+static inline u64
+Sum1 (u64 x)
+{
+  return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
+}
+
 /****************
  * Transform the message W which consists of 16 64-bit-words
  */
@@ -304,7 +313,6 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
 #define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
 #define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
 
-
   for (t = 0; t < 80 - 16; )
     {
       u64 t1, t2;
@@ -545,7 +553,7 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
   return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) +
                           3 * sizeof(void*);
 }
-
+#endif /*!USE_ARM_ASM*/
 
 /* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
  * stack to store XMM6-XMM15 needed on Win64. */
@@ -568,6 +576,12 @@ void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
                                         const u64 k[], size_t num_blks);
 #endif
 
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+                                         const unsigned char *data,
+                                         const u64 k[], size_t num_blks);
+#endif
+
 #ifdef USE_SSSE3
 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
                                                 void *state,
@@ -622,6 +636,9 @@ transform (void *context, const unsigned char *data, size_t nblks)
     }
 #endif
 
+#ifdef USE_ARM_ASM
+  burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
+#else
   do
     {
       burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
@@ -636,6 +653,7 @@ transform (void *context, const unsigned char *data, size_t nblks)
    */
   burn += ASM_EXTRA_STACK;
 #endif
+#endif
 
   return burn;
 }
```